#### Automated Report Generation with Mistral
This notebook generates an executive report (HTML and PDF) that summarizes the performance of our fine-tuned Mistral model for SMS spam detection.

In [7]:
import html
import os

import matplotlib.pyplot as plt
import numpy as np
import ollama
import pandas as pd
from jinja2 import Template
from sklearn.feature_extraction.text import CountVectorizer
from weasyprint import HTML

# NOTE(review): ".." is the parent of the current working directory, which normally
# already exists, so with exist_ok=True this call appears to create nothing.
# Confirm which output directory was actually intended here.
os.makedirs("..", exist_ok=True)

In [8]:
# --- Load Metrics & Plot Data Summary ---
# Only the first row of the metrics file is used. Despite its name, `metrics_df`
# is a pandas Series indexed by metric name (the result of `.iloc[0]`).
metrics_table = pd.read_csv("../data/model_metrics.csv")

# Fail fast with a readable message: an empty file would otherwise raise an opaque
# IndexError from `.iloc[0]`, and a missing column would only surface as a KeyError
# in a later cell when the metrics are formatted.
REQUIRED_METRICS = ("accuracy", "precision", "recall", "f1")
if metrics_table.empty:
    raise ValueError("../data/model_metrics.csv contains no metric rows")
missing_metrics = [name for name in REQUIRED_METRICS if name not in metrics_table.columns]
if missing_metrics:
    raise KeyError(f"../data/model_metrics.csv is missing metric columns: {missing_metrics}")

metrics_df = metrics_table.iloc[0]

In [9]:
# Load label distribution
# Only the first two CSV columns are read; they are renamed to label / text.
spam_csv_path = "../data/spam.csv"
df = pd.read_csv(spam_csv_path, usecols=[0, 1], encoding='latin-1')
df.columns = ['label', 'text']

# Encode the classes numerically: ham -> 0, spam -> 1.
label_to_code = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_to_code)

# Relative frequency of each class, keyed by the readable class name.
code_to_label = {code: label for label, code in label_to_code.items()}
label_shares = df['label'].value_counts(normalize=True)
dist_summary = label_shares.rename(code_to_label).to_dict()

In [10]:
# Load top unigrams
# Count single-word tokens (English stop words removed) over the spam messages only.
spam_messages = df.loc[df['label'] == 1, 'text']
vec = CountVectorizer(ngram_range=(1, 1), stop_words='english')
X = vec.fit_transform(spam_messages)

# Column-wise totals: one count per vocabulary entry.
sum_words = X.sum(axis=0)

# Pair every vocabulary word with its total, then keep the ten most frequent.
# The sort is stable, so ties keep the vocabulary's iteration order.
word_totals = [(word, sum_words[0, column]) for word, column in vec.vocabulary_.items()]
word_totals.sort(key=lambda pair: pair[1], reverse=True)
top_unigrams = word_totals[:10]

In [11]:
# --- Prompt LLM for Analysis ---
# Pre-compute every figure quoted in the prompt so the prompt text itself stays readable.
ham_share = dist_summary.get('ham', 0)
spam_share = dist_summary.get('spam', 0)
ham_avg_chars = df.loc[df['label'] == 0, 'text'].str.len().mean()
spam_avg_chars = df.loc[df['label'] == 1, 'text'].str.len().mean()
top_words = ', '.join(word for word, _ in top_unigrams)
metrics_line = ', '.join(
    f"{title}: {metrics_df[key]:.1%}"
    for title, key in (
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1', 'f1'),
    )
)

# NOTE(review): the model only receives the text statistics below; the plots and the
# LIME output mentioned in sections 3 and 4 are not part of the prompt.
summary_prompt = f"""
Act as a senior financial fraud analyst.
Write a professional 2-page executive report covering the following:

## System Summary:
- Class balance: {ham_share:.1%} ham, {spam_share:.1%} spam
- Avg char count: Ham: {ham_avg_chars:.1f}, Spam: {spam_avg_chars:.1f}
- Top spam unigrams: {top_words}
- Metrics: {metrics_line}

## Report Sections:
1. Executive Summary (brief intro + outcome)
2. Model Evaluation (metrics analysis)
3. Risk Signal Insights (based on unigrams, spam length). Interpret the plots.
4. Interpretability & Trust (based on LIME)
5. Compliance & Recommendations (Basel III, governance)

Use clear section headings, bullet points for insights, and professional tone.
"""

# Send the prompt to the 'mistral' model through ollama and keep only the generated text.
response = ollama.generate(prompt=summary_prompt, model='mistral')
report_body = response['response']

# --- HTML Report Template ---
# Jinja2 template for the executive report page.
# Placeholders filled at render time: accuracy, precision, recall, f1,
# interpretation, date.
# The <img> tags point at PNG files under ../plots/ via relative paths, so those
# files have to exist relative to wherever the rendered HTML is opened from.
# NOTE(review): `interpretation` is passed through the `safe` filter, i.e. it is
# emitted as raw markup; whatever supplies that value is responsible for making
# the text HTML-safe.
html_template = Template("""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Fraud Detection Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 2em; line-height: 1.6; }
        h1, h2, h3 { color: #2c3e50; margin-top: 1.5em; }
        img { max-width: 100%; height: auto; margin: 20px 0; }
        .metrics { background: #ecf0f1; padding: 10px; margin: 20px 0; border-left: 4px solid #2c3e50; }
        footer { margin-top: 3em; font-size: 0.9em; color: #555; }
    </style>
</head>
<body>
    <h1>SMS Fraud Detection System - Executive Analysis</h1>

    <h2>Model Performance Summary</h2>
    <div class="metrics">
        <p><strong>Accuracy:</strong> {{ accuracy }}</p>
        <p><strong>Fraud Precision:</strong> {{ precision }}</p>
        <p><strong>Fraud Recall:</strong> {{ recall }}</p>
        <p><strong>F1 Score:</strong> {{ f1 }}</p>
    </div>

    <h2>Exploratory Data Analysis</h2>
    <img src="../plots/eda_class_dist.png" alt="Class Distribution">
    <img src="../plots/eda_text_length.png" alt="Character Count by Label">
    <img src="../plots/top_spam_unigrams.png" alt="Top Unigrams">

    <h2>Model Evaluation</h2>
    <img src="../plots/confusion_matrix.png" alt="Confusion Matrix">
    <h2>Model Interpretability</h2>
    <img src="../plots/lime_visualization.png" alt="LIME Explanation">

    <h2>Executive Commentary</h2>
    <div>{{ interpretation | safe }}</div>

    <footer>
        <p>Generated by AI Analyst · Date: {{ date }}</p>
    </footer>
</body>
</html>
""")

In [12]:
# --- Render the report and export it as HTML + PDF ---
# The LLM output is free text, not trusted markup, and the template emits it
# unescaped. Escape it first so stray "<", ">" or "&" characters cannot break (or
# inject into) the page; newlines are turned into <br> only after escaping so the
# inserted line breaks themselves are preserved.
interpretation_html = html.escape(report_body).replace("\n", "<br>")

html_filled = html_template.render(
    accuracy=f"{metrics_df['accuracy']:.1%}",
    precision=f"{metrics_df['precision']:.1%}",
    recall=f"{metrics_df['recall']:.1%}",
    f1=f"{metrics_df['f1']:.1%}",
    interpretation=interpretation_html,
    date=pd.Timestamp.now().strftime('%Y-%m-%d')
)

html_path = "..LLM_executive_report.html"
# The page declares <meta charset="utf-8"> and contains non-ASCII text, so write it
# explicitly as UTF-8 instead of relying on the platform's default encoding.
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_filled)

# Convert the HTML file that was just written into a PDF next to it.
HTML(html_path).write_pdf("..LLM_executive_report.pdf")
print("HTML and PDF reports generated. Open in browser for print to PDF.")

HTML and PDF reports generated. Open in browser for print to PDF.
