#### Generate Automated Executive Report with Mistral
This report summarizes the performance of our fine-tuned Mistral model for SMS spam detection.

In [1]:
import os

import markdown2
import matplotlib.pyplot as plt
import numpy as np
import ollama
import pandas as pd
from jinja2 import Template
from sklearn.feature_extraction.text import CountVectorizer
from weasyprint import HTML

# Make sure the report output directory exists before later cells write into it;
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs("../reports", exist_ok=True)

In [2]:
# --- Load Metrics & Plot Data Summary ---
# Only the first row of the metrics file is kept, so `metrics_df` is a Series
# indexed by the CSV's column names (despite the `_df` suffix).
metrics_df = pd.read_csv("../data/model_metrics.csv").iloc[0]

# Read the first two columns of the SMS file, name them, and encode the label
# as 0 (ham) / 1 (spam). Labels outside the mapping become NaN.
df = (
    pd.read_csv("../data/spam.csv", encoding='latin-1', usecols=[0, 1])
    .set_axis(['label', 'text'], axis=1)
    .assign(label=lambda frame: frame['label'].map({'ham': 0, 'spam': 1}))
)

# Relative frequency of each class, keyed by the readable class name again.
dist_summary = (
    df['label']
    .value_counts(normalize=True)
    .rename({0: 'ham', 1: 'spam'})
    .to_dict()
)

In [3]:
# --- Top spam unigrams ---
# Count single-word terms over the spam messages only (label == 1), dropping
# scikit-learn's built-in English stop words.
vec = CountVectorizer(ngram_range=(1, 1), stop_words='english')
X = vec.fit_transform(df.loc[df['label'] == 1, 'text'])

# Summing over rows yields one total per vocabulary column (indexed as [0, col] below).
sum_words = X.sum(axis=0)

# vocabulary_ maps each term to its column index; keep the ten most frequent
# (term, count) pairs, highest count first.
top_unigrams = sorted(
    ((term, sum_words[0, col]) for term, col in vec.vocabulary_.items()),
    key=lambda pair: pair[1],
    reverse=True,
)[:10]

In [4]:
# --- Prompt LLM for Plot + Metrics Interpretation ---
# Pre-compute every value that is interpolated into the prompt so the template
# below stays readable.
ham_share = dist_summary.get('ham', 0)
spam_share = dist_summary.get('spam', 0)

# Character length of every message, then the per-class mean (label 0 = ham, 1 = spam).
text_lengths = df['text'].str.len()
ham_avg_chars = text_lengths[df['label'] == 0].mean()
spam_avg_chars = text_lengths[df['label'] == 1].mean()

# Only the terms are sent to the model; their counts are left out.
unigram_list = ', '.join(term for term, _count in top_unigrams)

summary_prompt = f"""
Act as a senior financial fraud analyst.
Interpret these findings **as if you can see the underlying visual plots**, but using only this data:

### Class Distribution
- Ham: {ham_share:.1%}
- Spam: {spam_share:.1%}

### Char Count Boxplot
- Ham average chars: {ham_avg_chars:.1f}
- Spam average chars: {spam_avg_chars:.1f}

### Top Spam Unigrams:
{unigram_list}

### Model Metrics
- Accuracy: {metrics_df['accuracy']:.1%}
- Precision: {metrics_df['precision']:.1%}
- Recall: {metrics_df['recall']:.1%}
- F1 Score: {metrics_df['f1']:.1%}

Generate a 2-page executive report in markdown format with the following sections:

1. Executive Summary
2. Model Evaluation
3. Risk Indicators (lexical + structural)
4. Trust & Interpretability (e.g., via LIME)
5. Governance & Recommendations

Keep the language formal and insight-driven. Do not reference the raw numbers directly. Infer insights.
"""

# Ask the 'mistral' model served through ollama for the report text, then
# convert the returned markdown into HTML for the report template.
response = ollama.generate(model='mistral', prompt=summary_prompt)
report_body = response['response']
html_report_body = markdown2.markdown(report_body)

In [None]:
# --- HTML Template with Commentary ABOVE Figures ---
# The model-written interpretation is placed first, followed by the saved plot
# images. Image paths are relative, so they resolve against the location of the
# HTML file written below (../reports/).
# NOTE(review): `interpretation` is inserted as raw HTML (`| safe`); it comes
# from model-generated markdown, so treat the output as unsanitised.
html_template = Template("""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Fraud Detection Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 2em; line-height: 1.6; }
        h1, h2, h3 { color: #2c3e50; font-weight: 600; margin-top: 1.5em; }
        ul { padding-left: 1.5em; }
        li { margin-bottom: 0.4em; }
        img { max-width: 100%; height: auto; margin: 20px 0; }
        .metrics { background: #ecf0f1; padding: 10px; margin: 20px 0; border-left: 4px solid #2c3e50; }
        footer { margin-top: 3em; font-size: 0.9em; color: #555; }
    </style>
</head>
<body>
    <h1>SMS Fraud Detection System - Executive Report</h1>

    <div>{{ interpretation | safe }}</div>

    <h2>Figures & Visuals</h2>
    <h3>Class Distribution</h3>
    <img src="../plots/eda_class_dist.png" alt="Class Distribution">

    <h3>Text Length Distribution</h3>
    <img src="../plots/eda_text_length.png" alt="Character Count by Label">

    <h3>Top Fraudulent Unigrams</h3>
    <img src="../plots/top_spam_unigrams.png" alt="Top Unigrams">

    <h3>Confusion Matrix</h3>
    <img src="../plots/confusion_matrix.png" alt="Confusion Matrix">

    <h3>LIME Explanation</h3>
    <img src="../plots/lime_visualization.png" alt="LIME Explanation">

    <footer>
        <p>Generated by AI Analyst · Date: {{ date }}</p>
    </footer>
</body>
</html>
""")

# Fill the template with the converted report body and today's date (YYYY-MM-DD).
html_filled = html_template.render(
    interpretation=html_report_body,
    date=pd.Timestamp.now().strftime('%Y-%m-%d')
)

html_path = "../reports/fraud_detection_report.html"
# Write explicitly as UTF-8: the page declares charset="utf-8" and contains
# non-ASCII text (the "·" in the footer, plus whatever the model produced).
# Relying on the platform default encoding can corrupt those characters or
# raise UnicodeEncodeError on non-UTF-8 locales.
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_filled)

# Render the saved HTML file to PDF next to it.
HTML(html_path).write_pdf("../reports/fraud_detection_report.pdf")

print("HTML report with model-driven interpretation and structured figures generated.")

HTML report with model-driven interpretation and structured figures generated.


In [6]:
# Re-render the PDF from the saved HTML file. This is the same call that ends the
# previous cell, so running this cell only regenerates the PDF without rebuilding the HTML.
HTML(html_path).write_pdf("../reports/fraud_detection_report.pdf")