# 05 - Visualization & Report Generation
Generate presentation-ready visualizations and the final Fraudasaurus report.

In [None]:
import sys
sys.path.insert(0, "..")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from src.visualize import (
    plot_amount_histogram, plot_volume_by_time, plot_risk_heatmap,
    plot_network_graph, plot_account_timeline, plot_anomaly_scatter
)
from src.report import generate_report, generate_markdown_report
from src.features import build_feature_matrix

# Every figure rendered by this notebook is saved under this folder; create it
# (including any missing parent directories) before the plotting cells run.
figures_dir = Path("..") / "output" / "figures"
figures_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Load every processed parquet table plus the composite risk scores (if present),
# then pick the table that holds the transactions.
data_dir = Path("../data/processed")
output_dir = Path("../output")

# Sort the matches: glob order depends on the filesystem, and the transaction-table
# selection below depends on the dict's iteration order. Sorting keeps the choice
# identical across machines and runs.
datasets = {f.stem: pd.read_parquet(f) for f in sorted(data_dir.glob("*.parquet"))}

# Composite scores are optional; an empty frame makes the later cells skip themselves.
risk_scores_path = output_dir / "risk_scores.csv"
composite = pd.read_csv(risk_scores_path) if risk_scores_path.exists() else pd.DataFrame()

# Prefer a dataset whose name contains "trans"; otherwise fall back to the first
# dataset (alphabetically), or None when nothing was loaded.
txn_key = next((k for k in datasets if "trans" in k.lower()), next(iter(datasets), None))
txn = datasets[txn_key] if txn_key is not None else pd.DataFrame()
print(f"Transaction data: {txn.shape}")

## 1. Transaction Amount Distribution (Structuring Evidence)

In [None]:
# Plot the amount distribution from the first column whose name contains "amount".
# Nothing is drawn when there are no transactions or no such column.
if len(txn):
    amt_col = None
    for column in txn.columns:
        if "amount" in column.lower():
            amt_col = column
            break
    if amt_col is not None:
        histogram_path = figures_dir / "amount_histogram.png"
        fig = plot_amount_histogram(txn, amount_col=amt_col, save_path=str(histogram_path))
        plt.show()

## 2. Transaction Volume Over Time

In [None]:
# Plot transaction volume over time from the first column whose name contains "date".
# Nothing is drawn when there are no transactions or no such column.
if len(txn):
    date_col = next(filter(lambda name: "date" in name.lower(), txn.columns), None)
    if date_col is not None:
        volume_path = str(figures_dir / "volume_by_time.png")
        fig = plot_volume_by_time(txn, date_col=date_col, save_path=volume_path)
        plt.show()

## 3. Risk Score Heatmap

In [None]:
# Heatmap of per-detector risk scores for the accounts at the top of the composite scores.
if len(composite) > 0 and "account_id" not in composite.columns:
    # Without account ids the top-accounts lookup below would raise a KeyError.
    print("Skipping risk heatmap: risk_scores.csv has no 'account_id' column")
elif len(composite) > 0:
    # Build heatmap data from individual detector scores.
    # Sorted so the heatmap columns come out in the same order on every run;
    # glob order depends on the filesystem.
    detector_files = sorted(output_dir.glob("detector_*.csv"))
    heatmap_data = {}
    for f in detector_files:
        name = f.stem.replace("detector_", "")
        det_df = pd.read_csv(f)
        # Only detector outputs that carry both columns can contribute a heatmap column.
        if "account_id" in det_df.columns and "risk_score" in det_df.columns:
            heatmap_data[name] = det_df.set_index("account_id")["risk_score"]

    if heatmap_data:
        # Accounts that a detector did not score get 0 for that detector.
        heatmap_df = pd.DataFrame(heatmap_data).fillna(0)
        # Show top 30 riskiest accounts.
        # NOTE(review): this takes the first 30 rows, so it assumes risk_scores.csv
        # is already sorted by descending risk -- confirm against the scoring step.
        top_accounts = composite.head(30)["account_id"].tolist()
        heatmap_subset = heatmap_df.loc[heatmap_df.index.isin(top_accounts)]
        if len(heatmap_subset) > 0:
            fig = plot_risk_heatmap(heatmap_subset, save_path=str(figures_dir / "risk_heatmap.png"))
            plt.show()

## 4. Anomaly Detection Scatter (PCA)

In [None]:
# Scatter plot of the numeric feature matrix, coloured by the anomaly detector's
# verdict when its output file is available.
if len(txn) > 0:
    # Stage 1: feature construction. Kept in its own try so the message below is
    # only printed when this step is what actually failed.
    try:
        features = build_feature_matrix(txn)
    except Exception as e:
        print(f"Could not build feature matrix: {e}")
    else:
        # Stage 2: optional labelling plus plotting. Still best-effort (a failure is
        # reported, not raised), but reported under an accurate message.
        try:
            # Load anomaly results if available; labels stays None otherwise.
            anomaly_path = output_dir / "detector_anomaly.csv"
            labels = None
            if anomaly_path.exists():
                anomaly_df = pd.read_csv(anomaly_path)
                if "account_id" in anomaly_df.columns and "risk_score" in anomaly_df.columns:
                    merged = features.merge(anomaly_df[["account_id", "risk_score"]], on="account_id", how="left")
                    # -1 marks rows scored above 0.5; everything else is 1. Rows without
                    # a match have NaN after the left merge, compare False, and get 1.
                    labels = np.where(merged["risk_score"] > 0.5, -1, 1)
                    # Drop the score again so it is not plotted as a feature.
                    features = merged.drop(columns=["risk_score"])
            fig = plot_anomaly_scatter(features.select_dtypes(include=[np.number]), labels=labels,
                                      save_path=str(figures_dir / "anomaly_scatter.png"))
            plt.show()
        except Exception as e:
            print(f"Could not render anomaly scatter: {e}")

## 5. Generate Final Report

In [None]:
def _count_or_na(frame):
    """Return the row count of ``frame``, or "N/A" when it has no rows."""
    row_count = len(frame)
    return row_count if row_count > 0 else "N/A"


def _tier_count(tier):
    """Return how many composite rows carry ``tier``, or "N/A" without a risk_tier column."""
    if "risk_tier" not in composite.columns:
        return "N/A"
    return len(composite[composite["risk_tier"] == tier])


# Headline numbers shown in the report's data overview section.
data_overview = {
    "Datasets loaded": len(datasets),
    "Total transactions": _count_or_na(txn),
    "Risk scores computed": _count_or_na(composite),
    "CRITICAL accounts": _tier_count("CRITICAL"),
    "HIGH accounts": _tier_count("HIGH"),
}

# (name, description, figure path) for each finding listed in the report.
finding_specs = [
    ("Structuring (BSA/CTR Evasion)", "Transactions clustered below $10K reporting threshold.", "figures/amount_histogram.png"),
    ("Risk Score Heatmap", "Multi-detector risk view of highest-risk accounts.", "figures/risk_heatmap.png"),
    ("Anomaly Detection", "Isolation Forest identified statistical outliers.", "figures/anomaly_scatter.png"),
]
findings = [
    {"name": finding_name, "description": finding_text, "figure_path": finding_figure}
    for finding_name, finding_text, finding_figure in finding_specs
]

executive_summary = (
    "Fraudasaurus.ai analyzed ARFI's financial transaction data using five complementary "
    "fraud detection methods: structuring detection, account takeover detection, check kiting "
    "analysis, dormant account monitoring, and unsupervised anomaly detection. "
    "Accounts were scored across all detectors and ranked by composite risk."
)

proposed_solution = (
    "A real-time fraud scoring pipeline for ARFI:\n"
    "1. Data Layer: Consume existing Jack Henry core + Banno digital feeds\n"
    "2. Detection Layer: 4 specialized rule-based detectors + 1 ML anomaly detector\n"
    "3. Scoring Layer: Weighted combination with configurable risk tiers\n"
    "4. Alert Layer: CRITICAL=immediate, HIGH=daily review, MEDIUM=monthly trending\n"
    "5. Dashboard: Risk leaderboard with drill-down explainability\n\n"
    "Feasibility: Uses existing data, auditable rules, local ML (no data leaves ARFI), "
    "can start as nightly batch and evolve to near-real-time."
)

# Context handed to both report generators; key order matches the report layout.
report_context = {
    "title": "Fraudasaurus.ai - ARFI Fraud Detection Report",
    "generated_at": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M"),
    "executive_summary": executive_summary,
    "data_overview": data_overview,
    "findings": findings,
    "carmeg_profile": "See notebook 04_carmeg_hunt.ipynb for full CarMeg SanDiego analysis.",
    "proposed_solution": proposed_solution,
}

# Render the same context to HTML and Markdown, then report the size of each output.
html = generate_report(report_context, output_path="../output/report.html")
md = generate_markdown_report(report_context, output_path="../output/report.md")
print("Reports generated:")
print(f"  HTML: output/report.html ({len(html)} chars)")
print(f"  Markdown: output/report.md ({len(md)} chars)")

## Done!
Reports and figures have been saved to `output/` and are ready for presentation.