In [None]:
import os
from fpdf import FPDF
import pandas as pd
import pickle
import shap
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the pickled classifier and its matching text vectorizer
def _read_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): unpickling can execute arbitrary code; only load artifact
# files that come from a trusted source.
model = _read_pickle("../models/toxicity_model.pkl")
vectorizer = _read_pickle("../models/vectorizer.pkl")

# Prompts whose predictions are scored here and explained in the report
sample_prompts = [
    "I hate you, go to hell.",
    "You're such a kind and wonderful person.",
    "Stupid idiot, learn to speak properly!",
    "Let's have a respectful conversation.",
    "You filthy scumbag!",
]

# Vectorize the prompts, then keep column 1 of the predicted probabilities
# (the value the report later prints as the toxicity score).
X = vectorizer.transform(sample_prompts)
class_probabilities = model.predict_proba(X)
preds = class_probabilities[:, 1]

# Build a SHAP explainer from the model and the vectorized prompts, then
# explain those same rows.
explainer = shap.Explainer(model, X)
shap_values = explainer(X)

# Write one force plot per prompt as a PNG and remember each output path,
# in prompt order, for the PDF section that embeds them.
shap_folder = "shap_explanations"
os.makedirs(shap_folder, exist_ok=True)
force_plot_paths = []

for idx, _prompt in enumerate(sample_prompts):
    png_path = f"{shap_folder}/force_plot_{idx}.png"
    # Drawn through matplotlib without displaying, so the current figure
    # can be saved and then closed before the next prompt is plotted.
    shap.plots.force(shap_values[idx], matplotlib=True, show=False)
    plt.savefig(png_path, bbox_inches="tight")
    plt.close()
    force_plot_paths.append(png_path)

# Collect all pre-existing plots (sorted by path) to embed in the report.
plot_dir = "../plots"
if os.path.isdir(plot_dir):
    # Compare lower-cased names so files such as "chart.PNG" are not skipped.
    plot_files = sorted(
        os.path.join(plot_dir, name)
        for name in os.listdir(plot_dir)
        if name.lower().endswith((".png", ".jpg"))
    )
else:
    # A missing plot directory should not abort the whole report; the
    # visualization section is simply left without images.
    print(f"No plot directory found at {plot_dir}; skipping pre-existing plots.")
    plot_files = []

# Generate PDF
def _core_font_safe(text):
    """Return *text* with characters outside Latin-1 replaced by "?".

    The report is written with a built-in (non-Unicode) PDF font, and fpdf
    raises an encoding error for characters such as emoji. Substituting the
    unsupported characters keeps one odd prompt from aborting the report.
    """
    return str(text).encode("latin-1", errors="replace").decode("latin-1")


pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()

pdf.set_font("Arial", 'B', 16)
pdf.cell(0, 10, "BiasShield - Toxicity & Bias Detection Report", ln=True)

# Section 1: Sample Prompts + SHAP
# One entry per prompt: heading, prompt text, predicted score, force plot.
for i, prompt in enumerate(sample_prompts):
    pdf.set_font("Arial", 'B', 12)
    pdf.ln(10)
    pdf.cell(0, 10, f"Prompt {i+1}:", ln=True)
    pdf.set_font("Arial", '', 11)
    pdf.multi_cell(0, 10, _core_font_safe(prompt))
    # fpdf2's multi_cell leaves the cursor at the right edge by default, which
    # gives the following full-width cell no room; return to the left margin.
    pdf.set_x(pdf.l_margin)
    pdf.cell(0, 10, f"Predicted Toxicity Score: {round(preds[i]*100, 2)}%", ln=True)
    pdf.image(force_plot_paths[i], w=160)

# Section 2: Visual Plots
pdf.add_page()
pdf.set_font("Arial", 'B', 14)
pdf.cell(0, 10, "Exploratory Visualizations", ln=True)

for plot in plot_files:
    pdf.ln(8)
    pdf.image(plot, w=180)

# Save report
os.makedirs("pdf_reports", exist_ok=True)
pdf_path = "pdf_reports/biasshield_report.pdf"
pdf.output(pdf_path)

print(f"✅ PDF report created at: {pdf_path}")


  pdf.set_font("Arial", 'B', 16)
  pdf.cell(0, 10, "BiasShield - Toxicity & Bias Detection Report", ln=True)
  pdf.set_font("Arial", 'B', 12)
  pdf.cell(0, 10, f"Prompt {i+1}:", ln=True)
  pdf.set_font("Arial", '', 11)
  pdf.cell(0, 10, f"Predicted Toxicity Score: {round(preds[i]*100, 2)}%", ln=True)
  pdf.set_font("Arial", 'B', 14)
  pdf.cell(0, 10, "📊 Exploratory Visualizations", ln=True)


FPDFUnicodeEncodingException: Character "📊" at index 0 in text is outside the range of characters supported by the font used: "helveticaB". Please consider using a Unicode font.