# AI Model Governance Toolkit - Report Generator Demo

This notebook demonstrates how to use the Report Generator module to create comprehensive reports for AI models, including explainability insights, bias analysis, and regulatory compliance information.

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

from explainability.shap_explainer import ShapExplainer
from bias_detection.fairness_metrics import BiasDetector
from report_generator.report_generator import ReportGenerator

## 1. Load and Prepare Sample Credit Data

We'll use a synthetic credit dataset with protected attributes for this demo.

In [None]:
# Build a reproducible synthetic credit dataset.
# The order of the random draws below is significant: with the fixed seed,
# changing it would change every generated value.
np.random.seed(42)
n_samples = 1000

# Protected attributes: a two-valued gender label and an age clipped to [18, 80].
gender = np.random.choice(['M', 'F'], size=n_samples)
age = np.clip(np.random.normal(35, 10, n_samples), 18, 80)

# Credit-related features, each drawn from a normal distribution and then
# clipped to the range given alongside it.
income = np.clip(np.random.normal(50000, 20000, n_samples), 20000, 150000)
credit_score = np.clip(np.random.normal(700, 50, n_samples), 300, 850)
debt_ratio = np.clip(np.random.normal(0.3, 0.1, n_samples), 0, 1)

# Assemble the feature table (column order follows the insertion order here).
data = dict(
    gender=gender,
    age=age,
    income=income,
    credit_score=credit_score,
    debt_ratio=debt_ratio,
)
df = pd.DataFrame(data)

# Approval probability is a weighted sum of rescaled features plus a flat
# bonus for gender == 'M', which deliberately injects bias for the demo.
score_term = 0.7 * (df['credit_score'] - 300) / 550
income_term = 0.2 * (df['income'] - 20000) / 130000
debt_term = 0.1 * (1 - df['debt_ratio'])
gender_term = 0.1 * (df['gender'] == 'M')  # Introduce bias
approval_prob = np.clip(score_term + income_term + debt_term + gender_term, 0, 1)

# Turn the probabilities into a 0/1 target by comparing against uniform draws.
uniform_draws = np.random.random(n_samples)
df['loan_approved'] = (uniform_draws < approval_prob).astype(int)

# Separate the feature matrix from the loan-approval target and hold out
# 20% of the rows for testing (fixed random_state for reproducibility).
X = df.drop('loan_approved', axis=1)
y = df['loan_approved']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Take explicit copies before assigning scaled columns back: the frames
# returned by the split are derived from X, and writing into them directly
# can trigger pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# NOTE(review): 'gender' is still a string column ('M'/'F') in X_train/X_test.
# scikit-learn estimators generally require numeric input, so the model fit
# later may need this column encoded -- confirm against how BiasDetector
# expects to receive the protected attribute before changing it.

# Scale numerical features: fit on the training split only, then apply the
# same transformation to the test split.
scaler = StandardScaler()
numerical_features = ['age', 'income', 'credit_score', 'debt_ratio']
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

print("Data shape:", df.shape)
print("\nSample data:")
df.head()

## 2. Train a Credit Scoring Model

We'll train a Random Forest model for credit scoring.

In [None]:
# Fit a 100-tree random forest on the training split (seeded for repeatability).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Hard class predictions plus the predicted probability of the positive class
# (column 1 of predict_proba) for the held-out rows.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Label-based metrics use y_pred; ROC AUC uses the positive-class probability.
accuracy_value = accuracy_score(y_test, y_pred)
precision_value = precision_score(y_test, y_pred)
recall_value = recall_score(y_test, y_pred)
f1_value = f1_score(y_test, y_pred)
roc_auc_value = roc_auc_score(y_test, y_pred_proba)

print("Model Performance:")
print(f"Accuracy: {accuracy_value:.4f}")
print(f"Precision: {precision_value:.4f}")
print(f"Recall: {recall_value:.4f}")
print(f"F1 Score: {f1_value:.4f}")
print(f"ROC AUC: {roc_auc_value:.4f}")

## 3. Generate Model Explainability Insights

We'll use the ShapExplainer to generate feature importance and SHAP values.

In [None]:
# Wrap the trained model in the toolkit's ShapExplainer.
explainer = ShapExplainer(model)

# Global explanation: ask for feature importance, passing the training features.
feature_importance = explainer.explain_feature_importance(X_train)
print("\nFeature Importance:")
for name, score in feature_importance.items():
    print(f"{name}: {score:.4f}")

# Local explanation: explain a single test row, kept as a one-row DataFrame
# by slicing rather than indexing.
sample_idx = 0
sample_row = X_test.iloc[sample_idx:sample_idx + 1]
shap_values = explainer.explain_prediction(sample_row)
print("\nSHAP Values for Sample:")
for name, contribution in shap_values.items():
    print(f"{name}: {contribution:.4f}")

## 4. Perform Bias Analysis

We'll use the BiasDetector to analyze potential bias in the model.

In [None]:
# Configure the bias analysis: 'gender' is the protected attribute and 'M'
# is declared as its privileged group.
privileged_groups = {'gender': 'M'}
protected_attributes = ['gender']
bias_detector = BiasDetector(model, protected_attributes, privileged_groups)

# Produce the bias report on the held-out split and print it as
# metric -> attribute -> value.
bias_report = bias_detector.generate_bias_report(X_test, y_test)
print("\nBias Report:")
for metric_name, per_attribute in bias_report.items():
    print(f"\n{metric_name}:")
    for attribute, score in per_attribute.items():
        print(f"  {attribute}: {score:.4f}")

## 5. Generate Comprehensive Report

Now we'll use the ReportGenerator to create a comprehensive report combining all the insights.

In [None]:
# Initialize ReportGenerator with descriptive metadata about the model.
report_gen = ReportGenerator(
    model_info={
        'name': 'Credit Scoring Model',
        'version': '1.0.0',
        'type': 'Random Forest Classifier'
    }
)

# Add explainability data (global feature importance and the single-sample
# SHAP values computed earlier in the notebook).
report_gen.add_explainability_data(
    feature_importance=feature_importance,
    shap_values=shap_values,
    has_shap_values=True,
    has_local_explanations=True
)

# Add bias analysis data.
report_gen.add_bias_analysis_data(
    bias_report=bias_report,
    protected_attributes=protected_attributes,
    privileged_groups=privileged_groups
)

# Compute each performance metric exactly once so the same numbers feed both
# the metrics section and the compliance thresholds below.
performance_metrics = {
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred),
    'recall': recall_score(y_test, y_pred),
    'f1_score': f1_score(y_test, y_pred),
    'roc_auc': roc_auc_score(y_test, y_pred_proba)
}

# Add performance metrics.
report_gen.add_performance_metrics(
    metrics=performance_metrics,
    y_true=y_test,
    y_pred=y_pred,
    y_pred_proba=y_pred_proba
)

# Threshold checks on NumPy scalars yield numpy.bool_, which the standard
# json encoder cannot serialize; coerce every flag to a built-in bool so the
# compliance data is safe to export in any format.
meets_accuracy_threshold = bool(performance_metrics['accuracy'] >= 0.7)
meets_disparate_impact = bool(bias_report['disparate_impact']['gender'] >= 0.8)
meets_demographic_parity = bool(bias_report['demographic_parity']['gender'] <= 0.05)

# Add regulatory compliance data.
report_gen.add_regulatory_compliance(
    compliance_data={
        'EU AI Act': {
            'transparency': True,
            'fairness': meets_disparate_impact,
            'accuracy': meets_accuracy_threshold
        },
        'GDPR': {
            'explainability': True,
            'fairness': meets_demographic_parity,
            'accuracy': meets_accuracy_threshold
        }
    },
    regulations=['EU AI Act', 'GDPR']
)

# Add recommendations.
# NOTE(review): these are fixed texts and are added regardless of what the
# bias report or metrics above actually show.
report_gen.add_recommendations([
    {
        'title': 'Address Gender Bias',
        'description': 'The model shows potential gender bias. Consider retraining with balanced data or applying post-processing techniques.',
        'priority': 'High'
    },
    {
        'title': 'Improve Model Performance',
        'description': 'While the model performs well, there is room for improvement in precision and recall.',
        'priority': 'Medium'
    }
])

# Generate reports in different formats.
html_report = report_gen.generate_html_report()
markdown_report = report_gen.generate_markdown_report()
json_report = report_gen.generate_json_report()

print("Reports generated successfully!")
print(f"HTML report length: {len(html_report)} characters")
print(f"Markdown report length: {len(markdown_report)} characters")
print(f"JSON report length: {len(json_report)} characters")

## 6. Save Reports

Let's save the generated reports to files.

In [None]:
# Save reports.
# Each generated report is written to its own file in the working directory.
# The encoding is pinned to UTF-8 so non-ASCII characters in a report are
# written the same way on every platform instead of depending on the
# locale's default encoding.
report_files = {
    'credit_scoring_report.html': html_report,
    'credit_scoring_report.md': markdown_report,
    'credit_scoring_report.json': json_report,
}
for report_path, report_text in report_files.items():
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

print("Reports saved successfully!")