# 4. Hybrid Scoring & Evaluation

Combine the two model outputs (`fraud_probability` and `anomaly_score`) into a single, more robust 'Fraud Risk Score'.

Formula:
`fraud_risk = 0.6 * fraud_probability + 0.4 * anomaly_score`

Tasks:
1. Calculate Hybrid Score.
2. Evaluate Performance (ROC-AUC, Precision, Recall).
3. Export final Production CSV for Power BI.

In [None]:
import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# File locations, relative to this notebook: the input CSV that is read here
# and the two CSV files written by the export step.
INPUT_FILE = '../data/transactions_with_predictions.csv'
OUTPUT_SCORES = '../outputs/fraud_risk_scores.csv'
OUTPUT_BI_FILE = '../outputs/fraud_predictions.csv'

# Read the whole input table into memory and report its (rows, columns) shape
# as a quick sanity check.
df = pd.read_csv(filepath_or_buffer=INPUT_FILE)
print(f"Data loaded. Shape: {df.shape}")

In [None]:
# 1. Hybrid Fraud Score
# Blend the two signals into one score:
#     fraud_risk = 0.6 * fraud_probability + 0.4 * anomaly_score
# anomaly_score is expected to be binary here (0 or 1): per the original note
# it was produced upstream via .map({1: 0, -1: 1}) rather than from the
# Isolation Forest decision_function. Under that assumption the anomaly term
# adds either 0 or 0.4, so the result is a stepped score, not a smooth one.
probability_part = df['fraud_probability'] * 0.6
anomaly_part = df['anomaly_score'] * 0.4
df['fraud_risk'] = probability_part + anomaly_part

# Show the first rows of the inputs next to the new score and the true label.
preview_columns = ['fraud_probability', 'anomaly_score', 'fraud_risk', 'Class']
print(df[preview_columns].head())

In [None]:
# 2. Evaluation
# Labels come from the 'Class' column; the hybrid score is the value being judged.
y_true, y_prob = df['Class'], df['fraud_risk']

# Hard 0/1 predictions: a score strictly above 0.5 counts as positive.
y_pred = y_prob.gt(0.5).astype(int)

# Per-class metrics are computed on the thresholded predictions.
print("Classification Report:")
report_text = classification_report(y_true, y_pred)
print(report_text)

# ROC AUC is computed on the continuous score, not on the thresholded labels.
roc = roc_auc_score(y_true, y_prob)
print(f"ROC AUC Score: {roc:.4f}")

In [None]:
# 3. Export for Power BI
# Writes two CSV files: the full dashboard dataset (OUTPUT_BI_FILE) and a
# slim UserID -> fraud_risk table (OUTPUT_SCORES).
from pathlib import Path  # local import: only this cell needs path handling

# 'High_Risk_Flag' is exported below but is not computed in any of the visible
# cells of this notebook. If the input file did not already provide it, derive
# it from the hybrid score with the same 0.5 cut-off used in the evaluation
# step, so the column selection cannot fail with a KeyError. An existing
# column is left untouched.
if 'High_Risk_Flag' not in df.columns:
    df['High_Risk_Flag'] = (df['fraud_risk'] > 0.5).astype(int)

columns_to_export = [
    'UserID', 'Time', 'Amount', 'Class',
    'fraud_probability', 'anomaly_score', 'fraud_risk', 'High_Risk_Flag',
]
# Add engineered features if useful for dashboard
if 'Amount_log' in df.columns:
    columns_to_export.append('Amount_log')

# DataFrame.to_csv does not create missing folders, so make sure the target
# directories exist before writing.
for output_path in (OUTPUT_BI_FILE, OUTPUT_SCORES):
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

df[columns_to_export].to_csv(OUTPUT_BI_FILE, index=False)
print(f"Final Dataset exported to {OUTPUT_BI_FILE}")

# Also save the risk scores on their own (one row per input row: UserID and score).
df[['UserID', 'fraud_risk']].to_csv(OUTPUT_SCORES, index=False)
print(f"Risk scores exported to {OUTPUT_SCORES}")