# Early Warning Platform - Model Retraining

This notebook demonstrates the retraining pipeline for the ML models using feedback data.

In [None]:
import os
import pandas as pd
import numpy as np
from supabase import create_client
from sklearn.ensemble import IsolationForest, GradientBoostingClassifier
from sklearn.metrics import classification_report, roc_auc_score
import pickle
from datetime import datetime

## 1. Load Feedback Data

In [None]:
# Connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")

# Fail fast and name the missing variable(s) instead of handing None to the client.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("SUPABASE_URL", SUPABASE_URL),
        ("SUPABASE_SERVICE_ROLE_KEY", SUPABASE_KEY),
    )
    if not env_value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Fetch every feedback row together with its related alert record, which is
# embedded under the "alerts" key (later cells read it with .get()).
# NOTE(review): this is a single un-paginated request; if the API caps the
# number of rows per response, large feedback tables would be silently
# truncated — confirm and paginate if needed.
feedback_data = supabase.table("feedback").select("*, alerts!inner(*)").execute()
df_feedback = pd.DataFrame(feedback_data.data)

print(f"Loaded {len(df_feedback)} feedback records")
df_feedback.head()

## 2. Prepare Training Data

In [None]:
# Translate reviewer outcomes into numeric labels and keep only labelled rows.
# Outcomes that are not listed in the mapping become NaN and are dropped.
# Building a new frame with assign() (instead of assigning a column in place
# and then rebinding the name to a filtered slice) keeps the cell safe to
# re-run: a second run no longer writes into a slice of the previous result,
# which can raise SettingWithCopyWarning.
# NOTE(review): 'partial' maps to 0.5, which is counted neither as positive
# nor as negative below — confirm how downstream training should treat it.
df_feedback = (
    df_feedback
    .assign(
        label=df_feedback['outcome'].map({
            'true_positive': 1,
            'false_positive': 0,
            'partial': 0.5
        })
    )
    .dropna(subset=['label'])
)

# Vectorised counts instead of iterating the Series with Python's sum().
print(f"Training samples: {len(df_feedback)}")
print(f"Positive samples: {(df_feedback['label'] == 1).sum()}")
print(f"Negative samples: {(df_feedback['label'] == 0).sum()}")

## 3. Retrain Crime Model

In [None]:
# Keep only feedback whose embedded alert record has primary_type == 'crime'.
crime_feedback = df_feedback[df_feedback['alerts'].apply(lambda x: x.get('primary_type') == 'crime')]

# Only proceed when there are more than 10 crime feedback rows.
if len(crime_feedback) > 10:
    # FIXME: the estimator is pickled without fit() ever being called, so the
    # saved artifact is an untrained model. Feature/target extraction from
    # crime_feedback is not present in this cell and must be added before the
    # output can be used for prediction.
    model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)

    # Version string is derived from the local wall-clock time at save.
    version = f"v{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    model_path = f"../models/crime_{version}.pkl"

    # open() does not create missing directories; make sure the target exists
    # so a fresh checkout does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    print(f"Crime model saved: {model_path}")
else:
    print("Insufficient data for crime model retraining")

## 4. Retrain Fraud Model

In [None]:
# Keep only feedback whose embedded alert record has primary_type == 'fraud'.
fraud_feedback = df_feedback[df_feedback['alerts'].apply(lambda x: x.get('primary_type') == 'fraud')]

# Only proceed when there are more than 10 fraud feedback rows.
if len(fraud_feedback) > 10:
    # FIXME: the estimator is pickled without fit() ever being called, so the
    # saved artifact is an untrained model. Feature extraction from
    # fraud_feedback is not present in this cell and must be added before the
    # output can be used for scoring.
    model = IsolationForest(contamination=0.1, random_state=42)

    # Version string is derived from the local wall-clock time at save.
    version = f"v{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    model_path = f"../models/fraud_{version}.pkl"

    # open() does not create missing directories; make sure the target exists
    # so a fresh checkout does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    print(f"Fraud model saved: {model_path}")
else:
    print("Insufficient data for fraud model retraining")

## 5. Deploy Models

Register new model versions in the database.

In [None]:
def register_model(name, version, path, metadata):
    """Insert a model-version record into the ``models`` table.

    Args:
        name: Value stored under the "name" key of the record.
        version: Value stored under the "version" key of the record.
        path: Value stored under the "path" key of the record.
        metadata: Value stored under the "metadata" key of the record
            (presumably a JSON-serialisable dict — confirm against the
            table schema).

    Returns:
        The first row of the insert response (``result.data[0]``).
    """
    # Imported locally: the notebook's import cell only brings in `datetime`.
    from datetime import timezone

    model_data = {
        "name": name,
        "version": version,
        "path": path,
        "metadata": metadata,
        # Timezone-aware UTC timestamp. datetime.utcnow() is deprecated and
        # returns a naive value whose ISO string carries no UTC offset.
        "deployed_at": datetime.now(timezone.utc).isoformat()
    }

    result = supabase.table("models").insert(model_data).execute()
    print(f"Model registered: {name} {version}")
    return result.data[0]

# NOTE(review): this cell defines register_model() but does not call it, so
# the message below is printed even though no model has been registered here.
print("Models registered successfully!")

## 6. Model Performance Metrics

In [None]:
# Summarise how the labelled feedback splits between label 1 and label 0.
# NOTE(review): the two "rates" below are shares of all labelled feedback rows
# (rows with any other label value are in the denominator only), not
# classifier TPR/FPR computed from predictions — confirm the intended metric.
total_samples = len(df_feedback)

print("Model Performance Summary:")
print(f"Total Feedback Samples: {total_samples}")

if total_samples:
    positive_share = (df_feedback['label'] == 1).sum() / total_samples
    negative_share = (df_feedback['label'] == 0).sum() / total_samples
    print(f"True Positive Rate: {positive_share:.2%}")
    print(f"False Positive Rate: {negative_share:.2%}")
else:
    # With no labelled feedback the shares are undefined; report that instead
    # of failing with ZeroDivisionError.
    print("True Positive Rate: n/a (no feedback samples)")
    print("False Positive Rate: n/a (no feedback samples)")