In [7]:
import os
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import (
    mean_squared_error, r2_score, roc_auc_score, accuracy_score, log_loss
)
from sklearn.preprocessing import StandardScaler

# Locations of the pickled model and of the CSV dataset it is evaluated on.
MODEL_FILE = "saved_models/Gen5_Model0_20250326_022722.pkl"
DATA_FILE = "data/financial_data_full.csv"

# Deserialize the model from disk.
# NOTE(review): unpickling can execute arbitrary code stored in the file, so
# MODEL_FILE must come from a trusted source.
with open(MODEL_FILE, "rb") as model_fh:
    model = pickle.load(model_fh)
print(f"‚úÖ Loaded Model: {MODEL_FILE}")

# Read the whole dataset into a DataFrame and report its dimensions.
df = pd.read_csv(DATA_FILE)
print(f"‚úÖ Loaded Data: {DATA_FILE} (Shape: {df.shape})")

# Separate the label column from the feature columns.
target_col = "market_stress"  # change this if the label column has another name
if target_col not in df.columns:
    raise ValueError(f"‚ùå Target column '{target_col}' not found in dataset!")

# y is the label series; X is a new frame holding every other column.
y = df[target_col]
X = df.drop(target_col, axis="columns")

# Encode every object-dtype column as numbers so the scaler can consume it.
for col in X.select_dtypes(include=["object"]).columns:
    try:
        # Columns that parse as dates become seconds since the Unix epoch.
        # The cast is spelled "int64" explicitly: plain `int` can resolve to a
        # 32-bit integer on some platforms, where the cast fails and the date
        # column would silently fall through to category encoding.
        X[col] = pd.to_datetime(X[col]).astype("int64") / 10**9
    except (ValueError, TypeError, OverflowError):
        # Not date-like (or not convertible): fall back to integer category
        # codes. Only conversion errors are caught so that KeyboardInterrupt
        # and unrelated failures are no longer swallowed.
        X[col] = X[col].astype("category").cat.codes

# Standardize all feature columns.
# NOTE(review): the scaler is fitted on this evaluation data rather than
# reused from training -- confirm this matches how the model was trained.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Make the number of feature columns equal to what the fitted model expects.
# NOTE(review): this only reconciles the column COUNT (zero-padding on the
# right or dropping trailing columns); it does not check that the columns
# correspond to the ones the model was trained on.
if hasattr(model, "n_features_in_"):
    # Positive gap: the model expects more columns than are available.
    # Negative gap: the data has more columns than the model expects.
    feature_gap = model.n_features_in_ - X_scaled.shape[1]

    if feature_gap > 0:
        print(f"‚ö†Ô∏è Adding {feature_gap} dummy columns to match model input size.")
        zero_padding = np.zeros((X_scaled.shape[0], feature_gap))
        X_scaled = np.concatenate([X_scaled, zero_padding], axis=1)
    elif feature_gap < 0:
        print(f"‚ö†Ô∏è Reducing {-feature_gap} features to match model input size.")
        X_scaled = X_scaled[:, : model.n_features_in_]

# Score every row of X_scaled.
# When the model exposes predict_proba, column 1 of its output is used
# (presumably the positive-class probability for a binary target -- confirm);
# otherwise the raw predict() output is used as the score.
try:
    if hasattr(model, "predict_proba"):
        y_pred_prob = model.predict_proba(X_scaled)[:, 1]
    else:
        y_pred_prob = model.predict(X_scaled)
    # Hard 0/1 labels: scores at or above 0.5 count as class 1.
    y_pred = (y_pred_prob >= 0.5).astype(int)
except Exception as e:
    # Same exception type as before, but chained explicitly so the original
    # error is reported as the direct cause of this one.
    raise ValueError(f"‚ùå Prediction error: {e}") from e

# Evaluate the predictions against the true labels.
# MSE and R^2 are computed on the continuous scores, not the hard labels.
mse = mean_squared_error(y, y_pred_prob)
r2 = r2_score(y, y_pred_prob)
# ROC-AUC needs both classes in y; fall back to chance level (0.5) otherwise.
roc_auc = roc_auc_score(y, y_pred_prob) if len(np.unique(y)) > 1 else 0.5
# Accuracy is the only metric that uses the thresholded 0/1 labels.
accuracy = accuracy_score(y, y_pred)
# The label set is passed explicitly because log_loss otherwise raises when y
# holds a single class -- the same edge case already guarded for ROC-AUC.
# Assumes the target is encoded as 0/1, like the thresholded y_pred -- confirm.
log_loss_value = log_loss(y, y_pred_prob, labels=[0, 1])

# Print the metric report: a header line, then one line per metric with six
# decimal places.
report_lines = [
    "\nüìä **Model Performance Metrics**",
    f"‚úÖ Mean Squared Error (MSE): {mse:.6f}",
    f"‚úÖ R¬≤ Score: {r2:.6f}",
    f"‚úÖ ROC-AUC Score: {roc_auc:.6f}",
    f"‚úÖ Accuracy Score: {accuracy:.6f}",
    f"‚úÖ Log Loss: {log_loss_value:.6f}",
]
print("\n".join(report_lines))

‚úÖ Loaded Model: saved_models/Gen5_Model0_20250326_022722.pkl
‚úÖ Loaded Data: data/financial_data_full.csv (Shape: (6045, 215))
‚ö†Ô∏è Adding 3 dummy columns to match model input size.

üìä **Model Performance Metrics**
‚úÖ Mean Squared Error (MSE): 0.038848
‚úÖ R¬≤ Score: 0.073193
‚úÖ ROC-AUC Score: 0.871017
‚úÖ Accuracy Score: 0.956162
‚úÖ Log Loss: 0.141100
