# Model Training & Evaluation

This notebook prepares the input data for the Hybrid XGBoost-LSTM model, loads the models trained by `train.py`, and evaluates the performance of their combined prediction.

In [None]:
import sys
import os
sys.path.append('..')

import pandas as pd
import numpy as np
import xgboost as xgb
from utils.preprocessing import Preprocessor
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_curve, auc
import joblib

In [None]:
# Input locations (relative paths, resolved against the kernel's working directory).
config_path = '../07_configs/config.yaml'
raw_data_path = '../01_data/raw/upi_transactions.csv'

# Build the project preprocessor from its config file and read the raw CSV.
preprocessor = Preprocessor(config_path)
df = pd.read_csv(raw_data_path)

# NOTE(review): fit_transform fits the preprocessor on this data again; confirm
# the result matches the fitted state used when the models were trained.
df_processed = preprocessor.fit_transform(df)

# create_sequences returns the LSTM input, the XGBoost input and the labels.
sequences = preprocessor.create_sequences(df_processed)
X_lstm, X_xgb, y = sequences

print(f"Data Shapes: LSTM {X_lstm.shape}, XGB {X_xgb.shape}")

## Load Trained Models
The models are assumed to have already been trained with `train.py`; this section only loads the saved artifacts for evaluation.

In [None]:
from tensorflow.keras.models import load_model

# Saved model artifacts, one file per model.
lstm_model_path = '../02_models/artifacts/lstm_model.h5'
xgb_model_path = '../02_models/artifacts/xgb_model.pkl'

# Keras model stored as an HDF5 (.h5) file.
lstm_model = load_model(lstm_model_path)

# joblib.load unpickles the file, which can execute arbitrary code:
# only load artifacts that come from a trusted source.
xgb_model = joblib.load(xgb_model_path)

## Performance Metrics

In [None]:
# Blend weights for the hybrid score. Keep them summing to 1 so that the
# blend of two scores in [0, 1] also stays in [0, 1].
LSTM_WEIGHT = 0.5
XGB_WEIGHT = 0.5

# Per-sample scores from each model.
# Assumes the LSTM emits a single score per sample, so flatten() yields one
# value per row of X_lstm -- TODO confirm against the model's output layer.
lstm_pred = lstm_model.predict(X_lstm).flatten()
# Column 1 of predict_proba is the probability of the second class.
xgb_pred = xgb_model.predict_proba(X_xgb)[:, 1]

# Fail loudly on a length mismatch: without this check NumPy broadcasting
# would silently blend a length-1 vector with a full-length one.
assert lstm_pred.shape == xgb_pred.shape == (len(y),), (
    f"Prediction/label length mismatch: LSTM {lstm_pred.shape}, "
    f"XGB {xgb_pred.shape}, labels {len(y)}"
)

# Weighted average of the two model scores.
hybrid_pred = LSTM_WEIGHT * lstm_pred + XGB_WEIGHT * xgb_pred

# ROC curve of the blended score and the area under it.
fpr, tpr, _ = roc_curve(y, hybrid_pred)
roc_auc = auc(fpr, tpr)

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
# Small headroom above 1.0 so the top of the curve is not clipped by the frame.
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()