# Model Loading and Prediction Notebook

This notebook loads the processed smart meter data (which already includes anomaly labels produced by Isolation Forest), then loads several previously trained models and uses each of them to make predictions on that data. The following models are loaded:

- **Isolation Forest Model**
- **LightGBM Model**
- **Neural Network Model** (along with its scaler)

The notebook also shows simple prediction examples for each model.

In [None]:
import pandas as pd
import numpy as np
import joblib
import lightgbm as lgb
import tensorflow as tf
import os
from sklearn.preprocessing import StandardScaler

%matplotlib inline

## Define File Paths

In [None]:
# Input dataset: processed smart meter readings with Isolation Forest labels.
data_path = "data/processed/smart_meter_data_anomalies_if.csv"

# Serialized model artifacts, one per model demonstrated below.
if_model_path, lgbm_model_path = (
    "models/isolation_forest_model.pkl",
    "models/lightgbm_model.pkl",
)
# The neural network is stored together with the scaler loaded alongside it.
nn_model_path, nn_scaler_path = (
    "models/nn_model.h5",
    "models/nn_scaler.pkl",
)

# Echo the dataset location so the reader can confirm which file is used.
print("Data path:", data_path)

## Load Data

In [None]:
# Read the processed dataset at `data_path` into the DataFrame `data`.
#
# Failures are re-raised instead of calling exit(): inside a notebook, exit()
# asks the kernel to shut down, which discards state and does not cleanly stop
# "Run All" at the failing cell. Re-raising halts execution here with a
# traceback, so later cells never run against a missing `data`.
print(f"Attempting to load data from: {data_path}")
try:
    # For a quicker demonstration on a smaller sample, pass nrows to read_csv,
    # e.g. pd.read_csv(data_path, nrows=100000).
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print(f"Error: Data file not found at {data_path}.")
    raise
except Exception as e:
    print(f"Error loading data: {e}")
    raise
else:
    # Only reported when read_csv completed without raising.
    print("Data loaded successfully.")

## Determine Feature Columns

We select all numeric columns as features, excluding the columns that are not model inputs (`cluster`, `anomaly_score`, `anomaly`, and `LCLid`). The target column is `anomaly`.

In [None]:
# Build the feature matrix `X` and target series `y` from `data`.
#
# Features are every numeric column except the ones listed in
# `features_to_exclude`. Validation failures raise instead of calling exit():
# inside a notebook, exit() asks the kernel to shut down, whereas an exception
# stops execution at this cell and leaves the kernel state available for
# inspection.
print("\nDetermining feature columns...")
target = 'anomaly'

# Fail early if the label column is absent, before any feature work is done.
if target not in data.columns:
    raise KeyError(f"Target column '{target}' not found in data.")

numeric_features = data.select_dtypes(include=np.number).columns.tolist()
# Exclude non-feature columns
features_to_exclude = ['cluster', 'anomaly_score', 'anomaly', 'LCLid']
features = [col for col in numeric_features if col not in features_to_exclude]

# An empty feature list means no numeric column survived the exclusion filter.
if not features:
    raise ValueError("Could not determine feature columns.")

print("Using features:", features)
X = data[features]
y = data[target]  # Not used for prediction demonstration, but defined for consistency

## Load Isolation Forest Model and Make Predictions

In [None]:
# Load the Isolation Forest model from `if_model_path` and print its
# decision_function output for the first rows of `X`.
#
# Loading and scoring are guarded separately so that a failure while scoring
# is not misreported as a failure to load the model.
print(f"\nAttempting to load Isolation Forest model from: {if_model_path}")
if not os.path.exists(if_model_path):
    print(f"Isolation Forest model file not found at {if_model_path}")
else:
    try:
        # NOTE: joblib.load unpickles the file and can execute arbitrary code;
        # only load model files from a trusted source.
        if_model = joblib.load(if_model_path)
    except Exception as e:
        print(f"Error loading Isolation Forest model: {e}")
    else:
        print("Isolation Forest model loaded successfully.")
        try:
            # Example: Get anomaly scores for the first 5 rows
            if_scores = if_model.decision_function(X.head())
        except Exception as e:
            print(f"Error predicting with Isolation Forest model: {e}")
        else:
            print("Isolation Forest anomaly scores (first 5):")
            print(if_scores)
            print("-" * 30)

## Load LightGBM Model and Make Predictions

In [None]:
# Load the LightGBM model from `lgbm_model_path` and print its predict_proba
# output for the first rows of `X`.
#
# Loading and prediction are guarded separately so that a failure while
# predicting is not misreported as a failure to load the model.
print(f"\nAttempting to load LightGBM model from: {lgbm_model_path}")
if not os.path.exists(lgbm_model_path):
    print(f"LightGBM model file not found at {lgbm_model_path}")
else:
    try:
        # NOTE: joblib.load unpickles the file and can execute arbitrary code;
        # only load model files from a trusted source.
        lgbm_model = joblib.load(lgbm_model_path)
    except Exception as e:
        print(f"Error loading LightGBM model: {e}")
    else:
        print("LightGBM model loaded successfully.")
        try:
            # Example: Predict probabilities for the first 5 rows
            lgbm_probabilities = lgbm_model.predict_proba(X.head())
        except Exception as e:
            print(f"Error predicting with LightGBM model: {e}")
        else:
            print("LightGBM predicted probabilities (first 5):")
            print(lgbm_probabilities)
            print("-" * 30)

## Load Neural Network Model and Scaler, Then Make Predictions

In [None]:
# Load the Neural Network model and its scaler, then print the model's
# predictions for the first rows of `X` after scaling them.
#
# Loading and prediction are guarded separately so that a failure while
# scaling or predicting is not misreported as a failure to load the artifacts.
print(f"\nAttempting to load Neural Network model from: {nn_model_path}")
print(f"Attempting to load Neural Network scaler from: {nn_scaler_path}")

# Check each artifact once and report every missing file, not just the first.
nn_model_found = os.path.exists(nn_model_path)
nn_scaler_found = os.path.exists(nn_scaler_path)
if not nn_model_found:
    print(f"Neural Network model file not found at {nn_model_path}")
if not nn_scaler_found:
    print(f"Neural Network scaler file not found at {nn_scaler_path}")

if nn_model_found and nn_scaler_found:
    try:
        nn_model = tf.keras.models.load_model(nn_model_path)
        # NOTE: joblib.load unpickles the file and can execute arbitrary code;
        # only load scaler files from a trusted source.
        scaler = joblib.load(nn_scaler_path)
    except Exception as e:
        print(f"Error loading Neural Network model or scaler: {e}")
    else:
        print("Neural Network model and scaler loaded successfully.")
        try:
            # Scale the features before prediction
            X_scaled = scaler.transform(X.head())
            # Example: Predict probabilities for the first 5 rows
            nn_probabilities = nn_model.predict(X_scaled)
        except Exception as e:
            print(f"Error predicting with Neural Network model: {e}")
        else:
            print("Neural Network predicted probabilities (first 5):")
            print(nn_probabilities)
            print("-" * 30)

## Conclusion

In [None]:
# Final status line marking the end of the demonstration.
_closing_message = "\nModel loading and prediction demonstration finished."
print(_closing_message)