# Model Evaluation

This notebook evaluates the final trained model on the test set.

**Pipeline:**
- Load the trained model from `2_TrainBestModel.ipynb`
- Evaluate the model on the test set
- Generate comprehensive evaluation metrics
- Create visualizations

# Imports

In [None]:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import recall_score, precision_score, ConfusionMatrixDisplay, PrecisionRecallDisplay, RocCurveDisplay

from astroML.classification import GMMBayes
from astroML.datasets import fetch_rrlyrae_combined
from astroML.utils import split_samples

from scipy.stats import norm


from astropy.io import ascii
from astropy.io import fits
import os.path

from astropy.table import Table, join, MaskedColumn, vstack, Column


import numpy as np
from astropy.table import vstack
import torch

import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelEncoder, StandardScaler



import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import numpy as np
from torch.distributions.normal import Normal
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
from sklearn.model_selection import ParameterGrid
import math
import copy
import warnings

from itertools import cycle

# Setup Paths

In [None]:
# Root of the model pipeline on the local machine. Kept as a plain string
# (with trailing slash) because later cells build file paths by concatenation.
project_dir_path = (
    "/Users/howard_willard/Desktop/TESS_Cluster_Age_ML/"
    "GaussianNN_CGW/Model_Pipeline/"
)
# Folder the pickled dataset and the saved model are read from further down.
temp_files_path = "".join((project_dir_path, "TempFiles/"))

# Load Dataset

In [None]:
# ============================================================
# LOAD DATASET
# ============================================================

# `pickle` is not part of the notebook's imports cell, so it is imported here;
# without it the `pickle.load` call below raises NameError.
import pickle

print("Loading dataset...")
save_path = temp_files_path
file_name = 'traintest_data2.pkl'

# NOTE: unpickling can execute arbitrary code. Only load files written by this
# pipeline; never point this cell at a pickle from an untrusted source.
with open(save_path + file_name, 'rb') as f:
    PROCESSED_DATASET = pickle.load(f)

# The pickled object is unpacked as an 11-element sequence in this fixed order;
# a mismatch in length raises ValueError here rather than failing later.
(
    X_train, X_test,
    period_train, period_test,
    y_train, y_test,
    feature_cols, SCALER, y_mean,
    train_cluster_names, test_cluster_names,
) = PROCESSED_DATASET

# Summary of what was loaded (sizes are read from the arrays' shapes).
print("\n" + "="*70)
print("DATASET LOADED")
print("="*70)
print(f"Training set:   {X_train.shape[0]} samples")
print(f"Test set:       {X_test.shape[0]} samples")
print(f"Features:       {X_train.shape[1]} summary statistics")
print(f"Periodogram:    {period_train.shape[1]} frequency bins")
print(f"\nTest clusters:  {len(np.unique(test_cluster_names))} unique clusters")
print("="*70)

# Load Model Architecture and Helper Functions

In [None]:
# Load model class definition
import sys

# Make the project directory importable. The membership check keeps sys.path
# free of duplicate entries when this cell is executed more than once.
if project_dir_path not in sys.path:
    sys.path.append(project_dir_path)

from GaussianNN_wPeriodogram import DualInputNN

In [None]:
# Enable autoreload - automatically reloads modules when they change
# (IPython magics: these lines only run inside an IPython/Jupyter kernel.)
# The extension is loaded before the project imports below — presumably so
# edits to those helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2 

# Import evaluation functions
# NOTE(review): the semantics of these helpers live in DataAnalysis_HelperFcns,
# which is not shown here; the names suggest error/calibration metrics — confirm.
from DataAnalysis_HelperFcns import (
    mae, rmse, coverage, crps_gaussian, Loss_Components
)

# Plotting/summary helpers from the project's SingleFold_PlottingFcns module.
from SingleFold_PlottingFcns import (
    SummaryStats, PlotSummaryStats
)

# Load Trained Model

In [None]:
# ============================================================
# LOAD TRAINED MODEL
# ============================================================

model_path = temp_files_path + 'final_model.pt'

print("Loading trained model...")
# map_location="cpu" lets a checkpoint that was saved from a GPU/MPS device be
# opened on a machine without that device. The model is rebuilt on CPU in the
# next cell, so the loaded weights end up in the same place either way.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True;
# if this checkpoint stores non-tensor Python objects (e.g. in the training
# log), loading may require weights_only=False — confirm against the saved file.
model_package = torch.load(model_path, map_location="cpu")

# Extract model components (keys are those written by the training notebook)
hyperparameters = model_package['hyperparameters']
model_architecture = model_package['model_architecture']
training_log = model_package['training_log']
best_epoch = model_package['best_epoch_from_tuning']
composite_score = model_package['composite_score']

# Display model information
print("\n" + "="*70)
print("TRAINED MODEL LOADED")
print("="*70)
print(f"Best epoch (from tuning): {best_epoch}")
print(f"Composite score:          {composite_score:.4f}")
print("\nModel Architecture:")
print(f"  Summary stats dim:      {model_architecture['summary_dim']}")
print(f"  Periodogram dim:        {model_architecture['periodogram_dim']}")
print(f"  Hidden layer size:      {model_architecture['hidden_size']}")
print(f"  Dropout probability:    {model_architecture['dropout_prob']}")
print(f"  Use periodogram:        {model_architecture['use_periodogram']}")
print(f"  Use CNN:                {model_architecture['use_cnn']}")
print(f"  Learn sigma:            {model_architecture['learn_sigma']}")
print("\nHyperparameters:")
# Left-align each hyperparameter name in a 30-character column.
for key, value in hyperparameters.items():
    print(f"  {key:30s} = {value}")
print("="*70)

# Reconstruct Model from Saved Weights

In [None]:
# ============================================================
# RECONSTRUCT MODEL
# ============================================================

print("Reconstructing model from saved weights...")

# Initialize model with saved architecture. Note the key-to-argument renames:
# 'hidden_size' feeds `x1` and 'use_cnn' feeds `periodogram_use_cnn`.
final_model = DualInputNN(
    summary_dim=model_architecture['summary_dim'],
    periodogram_dim=model_architecture['periodogram_dim'],
    x1=model_architecture['hidden_size'],
    dropout_prob=model_architecture['dropout_prob'],
    use_periodogram=model_architecture['use_periodogram'],
    periodogram_use_cnn=model_architecture['use_cnn'],
    learn_sigma=model_architecture['learn_sigma']
)

# Load trained weights
final_model.load_state_dict(model_package['model_state_dict'])

# Set to evaluation mode
final_model.eval()

# The check mark was stored as mojibake ("âœ…": its UTF-8 bytes decoded as
# cp1252); restored to the intended character.
print("\n✅ Model reconstructed and ready for evaluation!")
print("   Model is in evaluation mode (dropout disabled)")
print("="*70)

# Evaluation Sections (To Be Implemented)

The following sections will be added:
1. Test set predictions
2. Summary statistics
3. Prediction vs truth plots
4. Uncertainty calibration analysis
5. Residual analysis
6. Performance by cluster properties