# Hyperparameter Tuning for RNN/LSTM/GRU on Light Curve Data
Grid search over hyperparameters for the RNN, LSTM, and GRU models.

In [None]:
# Fetch the project repository and make it the working directory; later cells
# import `lib.dataset` and `models.*`, presumably provided by this checkout.
!git clone https://github.com/mjang01011/CS230-Detecting-Transients-in-LSST-Observatory-Data.git
%cd CS230-Detecting-Transients-in-LSST-Observatory-Data

In [None]:
import torch

# Report whether a CUDA device is visible and, if so, which one and how much memory it has.
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {total_gb:.2f} GB")

In [None]:
from google.colab import files
# Open the Colab upload dialog; `processed_training.csv` is expected to be
# among the uploaded files because it is moved right below.
uploaded = files.upload()
# Place the CSV under data/, which is where the following cells read it from.
!mkdir -p data
!mv processed_training.csv data/

In [None]:
import pandas as pd

# Load the uploaded table and print a quick sanity summary:
# overall shape, number of distinct objects, and the set of class labels.
df = pd.read_csv('data/processed_training.csv')

print(f"Data shape: {df.shape}")
object_count = df['object_id'].nunique()
print(f"Unique objects: {object_count}")
target_values = sorted(df['target'].unique())
print(f"Unique targets: {target_values}")

## Hyperparameter Tuning with Grid Search

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import pandas as pd
import numpy as np
from itertools import product
from lib.dataset import LightCurveDataset
from models.rnn import LightCurveRNN
from models.lstm import LightCurveLSTM
from models.gru import LightCurveGRU
import time
import random

# Reproducibility: seed every RNG used in this notebook (Python, NumPy, PyTorch CPU and CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN flags: request deterministic behaviour and switch benchmark mode off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

print(f"Random seed set to: {SEED}")

# Search space: one run is performed per element of the Cartesian product of
# these lists. Key order matters because the search loop unpacks the values positionally.
param_grid = dict(
    model=['rnn', 'lstm', 'gru'],
    hidden_size=[64, 128, 256],
    num_layers=[2, 3, 4, 5],
    lr=[0.1, 0.01, 0.001, 0.0005],
    batch_size=[64],
    max_length=[200],
)

# Settings shared by every run
EPOCHS = 15
DATA_PATH = 'data/processed_training.csv'

print(f"Total combinations: {sum(1 for _ in product(*param_grid.values()))}")

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``, updating weights only if ``optimizer`` is given.

    Returns:
        ``(loss_sum, correct, total)``: the sum of the per-batch loss values,
        the number of samples whose argmax prediction matched the label, and
        the number of samples seen.
    """
    training = optimizer is not None
    loss_sum, correct, total = 0.0, 0, 0

    # Gradients are only needed when an optimizer step follows.
    with torch.set_grad_enabled(training):
        for batch_data, batch_labels in loader:
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            correct += (outputs.argmax(dim=1) == batch_labels).sum().item()
            total += batch_labels.size(0)

    return loss_sum, correct, total


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device,
                restore_best=True):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module called as ``model(batch_data)``; its output is argmax-ed
            over dim 1 to obtain class predictions.
        train_loader: Iterable of ``(batch_data, batch_labels)`` used for training.
        val_loader: Iterable of ``(batch_data, batch_labels)`` used for validation.
        criterion: Loss called as ``criterion(outputs, batch_labels)``.
        optimizer: Optimizer stepped once per training batch.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to before the forward pass.
        restore_best: When true (default), the weights from the epoch with the
            highest validation accuracy are loaded back into ``model`` before
            returning, so a checkpoint saved by the caller matches the
            returned ``best_val_acc``. When false, ``model`` keeps the weights
            of the final epoch.

    Returns:
        ``(best_val_acc, history)``. ``best_val_acc`` is the highest validation
        accuracy (in percent) over all epochs, or ``0.0`` if no epoch exceeded
        zero. ``history`` holds one dict per epoch with the keys ``epoch``
        (1-based), ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc``;
        losses are the mean of the per-batch losses, accuracies are percentages.

    Raises:
        ZeroDivisionError: If either loader yields no batches.
    """
    best_val_acc = 0.0
    best_state = None
    history = []

    for epoch in range(epochs):
        model.train()
        train_loss_sum, train_correct, train_total = _run_epoch(
            model, train_loader, criterion, device, optimizer)

        model.eval()
        val_loss_sum, val_correct, val_total = _run_epoch(
            model, val_loader, criterion, device)

        train_loss = train_loss_sum / len(train_loader)
        train_acc = 100 * train_correct / train_total
        val_loss = val_loss_sum / len(val_loader)
        val_acc = 100 * val_correct / val_total

        history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc
        })

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            if restore_best:
                # Clone the tensors: state_dict() hands back references that
                # later optimizer steps would otherwise overwrite in place.
                best_state = {name: tensor.detach().clone()
                              for name, tensor in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    return best_val_acc, history

In [None]:
# Load dataset once and reuse it for every grid-search run.
# The sequence length is read from the grid so that the `max_length` recorded
# for each run is the value the dataset was really built with. Because the
# dataset is built a single time, a grid with several lengths cannot be honoured.
max_length_options = param_grid['max_length']
if len(max_length_options) != 1:
    raise ValueError(
        "The dataset is built once, so param_grid['max_length'] must contain "
        f"exactly one value; got {max_length_options}"
    )
dataset = LightCurveDataset(DATA_PATH, max_length=max_length_options[0], use_flux_only=True)

# 80/20 train/validation split
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Use generator for reproducible split
generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=generator)

num_classes = dataset.num_classes
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(f"Dataset loaded: {len(dataset)} samples")
print(f"Train: {train_size}, Val: {val_size}")
print(f"Number of classes: {num_classes}")
print(f"Device: {device}")
print(f"Seed: {SEED}")

In [None]:
# Grid search: train one model per hyperparameter combination, log every run
# to a CSV, and keep the strongest run of each model family.
results = []
best_models = {
    family: {'val_acc': 0, 'params': None, 'history': None}
    for family in ('rnn', 'lstm', 'gru')
}

# Constructor for each value that may appear in param_grid['model']
model_classes = {
    'rnn': LightCurveRNN,
    'lstm': LightCurveLSTM,
    'gru': LightCurveGRU,
}

total_runs = len(list(product(*param_grid.values())))
current_run = 0

for current_run, params in enumerate(product(*param_grid.values()), start=1):
    # Positional unpacking relies on the key order of param_grid.
    model_type, hidden_size, num_layers, lr, batch_size, max_length = params

    print(f"\n{'='*80}")
    print(f"Run {current_run}/{total_runs}")
    print(f"Model: {model_type}, Hidden: {hidden_size}, Layers: {num_layers}, LR: {lr}, Batch: {batch_size}")
    print(f"{'='*80}")

    # Set seed before each run so every configuration starts from the same RNG state
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    # Create dataloaders; the seeded generator fixes the training shuffle order
    generator = torch.Generator().manual_seed(SEED)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Create model
    model_cls = model_classes.get(model_type)
    if model_cls is None:
        raise ValueError(f"Unknown model: {model_type}")
    model = model_cls(input_size=1, hidden_size=hidden_size,
                      num_layers=num_layers, num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Train
    start_time = time.time()
    best_val_acc, history = train_model(model, train_loader, val_loader,
                                        criterion, optimizer, EPOCHS, device)
    training_time = time.time() - start_time

    # Record results; the "final_*" fields describe the last epoch of the run
    result = {
        'model': model_type,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'lr': lr,
        'batch_size': batch_size,
        'max_length': max_length,
        'best_val_acc': best_val_acc,
        'training_time': training_time,
        'final_train_loss': history[-1]['train_loss'],
        'final_train_acc': history[-1]['train_acc'],
        'final_val_loss': history[-1]['val_loss'],
        'final_val_acc': history[-1]['val_acc'],
        'seed': SEED
    }
    results.append(result)

    print(f"Best Val Acc: {best_val_acc:.2f}%")
    print(f"Training Time: {training_time:.2f}s")

    # Save best model and history. The first completed run of a family always
    # registers, even at 0% accuracy, so the summary and download cells later
    # find params, history and a checkpoint file for it.
    family_best = best_models[model_type]
    if family_best['params'] is None or best_val_acc > family_best['val_acc']:
        family_best['val_acc'] = best_val_acc
        family_best['params'] = result.copy()
        family_best['history'] = history
        # The checkpoint holds the weights `model` has when train_model returns.
        torch.save(model.state_dict(), f'best_{model_type}_model.pth')
        print(f"*** New best {model_type.upper()} model saved! ***")

    # Save intermediate results after every run so an interrupted search still leaves a CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv('hyperparameter_results.csv', index=False)

print("\n" + "="*80)
print("Grid Search Complete!")
print("="*80)

## Results Analysis

In [None]:
# Load the grid-search log and rank every run by its best validation accuracy
results_df = pd.read_csv('hyperparameter_results.csv')
results_df = results_df.sort_values('best_val_acc', ascending=False)

print("\nTop 10 Configurations:")
print(results_df.head(10))

# Top-ranked row per model family. The CSV is rewritten after every run, so it
# may come from a partial search in which some families have no rows yet;
# those are reported instead of failing on `.iloc[0]`.
best_by_model = {}
for model_name in ('rnn', 'lstm', 'gru'):
    print("\n" + "="*80)
    print(f"Best {model_name.upper()} Model:")
    print("="*80)

    model_rows = results_df[results_df['model'] == model_name]
    if model_rows.empty:
        best_by_model[model_name] = None
        print(f"No results recorded for {model_name}")
        continue

    best_by_model[model_name] = model_rows.iloc[0]
    print(best_by_model[model_name])

# Keep the per-family names available; each is None when that family has no rows.
best_rnn = best_by_model['rnn']
best_lstm = best_by_model['lstm']
best_gru = best_by_model['gru']

In [None]:
# Plot training curves for best models: one panel per model family showing
# per-epoch training and validation loss of that family's best run.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 3, figsize=(20, 5))

for idx, model_type in enumerate(['rnn', 'lstm', 'gru']):
    history = best_models[model_type]['history']

    # A family without a recorded run keeps an empty panel.
    if history is None:
        print(f"No history found for {model_type}")
        continue

    ax = axes[idx]
    epochs = [h['epoch'] for h in history]
    train_losses = [h['train_loss'] for h in history]
    val_losses = [h['val_loss'] for h in history]

    ax.plot(epochs, train_losses, label='Train Loss', marker='o', linewidth=2)
    ax.plot(epochs, val_losses, label='Val Loss', marker='s', linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Loss', fontsize=12)
    ax.set_title(f'Best {model_type.upper()} Model - Training & Validation Loss', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

    # Annotate the panel with the hyperparameters of the plotted run
    params = best_models[model_type]['params']
    info_text = f"Hidden: {params['hidden_size']}, Layers: {params['num_layers']}\nLR: {params['lr']}, Batch: {params['batch_size']}\nBest Val Acc: {params['best_val_acc']:.2f}%"
    ax.text(0.02, 0.98, info_text, transform=ax.transAxes,
            fontsize=9, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.savefig('best_models_training_curves.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nBest Model Parameters:")
for model_type in ['rnn', 'lstm', 'gru']:
    params = best_models[model_type]['params']
    print(f"\n{model_type.upper()}:")
    # Same guard as the plotting loop: a family without a recorded run has
    # params set to None, and indexing it would raise TypeError.
    if params is None:
        print("  No completed runs")
        continue
    print(f"  Hidden Size: {params['hidden_size']}")
    print(f"  Num Layers: {params['num_layers']}")
    print(f"  Learning Rate: {params['lr']}")
    print(f"  Batch Size: {params['batch_size']}")
    print(f"  Best Val Acc: {params['best_val_acc']:.2f}%")
    print(f"  Final Train Loss: {params['final_train_loss']:.4f}")
    print(f"  Final Val Loss: {params['final_val_loss']:.4f}")

In [None]:
# Download results and models: the results table, the loss-curve figure,
# and one checkpoint per model family, in that order.
from google.colab import files

for artifact in (
    'hyperparameter_results.csv',
    'best_models_training_curves.png',
    'best_rnn_model.pth',
    'best_lstm_model.pth',
    'best_gru_model.pth',
):
    files.download(artifact)