# **Sweeps**

## **Pre-Sweep**

### **Import Dependencies**

In [8]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import wandb

import os
import sys

# Make the project checkout importable. Set GBM_ML_ROOT to point at a different
# location; the original path is kept as the fallback so existing setups still work.
sys.path.append(os.environ.get("GBM_ML_ROOT", "C:/Users/tobys/Downloads/GBM-ML-main/GBM-ML-main"))

# SECURITY: the API key must never live in the notebook. Without an explicit key,
# wandb picks up credentials from the WANDB_API_KEY environment variable or the
# netrc file written by a previous login, and prompts otherwise.
# NOTE(review): the key that used to be hardcoded here has been exposed and should be revoked.
wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\tobys\_netrc


True

### **Define Dataset Class**

In [9]:
# Process data
lcs = pd.read_csv('lcs.csv')
channels = ['n0', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6', 'n7', 'n8', 'n9', 'na', 'nb', 'b0', 'b1']

# Fill missing channels with noise drawn from a normal distribution that matches
# the channel's own mean and standard deviation.
# A seeded generator is used so the imputed values (and therefore the data every
# sweep run trains on) are identical after a kernel restart.
rng = np.random.default_rng(42)
for channel in channels:
    missing_indices = lcs[channel].isnull()
    num_missing = missing_indices.sum()
    noise = rng.normal(loc=lcs[channel].mean(), scale=lcs[channel].std(), size=num_missing)
    lcs.loc[missing_indices, channel] = noise

# One float32 tensor of shape [time, channels] per burst, in groupby order;
# burst_ids[i] is the burst label of time_series_list[i].
time_series_list = []
burst_ids = []
grouped = lcs.groupby('burst')
for burst, group in grouped:
    time_series_data = group[channels].values
    time_series_tensor = torch.tensor(time_series_data, dtype=torch.float32)
    time_series_list.append(time_series_tensor)
    burst_ids.append(burst)

# Padding with zeros: shorter bursts are right-padded to the longest one,
# giving a single [bursts, time, channels] tensor.
time_series_list = nn.utils.rnn.pad_sequence(time_series_list, batch_first=True, padding_value=0.0)

# Set sequence_length for the sweep config and model
sequence_length = time_series_list.shape[1]

# Normalize the light curves.
# The tensor is flattened to [bursts, time * channels], so each (time step, channel)
# position is standardized across bursts; the zero padding takes part in those statistics.
scaler = StandardScaler()
time_series_list_2d = time_series_list.reshape(time_series_list.shape[0], -1)
time_series_list_2d = scaler.fit_transform(time_series_list_2d)
time_series_list = time_series_list_2d.reshape(time_series_list.shape)
time_series_list = torch.tensor(time_series_list, dtype=torch.float32)



# Dataset Class
class GRBDataset(Dataset):
    """Map-style dataset that serves the items of an indexable container unchanged.

    Args:
        data: Any object supporting ``len()`` and ``[]`` indexing; it is stored
            as ``self.data`` without being copied.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        """Return the number of items held in ``self.data``."""
        item_count = len(self.data)
        return item_count

    def __getitem__(self, idx):
        """Return the item stored at ``idx`` exactly as it sits in ``self.data``."""
        item = self.data[idx]
        return item

### **Define Model Components**

In [10]:
# Bidirectional LSTM Autoencoder Model w/ attention
class Encoder(nn.Module):
    """Bidirectional LSTM encoder that pools over time with learned attention.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction (outputs are ``hidden_size * 2`` wide).
        num_layers: Number of stacked LSTM layers.
        latent_size: Width of the latent vector produced for each sequence.
        dropout: Dropout probability between stacked LSTM layers. It is only
            forwarded to ``nn.LSTM`` when ``num_layers > 1``.
    """

    def __init__(self, input_size, hidden_size, num_layers, latent_size, dropout):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers; with a single layer a
        # non-zero value does nothing except raise a UserWarning, so pass 0.0 there.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
            bidirectional=True
        )
        self.attention = nn.Linear(hidden_size * 2, 1)
        self.fc_latent = nn.Linear(hidden_size * 2, latent_size)  # compress to latent

    def forward(self, x):
        """Encode a batch of sequences.

        Args:
            x: Tensor of shape ``[batch, time, input_size]``.

        Returns:
            Tuple ``(latent, attn_weights)`` where ``latent`` is ``[batch, latent_size]``
            and ``attn_weights`` is ``[batch, time, 1]`` and sums to 1 along the time axis.
        """
        out, _ = self.lstm(x)  # out: [batch, time, hidden_size*2]

        # One score per time step; no mask is applied, so every time step
        # (including any padding in x) takes part in the softmax.
        attn_scores = self.attention(out)                  # [batch, time, 1]
        attn_weights = torch.softmax(attn_scores, dim=1)   # normalize over time
        context = torch.sum(attn_weights * out, dim=1)     # [batch, hidden_size*2]

        latent = self.fc_latent(context)                   # [batch, latent_size]
        return latent, attn_weights


class Decoder(nn.Module):
    """Bidirectional LSTM decoder that unrolls one latent vector into a full sequence.

    Args:
        latent_size: Width of the incoming latent vector.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of features produced per time step.
        seq_len: Number of time steps to generate for every input.
    """

    def __init__(self, latent_size, hidden_size, num_layers, output_size, seq_len):
        super().__init__()
        # Both directions are concatenated, so the LSTM input and output share this width.
        bidir_width = hidden_size * 2
        self.fc_expand = nn.Linear(latent_size, bidir_width)
        self.lstm = nn.LSTM(
            input_size=bidir_width,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True
        )
        self.fc_out = nn.Linear(bidir_width, output_size)
        self.seq_len = seq_len

    def forward(self, latent):
        """Decode ``latent`` of shape ``[batch, latent_size]`` into ``[batch, seq_len, output_size]``."""
        # Project the latent vector, then feed the same projection at every time step.
        projected = self.fc_expand(latent)
        per_step_input = projected.unsqueeze(1).repeat(1, self.seq_len, 1)

        hidden_states, _ = self.lstm(per_step_input)   # [batch, time, hidden_size*2]
        return self.fc_out(hidden_states)              # [batch, time, output_size]


class BiLSTMAutoencoder(nn.Module):
    """Autoencoder that chains ``Encoder`` and ``Decoder``.

    Args:
        input_size: Features per time step; also used as the decoder's output size.
        hidden_size: Hidden units per LSTM direction in both halves.
        num_layers: Stacked LSTM layers in both halves.
        latent_size: Width of the bottleneck vector.
        seq_len: Number of time steps the decoder generates.
        dropout: Dropout value handed to the encoder only.
    """

    def __init__(self, input_size, hidden_size, num_layers, latent_size, seq_len, dropout):
        super().__init__()
        self.encoder = Encoder(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            latent_size=latent_size,
            dropout=dropout,
        )
        self.decoder = Decoder(
            latent_size=latent_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            output_size=input_size,
            seq_len=seq_len,
        )

    def forward(self, x):
        """Return ``(reconstructed, latent, attn_weights)`` for the input batch ``x``."""
        latent, attn_weights = self.encoder(x)
        return self.decoder(latent), latent, attn_weights

## **Sweep**

### **Sweep Training Function**

In [None]:
def train_lstm_sweep():
    """Train one ``BiLSTMAutoencoder`` for a single W&B sweep run.

    Hyperparameters are read from ``wandb.config`` (``input_dim``, ``hidden_dim``,
    ``num_layers``, ``latent_dim``, ``dropout``, ``batch_size``, ``learning_rate``,
    ``num_epochs``). The training data and sequence length come from the
    module-level ``time_series_list`` and ``sequence_length``.

    Per epoch it logs ``epoch``, ``loss`` (sample-weighted mean reconstruction MSE
    over the epoch), ``batch_size`` and ``learning_rate``; after training it logs
    ``final_loss`` (the last epoch's mean loss).

    Raises:
        ValueError: If the dataset is empty.
    """
    # The context manager closes the run even when training raises, so a failed
    # run does not stay open and bleed into the next one started by the agent.
    with wandb.init():
        config = wandb.config

        # Get data
        dataset = GRBDataset(time_series_list)
        if len(dataset) == 0:
            raise ValueError("time_series_list is empty; nothing to train on")
        dataloader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)

        model = BiLSTMAutoencoder(
            config.input_dim,
            config.hidden_dim,
            config.num_layers,
            config.latent_dim,
            sequence_length,
            config.dropout
        )

        # Define the loss function and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

        model.train()
        epoch_loss = float('nan')  # stays NaN only if num_epochs is 0

        # Training loop
        for epoch in range(config.num_epochs):
            loss_sum = 0.0
            samples_seen = 0
            for batch in dataloader:
                batch = batch.float()
                optimizer.zero_grad()
                reconstructed, _, _ = model(batch)
                loss = criterion(reconstructed, batch)
                loss.backward()
                optimizer.step()

                # Weight by batch size so a short final batch does not skew the mean.
                loss_sum += loss.item() * batch.shape[0]
                samples_seen += batch.shape[0]

            epoch_loss = loss_sum / samples_seen

            # ReduceLROnPlateau counts `patience` in calls to step(); stepping once
            # per epoch on the epoch mean keeps it from reacting to per-batch noise.
            scheduler.step(epoch_loss)

            print(f"Epoch {epoch+1}, Mean epoch loss: {epoch_loss:.4f}")

            wandb.log({
                "epoch": epoch,
                "loss": epoch_loss,
                # Log the configured value; the last batch of an epoch may be smaller.
                "batch_size": config.batch_size,
                "learning_rate": optimizer.param_groups[0]['lr']
            })

        wandb.log({"final_loss": epoch_loss})

### **Sweep Config** - _THE IMPORTANT PART_

In [12]:
# How many runs to launch for the sweep
num_sweeps = 15

# Switches choosing which hyperparameters get swept (True) and which stay at
# their fixed default (False), plus the search strategy.
config_flags = dict(
    sweep_input_dim=False,
    sweep_hidden_dim=False,
    sweep_latent_dim=False,
    sweep_num_layers=False,
    sweep_batch_size=False,
    sweep_learning_rate=True,
    sweep_dropout=False,
    sweep_method='bayes',  # 'random', 'bayes', or 'grid'
)

# Values used for every hyperparameter whose sweep switch is off
fixed_defaults = dict(
    input_dim=14,
    hidden_dim=16,
    latent_dim=64,
    num_layers=2,
    batch_size=16,
    learning_rate=0.00022,
    dropout=0.4,
    num_epochs=15,
)

def generate_sweep_config(flags, defaults):
    """Build a W&B sweep configuration dictionary.

    Args:
        flags: Mapping with a boolean ``sweep_<name>`` entry for each of
            ``input_dim``, ``hidden_dim``, ``latent_dim``, ``num_layers``,
            ``batch_size``, ``learning_rate`` and ``dropout``, plus
            ``sweep_method`` (the search strategy name).
        defaults: Mapping with the fixed value for each of those hyperparameters
            and for ``num_epochs``; a value is only read when its flag is off.

    Returns:
        dict with ``method``, ``metric`` (minimize ``loss``) and ``parameters``.
        A swept parameter gets a ``values`` list (or, for the learning rate under a
        non-grid method, a log-uniform range); a fixed one gets ``{'value': ...}``.
        ``num_epochs`` is always fixed.

    Raises:
        KeyError: If a required flag or a needed default is missing.
    """
    method = flags['sweep_method']
    sweep_config = {
        'method': method,
        'metric': {'name': 'loss', 'goal': 'minimize'},
        'parameters': {}
    }
    parameters = sweep_config['parameters']

    # Candidate values tried when the matching sweep flag is on.
    # NOTE(review): input_dim candidates other than the number of data channels
    # presumably cannot be fed to the model -- confirm before enabling that flag.
    candidates = {
        'input_dim': [12, 14, 16],
        'hidden_dim': [16, 32, 64, 128],
        'latent_dim': [8, 16, 32, 64],
        'num_layers': [1, 2, 3],
        'batch_size': [8, 16, 32],
        'dropout': [0.0, 0.2, 0.3, 0.4],
    }

    def _swept_or_fixed(name):
        """Return the candidate list when ``sweep_<name>`` is on, else the fixed default."""
        if flags[f'sweep_{name}']:
            return {'values': list(candidates[name])}
        return {'value': defaults[name]}

    for name in ('input_dim', 'hidden_dim', 'latent_dim', 'num_layers', 'batch_size'):
        parameters[name] = _swept_or_fixed(name)

    if flags['sweep_learning_rate']:
        if method == 'grid':
            # Grid search enumerates combinations and cannot sample a continuous
            # distribution, so offer log-spaced points over the same range instead.
            parameters['learning_rate'] = {'values': [0.00001, 0.00003, 0.0001, 0.0003, 0.001]}
        else:
            parameters['learning_rate'] = {
                'distribution': 'log_uniform_values',
                'min': 0.00001,
                'max': 0.001
            }
    else:
        parameters['learning_rate'] = {'value': defaults['learning_rate']}

    parameters['dropout'] = _swept_or_fixed('dropout')

    parameters['num_epochs'] = {'value': defaults['num_epochs']}

    return sweep_config

### **Run Sweep**

In [13]:

# Build the sweep definition from the flags and defaults chosen in the config cell
sweep_config = generate_sweep_config(flags=config_flags, defaults=fixed_defaults)

# Register the sweep with W&B, then let an agent execute up to `num_sweeps` training runs
sweep_id = wandb.sweep(sweep_config, project="GBM-LSTM-LR-Sweep")
wandb.agent(sweep_id, function=train_lstm_sweep, count=num_sweeps)

Create sweep with ID: bwaeu41p
Sweep URL: https://wandb.ai/tobiassafie-drexel-university/GBM-LSTM-LR-Sweep/sweeps/bwaeu41p


[34m[1mwandb[0m: Agent Starting Run: 90rfxpqp with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	hidden_dim: 16
[34m[1mwandb[0m: 	input_dim: 14
[34m[1mwandb[0m: 	latent_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0002991127364880311
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: [32m[41mERROR[0m Run 90rfxpqp errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\tobys\anaconda3\envs\star-pinn\lib\site-packages\wandb\agents\pyagent.py", line 302, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\tobys\AppData\Local\Temp\ipykernel_3484\1609760947.py", line 42, in train_lstm_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     if batch == len(dataloader) - 1:
[34m[1mwandb[0m: [32m[41mERROR[0m RuntimeError: Boolean value of Tensor with more than one value is ambiguous
[34m[1mwandb[0m: [32m[41mERROR[0m 
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
