# Alpha

In [None]:
import torch
from torch.utils.data import DataLoader, ConcatDataset
import torch.nn as nn 
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from torchinfo import summary
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import matplotlib.pyplot as plt 
from utils.timeseriesdataset import TimeSeriesDataset
from utils.padding import pad_batch, LABEL_PADDING_VALUE
from models.models import RegressionModel
import pickle 
from pathlib import Path

# Hyperparameters of the alpha-regression training run.
BATCH_SIZE = 32
EPOCHS = 35
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 2e-6

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Ask PyTorch's caching allocator to release unused GPU memory before starting.
torch.cuda.empty_cache()
print('The model is running on:', DEVICE)

The model is running on: cpu


# Create DataLoaders

In [None]:
simulated_tracks_directory = Path("<enter dir filepath that has train val and test data>")


def load_pickled_instances(directory, filename):
    """Concatenate the pickled lists stored in every ``<directory>/*/<filename>``.

    Args:
        directory: ``pathlib.Path`` of the root folder whose immediate
            sub-folders contain the pickle files.
        filename: Name of the pickle file to look for in each sub-folder.

    Returns:
        A tuple ``(files, instances)``: the sorted list of matching files and
        one flat list holding the contents of all of them, in file order.
    """
    # Sort the matches so the instance order does not depend on the order in
    # which the file system happens to list the sub-folders.
    files = sorted(directory.glob(f"*/{filename}"))
    instances = []
    for file in files:
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # this at pickle files you created yourself.
        with open(file, "rb") as f:
            instances += pickle.load(f)
    return files, instances


# for faster training we use pickled data, implementation without pickle see commented below
train_files, train_instances = load_pickled_instances(simulated_tracks_directory, "train_instances.pkl")
val_files, val_instances = load_pickled_instances(simulated_tracks_directory, "val_instances.pkl")
test_files, test_instances = load_pickled_instances(simulated_tracks_directory, "test_instances.pkl")

print("Train data: ", len(train_instances), "Test data: ", len(test_instances), "Val data: ", len(val_instances))

# Alternative without pickle (needs `import random`):
# filepaths = list(simulated_tracks_directory.rglob('*.parquet'))
# print("Number of files found:", len(filepaths))
# random.shuffle(filepaths)
# train_instances = [TimeSeriesDataset(filepath, augment=True) for filepath in filepaths[:int(len(filepaths)*0.7)]]
# test_instances = [TimeSeriesDataset(filepath, augment=False) for filepath in filepaths[int(len(filepaths)*0.7):int(len(filepaths)*0.85)]]
# val_instances = [TimeSeriesDataset(filepath, augment=False) for filepath in filepaths[int(len(filepaths)*0.85):]]

In [None]:
# Merge the per-file datasets of each split into a single dataset.
conc_train = ConcatDataset(train_instances)
conc_val = ConcatDataset(val_instances)
conc_test = ConcatDataset(test_instances)

# Only the training data is shuffled. Validation and test loaders keep a fixed
# order so that evaluation is reproducible and the predictions collected in
# the testing cell can be indexed consistently between runs.
train_loader = DataLoader(conc_train, batch_size=BATCH_SIZE, shuffle=True, collate_fn=pad_batch)
test_loader = DataLoader(conc_test, batch_size=BATCH_SIZE, shuffle=False, collate_fn=pad_batch)
val_loader = DataLoader(conc_val, batch_size=BATCH_SIZE, shuffle=False, collate_fn=pad_batch)

# Number of batches per split (train, test, val).
print("DataLoader Sizes:", len(train_loader), len(test_loader), len(val_loader))

# Model
Instantiate the model, the TensorBoard writer, the loss function, the optimizer, and the learning-rate scheduler.


In [None]:
# Build the network, move it to the selected device and print its summary.
# The dummy input size is presumably (batch, sequence length, features) --
# TODO confirm against RegressionModel.
model = RegressionModel()
model = model.to(DEVICE)
model_stats = summary(model, input_size=(BATCH_SIZE, 200, 10))
print(model_stats)

# One timestamp names both the TensorBoard run and the checkpoint folder.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
tensorboard_log_dir = 'models/checkpoints/alpha_runs/runs_{}'.format(timestamp)
writer = SummaryWriter(tensorboard_log_dir)
model_directory = os.path.join('models/checkpoints/alpha_model', 'model_{}'.format(timestamp))

# Lowest validation loss seen so far; updated by the training cell.
best_val_loss = float("inf")

# Element-wise L1 loss: the reduction is done by hand so padded positions can
# be masked out before averaging.
continuous_loss_fn = nn.L1Loss(reduction='none')

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)
# Multiply the learning rate by 0.1 once the monitored value has stopped
# improving for more than 3 scheduler steps.
scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=3)

# Training Functions

In [8]:
def train_one_epoch(model, optimizer, dataloader):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    The loss of a batch is the L1 error averaged over all label positions that
    are not ``LABEL_PADDING_VALUE``.

    Relies on notebook-level globals: ``DEVICE``, ``LABEL_PADDING_VALUE``,
    ``continuous_loss_fn`` (element-wise loss, no reduction) and
    ``progress_bar`` (a tqdm bar created by the caller, advanced once per batch).

    Args:
        model: Network to train; called as ``model(inputs)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        dataloader: Iterable yielding ``(inputs, alpha_labels, _, _)`` batches.

    Returns:
        The average of the per-batch losses as a float, or ``nan`` when the
        dataloader yields no batches.
    """
    model.train()
    running_loss = 0.0
    runs = 0

    for inputs, alpha_labels, _, _ in dataloader:
        inputs, alpha_labels = inputs.to(DEVICE), alpha_labels.to(DEVICE)

        # 1.0 where a real label is present, 0.0 on padded positions.
        mask = (alpha_labels != LABEL_PADDING_VALUE).float()

        outputs = model(inputs).squeeze(-1)
        per_element_loss = continuous_loss_fn(outputs, alpha_labels)

        # Clamp the number of valid positions to at least 1: a batch that only
        # contains padding would otherwise give 0/0 = NaN, and back-propagating
        # a NaN loss would corrupt the weights. Non-empty masks are unaffected.
        valid_count = mask.sum().clamp(min=1)
        total_loss = (per_element_loss * mask).sum() / valid_count

        optimizer.zero_grad()
        total_loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        running_loss += total_loss.item()
        runs += 1

        progress_bar.update()

    if runs == 0:
        # No batches at all: report "no measurement" instead of dividing by zero.
        return float("nan")
    return running_loss / runs

def evaluate_model(model, dataloader):
    """Return the mean masked L1 loss of ``model`` over ``dataloader``.

    Runs in eval mode with gradient tracking disabled. The loss of a batch is
    the L1 error averaged over all label positions that are not
    ``LABEL_PADDING_VALUE``.

    Relies on notebook-level globals: ``DEVICE``, ``LABEL_PADDING_VALUE`` and
    ``continuous_loss_fn`` (element-wise loss, no reduction).

    Args:
        model: Network to evaluate; called as ``model(inputs)``.
        dataloader: Iterable yielding ``(inputs, alpha_labels, _, _)`` batches.

    Returns:
        The average of the per-batch losses as a float, or ``nan`` when the
        dataloader yields no batches.
    """
    model.eval()

    running_val_total = 0.0
    val_runs = 0

    with torch.no_grad():
        for inputs, alpha_labels, _, _ in dataloader:
            inputs, alpha_labels = inputs.to(DEVICE), alpha_labels.to(DEVICE)

            # 1.0 where a real label is present, 0.0 on padded positions.
            mask = (alpha_labels != LABEL_PADDING_VALUE).float()

            outputs = model(inputs).squeeze(-1)
            per_element_loss = continuous_loss_fn(outputs, alpha_labels)

            # Clamp the number of valid positions to at least 1 so a batch made
            # only of padding contributes 0 instead of NaN (0/0), which would
            # otherwise poison the running average.
            valid_count = mask.sum().clamp(min=1)
            total_loss = (per_element_loss * mask).sum() / valid_count

            running_val_total += total_loss.item()
            val_runs += 1

    if val_runs == 0:
        # No batches at all: report "no measurement" instead of dividing by zero.
        return float("nan")
    return running_val_total / val_runs

# Train

In [None]:
os.makedirs(model_directory, exist_ok=True)
# Optional warm start from an earlier checkpoint:
# model.load_state_dict(torch.load('/home/haidiri/Desktop/AnDiChallenge2024/models/checkpoints/alpha_model/model_20241025_132405/model_2'))

# Bound up front so later cells can still reference it when no epoch improves
# on best_val_loss (for example if the validation loss is NaN).
best_model_path = None

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch + 1))

    # train_one_epoch advances the notebook-level `progress_bar`. The context
    # manager closes each epoch's bar as soon as training for that epoch ends,
    # instead of leaving every bar but the last one open.
    with tqdm(total=len(train_loader), desc='Training', position=0) as progress_bar:
        avg_training_loss = train_one_epoch(model, optimizer, train_loader)
    val_total_loss = evaluate_model(model, val_loader)

    print(f'Training LOSS: Alpha {avg_training_loss}\n'
          f'Validation LOSS: Alpha {val_total_loss} \n')

    writer.add_scalars('Losses', {
        'Training Alpha Loss': avg_training_loss,
        'Validation Alpha Loss': val_total_loss,
        }, epoch + 1)

    writer.flush()

    # Save a checkpoint each time the validation loss reaches a new minimum.
    if val_total_loss < best_val_loss:
        best_val_loss = val_total_loss
        best_model_path = os.path.join(model_directory, f'model_{epoch + 1}')
        torch.save(model.state_dict(), best_model_path)

    # The scheduler monitors the validation loss to decide on LR reductions.
    scheduler.step(val_total_loss)

writer.close()

In [None]:
# Report the outcome of the training cell: lowest validation loss and the
# checkpoint file that was written for it.
summary_rows = [
    ("Best Validation Loss:", best_val_loss),
    ("Best Model Path", best_model_path),
]
for row in summary_rows:
    print(*row)

# Testing

In [None]:
# load the best model
model = RegressionModel().to(DEVICE)
model.load_state_dict(torch.load('<best train file>'))
model.eval()

In [None]:
running_test_total = 0.0
test_runs = 0

# Per-sequence model outputs and labels (numpy arrays, padding included),
# collected for the plotting cell.
predictions = []
ground_truth = []

# The context manager closes the progress bar even if a batch raises.
with tqdm(total=len(test_loader), desc='Testing', position=0) as progress_bar, torch.no_grad():
    for inputs, alpha_labels, _, _ in test_loader:
        inputs, alpha_labels = inputs.to(DEVICE), alpha_labels.to(DEVICE)

        # 1.0 where a real label is present, 0.0 on padded positions.
        mask = (alpha_labels != LABEL_PADDING_VALUE).float()
        outputs = model(inputs).squeeze(-1)

        # Clamp the number of valid positions to at least 1 so a batch made
        # only of padding contributes 0 instead of NaN (0/0).
        valid_count = mask.sum().clamp(min=1)
        total_loss = (continuous_loss_fn(outputs, alpha_labels) * mask).sum() / valid_count

        running_test_total += total_loss.item()
        test_runs += 1

        predictions.extend(outputs.cpu().numpy())
        ground_truth.extend(alpha_labels.cpu().numpy())
        progress_bar.update()

# Calculate average losses (nan when the test loader produced no batches)
avg_test_loss = running_test_total / test_runs if test_runs else float("nan")
print(f'Average test loss: {avg_test_loss}')

# Average test loss: 0.11042290411644512 with normal weights
# Average test loss: 0.11053658076882363 with alpha fixed


# Plot Predictions

In [None]:
# Index of the test sequence to visualise.
INDEX = 0

# Find where the padding begins. argmax() alone returns 0 both for "padding
# starts at position 0" and for "no padding at all", so check explicitly and
# fall back to the actual sequence length rather than a hard-coded 200.
is_padding = ground_truth[INDEX] == LABEL_PADDING_VALUE
if is_padding.any():
    padding_starts = int(is_padding.argmax())
else:
    padding_starts = len(ground_truth[INDEX])

pred_alpha = predictions[INDEX][:padding_starts]
true_alpha = ground_truth[INDEX][:padding_starts]
time = list(range(padding_starts))

# Mean absolute error on the unpadded part. The arrays are numpy arrays, which
# have no .abs() method, so use the built-in abs().
print(abs(pred_alpha - true_alpha).mean())
plt.scatter(time, pred_alpha, color="red")
plt.scatter(time, true_alpha, color="blue")
plt.title("Alpha Predictions")
plt.show()