<h1> Model.ipynb </h1>

This file contains the model definition and training for the Sheik classification problem. We begin by loading the required packages and checking which device is available to PyTorch (the GPU if one is present, otherwise the CPU).

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
# import pandas as pd
import numpy as np 
from prettytable import PrettyTable
import time

# Seed the random number generators so that weight initialisation and batch
# shuffling repeat exactly on every Restart & Run All.
SEED = 42  # same value as the random_state passed to train_test_split below
torch.manual_seed(SEED)
np.random.seed(SEED)

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

<h1> Model Definition</h1>
GPT4: For your model, using F.sigmoid at the end is not recommended for binary classification tasks due to numerical stability issues. Instead, it's better to output raw logits from the last layer and use a loss function that includes the sigmoid operation, like nn.BCEWithLogitsLoss. For binary classification, nn.BCEWithLogitsLoss is suitable as it combines a sigmoid layer and the BCE loss in a single class, which is more numerically stable than using a plain nn.Sigmoid followed by nn.BCELoss.

In [None]:
class Net(nn.Module):
    """1-D convolutional binary classifier that returns one raw logit per sample.

    The first convolution takes 9 input channels, so inputs are laid out as
    (batch, 9, length). The sequence length is not fixed here: ``fc1`` is a
    lazy layer that infers its input width on the first forward pass.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: channels go 9 -> 18 -> 36.
        self.conv1 = nn.Conv1d(in_channels=9, out_channels=18, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=18, out_channels=36, kernel_size=5)
        # Despite the name, this is a learned down-sampling step: a stride-4
        # convolution that also halves the channel count (36 -> 18).
        self.pool1 = nn.Conv1d(in_channels=36, out_channels=18, kernel_size=9, stride=4)
        # Fully connected head; in_features of fc1 is inferred at first use.
        self.fc1 = nn.LazyLinear(out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        """Map a batch of inputs to a (batch, 1) tensor of raw logits."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.pool1(features)
        # Keep the batch dimension, collapse everything else for the dense layers.
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # No sigmoid here: the logits are fed to nn.BCEWithLogitsLoss, which
        # applies the sigmoid itself.
        return self.fc3(hidden)


net = Net()

<h1> Read Data </h1>

We read the data saved by `data_processing.ipynb`.

In [None]:
# Features: stick inputs expressed as cartesian coordinates.
X = np.load('../data/training_inputs_cart_numpy_binary.npy')
# Alternative feature set with the stick inputs as polar coordinates:
# X = np.load('../data/training_inputs_polar_numpy_binary.npy')

# Labels, one per sample.
y = np.load('../data/labes_is_sheik_numpy_binary.npy')

# Sanity check: show the shape of the features, then of the labels.
for loaded in (X, y):
    print(loaded.shape)

<h1> Data Splitting </h1>

In [None]:
# Stage 1: set aside 20% of all samples as the holdout set. Stratifying on y
# keeps the class balance the same in both parts.
X_train_val, X_holdout, y_train_val, y_holdout = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Stage 2: take 25% of the remaining 80% for validation
# (0.25 * 0.8 = 0.2), giving a 60/20/20 train/validation/holdout split.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    stratify=y_train_val,
    random_state=42,
)

<h1> Data Loader </h1>

In [None]:
def _to_dataset(features, targets):
    """Wrap a features/labels array pair as float32 tensors in a TensorDataset."""
    return TensorDataset(
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.float32),
    )


# One dataset per split.
train_dataset = _to_dataset(X_train, y_train)
val_dataset = _to_dataset(X_val, y_val)
holdout_dataset = _to_dataset(X_holdout, y_holdout)

# Loader settings (both can be tuned).
batch_size = 64
num_workers = 1

# Only the training data is shuffled; validation and holdout keep their order.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
val_loader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)
holdout_loader = DataLoader(
    holdout_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

<h1> Model Training </h1>

Below are the training setup and training loop for the model.

In [None]:
# Materialise the lazy layer before building the optimizer. Net.fc1 is an
# nn.LazyLinear whose weight has no shape until the first forward pass; the
# PyTorch lazy-module documentation recommends a dry run first and attaching
# the optimizer afterwards.
with torch.no_grad():
    net(train_dataset[:1][0])  # a single training sample is enough to fix the shapes

# Setup optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)
# BCEWithLogitsLoss applies the sigmoid internally, so the network is trained
# on the raw logits it outputs.
criterion = nn.BCEWithLogitsLoss()

def calculate_accuracy(outputs, labels):
    """Return the fraction of samples whose thresholded prediction equals its label.

    Args:
        outputs: Raw logits, one per sample, e.g. shaped (N, 1) or (N,).
        labels: Binary targets with the same number of elements as ``outputs``,
            e.g. shaped (N,) or (N, 1).

    Returns:
        A 0-dim float tensor holding ``correct / N``.
    """
    # Apply sigmoid and threshold at 0.5
    preds = torch.sigmoid(outputs) >= 0.5
    # Flatten both sides so the comparison is strictly element-wise. Comparing
    # an (N,) tensor with an (N, 1) tensor would broadcast to (N, N) and
    # inflate the number of "correct" predictions.
    flat_preds = preds.reshape(-1).long()
    flat_labels = labels.reshape(-1).long()
    correct = (flat_preds == flat_labels).float().sum()
    return correct / flat_labels.shape[0]

def train_epoch(model, dataloader, optimizer, criterion):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network producing one logit per sample.
        dataloader: Yields ``(inputs, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, labels)`` on 1-D tensors.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: per-sample averages over the whole
        dataset. Each batch contributes in proportion to its size, so a short
        final batch does not skew the mean.
    """
    model.train()
    # Batches are moved to wherever the model's weights live, so the function
    # works unchanged whether the model sits on the CPU or on a GPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    running_loss = 0.0
    running_accuracy = 0.0
    for inputs, labels in dataloader:
        if target_device is not None:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)

        optimizer.zero_grad()
        outputs = model(inputs)
        # reshape(-1) rather than squeeze(): squeeze() collapses a final batch
        # of size 1 from (1, 1) to a 0-dim tensor, which no longer matches the
        # (1,) labels and makes the loss raise a size-mismatch error.
        loss = criterion(outputs.reshape(-1), labels.reshape(-1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_accuracy = running_accuracy / len(dataloader.dataset)
    return epoch_loss, epoch_accuracy

def validate_epoch(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Args:
        model: Network producing one logit per sample.
        dataloader: Yields ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)`` on 1-D tensors.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: per-sample averages over the whole
        dataset, with each batch weighted by its size.
    """
    model.eval()
    # Batches are moved to wherever the model's weights live, so the function
    # works unchanged whether the model sits on the CPU or on a GPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    running_loss = 0.0
    running_accuracy = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader:
            if target_device is not None:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

            outputs = model(inputs)
            # reshape(-1) rather than squeeze(): squeeze() collapses a final
            # batch of size 1 from (1, 1) to a 0-dim tensor, which no longer
            # matches the (1,) labels and makes the loss raise.
            loss = criterion(outputs.reshape(-1), labels.reshape(-1))

            running_loss += loss.item() * inputs.size(0)
            running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_accuracy = running_accuracy / len(dataloader.dataset)
    return epoch_loss, epoch_accuracy

# Train for a fixed number of epochs. After each epoch, log how long it took
# together with the training and validation loss and accuracy.
epochs = 10
for epoch in range(epochs):
    start_time = time.time()

    train_loss, train_accuracy = train_epoch(
        model=net, dataloader=train_loader, optimizer=optimizer, criterion=criterion
    )
    val_loss, val_accuracy = validate_epoch(
        model=net, dataloader=val_loader, criterion=criterion
    )

    # The duration covers both the training and the validation pass.
    end_time = time.time()
    epoch_duration = end_time - start_time

    epoch_summary = f'Epoch {epoch+1}/{epochs} - Duration: {epoch_duration:.2f}s - Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f} - Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}'
    tqdm.write(epoch_summary)

<h1> Holdout Test </h1>

We test the model on the holdout data.

In [None]:
def evaluate_holdout(model, dataloader, criterion):
    """Measure loss and accuracy of ``model`` on the holdout data.

    Args:
        model: Trained network producing one logit per sample.
        dataloader: Yields ``(inputs, labels)`` batches of holdout data.
        criterion: Loss called as ``criterion(logits, labels)`` on 1-D tensors.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: per-sample averages over the whole
        dataset, with each batch weighted by its size.
    """
    model.eval()  # Set model to evaluation mode
    # Batches are moved to wherever the model's weights live, so the function
    # works unchanged whether the model sits on the CPU or on a GPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    running_loss = 0.0
    running_accuracy = 0.0
    with torch.no_grad():  # No gradients needed
        for inputs, labels in dataloader:
            if target_device is not None:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

            outputs = model(inputs)
            # reshape(-1) rather than squeeze(): squeeze() collapses a final
            # batch of size 1 from (1, 1) to a 0-dim tensor, which no longer
            # matches the (1,) labels and makes the loss raise.
            loss = criterion(outputs.reshape(-1), labels.reshape(-1))

            running_loss += loss.item() * inputs.size(0)
            running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_accuracy = running_accuracy / len(dataloader.dataset)
    return epoch_loss, epoch_accuracy

# Evaluate model on holdout set after training is complete
holdout_loss, holdout_accuracy = evaluate_holdout(net, holdout_loader, criterion)
print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')

<h1> Model Parameter Count </h1>

In [None]:
def count_parameters(model):
    """Print a table of trainable parameter sizes and return their total.

    Parameters with ``requires_grad`` set to False are left out of both the
    table and the total.
    """
    table = PrettyTable(['Modules', 'Parameters'])
    total_params = 0
    trainable = (
        (name, parameter)
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    )
    for name, parameter in trainable:
        size = parameter.numel()
        table.add_row([name, size])
        total_params += size
    print(table)
    print(f'Total Trainable Params: {total_params}')
    return total_params

count_parameters(net)