In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import random
from itertools import product


In [3]:
# Convert PIL images to float tensors.
# See https://pytorch.org/vision/main/generated/torchvision.transforms.ToTensor.html
# A normalization step can be composed on top if desired:
# https://pytorch.org/vision/main/generated/torchvision.transforms.Normalize.html
transform = transforms.ToTensor()

DATA_ROOT = './data'   # download / cache directory for Fashion-MNIST
BATCH_SIZE = 64
VAL_FRACTION = 0.1     # share of the official training split held out for validation
SPLIT_SEED = 42        # keeps the train/validation partition identical across kernel restarts

full_train_dataset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=False, download=True, transform=transform)

# Split the official training set 90% / 10% into training and validation subsets.
# The full set keeps its own name so re-running this cell never splits an
# already-split subset.
val_size = int(VAL_FRACTION * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# See https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class FashionMNIST_NN(nn.Module):
    """Fully connected classifier for 28x28 single-channel images with 10 classes.

    Architecture: flatten -> Linear(784, 256) -> ReLU -> Dropout
                  -> Linear(256, 128) -> ReLU -> Dropout -> Linear(128, 10).

    The linear layers are direct attributes of the module (not wrapped in a
    container) so that helpers iterating over ``model.children()`` can find them.

    Args:
        dropout_rate: Probability of zeroing an activation in the two dropout
            applications. Dropout is only active in training mode.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(batch, 10)``.

        No softmax is applied here; ``nn.CrossEntropyLoss`` expects logits.
        """
        x = self.flatten(x)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        out = self.fc3(x)
        return out

In [None]:
def initialize_weights(model, method='he'):
    """Re-initialize the weights of every ``nn.Linear`` layer in ``model`` in place.

    Biases are left untouched.

    Args:
        model: An ``nn.Module``. All of its sub-modules are visited, including
            layers nested inside containers such as ``nn.Sequential``.
        method: Initialization scheme:
            ``'xavier'``          -> Xavier/Glorot uniform,
            ``'he'``/``'kaiming'`` -> Kaiming (He) uniform for ReLU,
            ``'normal'``          -> zero-mean normal with std 0.01.

    Raises:
        ValueError: If ``method`` is not one of the supported names. Previously
            an unrecognised name (including the default ``'he'``) was silently
            ignored, leaving PyTorch's default initialization in place.
    """
    if method == 'xavier':
        def init_fn(weight):
            nn.init.xavier_uniform_(weight)
    elif method in ('he', 'kaiming'):
        def init_fn(weight):
            nn.init.kaiming_uniform_(weight, nonlinearity='relu')
    elif method == 'normal':
        def init_fn(weight):
            nn.init.normal_(weight, mean=0.0, std=0.01)
    else:
        raise ValueError(
            f"Unknown weight initialization method {method!r}; "
            "expected 'xavier', 'he', 'kaiming' or 'normal'"
        )

    # modules() walks the whole module tree, whereas children() only yields
    # direct children and would skip layers nested inside containers.
    for layer in model.modules():
        if isinstance(layer, nn.Linear):
            init_fn(layer.weight)

In [None]:
# An implementation of the early-stopping method
class EarlyStopping:
    """Stop training once the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss and the
    model. Whenever the loss improves, the model's ``state_dict`` is written to
    ``path`` (overwriting the previous checkpoint) and the patience counter is
    reset. After ``patience`` consecutive calls without improvement,
    ``early_stop`` becomes ``True``.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        delta: Minimum decrease of the loss (relative to the best loss seen)
            that counts as an improvement. With ``delta=0`` an equal loss
            still counts as an improvement.
        path: File the best weights are saved to. A single fixed file is used
            so the best checkpoint can be reloaded by name afterwards.
    """

    def __init__(self, patience=5, delta=0, path='checkpoint.pth'):
        self.patience = patience
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # negated best validation loss seen so far
        self.early_stop = False
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stopping state."""
        score = -val_loss  # higher score == lower loss

        # NaN compares unequal to itself. A NaN loss must never be treated as
        # an improvement, otherwise a diverged model would be checkpointed.
        is_nan = score != score
        improved = (not is_nan) and (
            self.best_score is None or score >= self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Write the model weights to ``self.path``.

        ``val_loss`` is accepted for interface compatibility; it is not part of
        the file name, so only one checkpoint file exists at any time.
        """
        torch.save(model.state_dict(), self.path)

In [None]:
def train_model(l1_lambda=0, l2_lambda=0.01, dropout_rate=0.5, weight_init='he',
                lr=0.01, num_epochs=20):
    """Train a ``FashionMNIST_NN`` on ``train_loader`` and validate on ``val_loader``.

    Args:
        l1_lambda: Strength of the L1 penalty added to the training loss
            (sum of absolute values of all parameters). ``0`` disables it.
        l2_lambda: Strength of L2 regularization, applied through the SGD
            optimizer's ``weight_decay``.
        dropout_rate: Dropout probability passed to the model.
        weight_init: Initialization scheme passed to ``initialize_weights``.
        lr: Learning rate for SGD.
        num_epochs: Maximum number of epochs; early stopping may end sooner.

    Returns:
        ``(model, train_losses, val_losses)``. ``model`` carries the weights
        from the epoch with the lowest validation loss. The two lists hold the
        per-epoch mean batch loss. The training loss includes the L1 penalty;
        the validation loss does not.
    """
    model = FashionMNIST_NN(dropout_rate=dropout_rate)
    initialize_weights(model, weight_init)  # Initialize weights with the chosen method

    # Stochastic gradient descent; weight_decay implements the L2 penalty.
    # https://pytorch.org/docs/stable/generated/torch.optim.SGD.html
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=l2_lambda)

    # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
    criterion = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(patience=5)

    train_losses = []
    val_losses = []

    # The best weights are kept in memory so restoring them does not depend on
    # how (or whether) EarlyStopping names its checkpoint files on disk.
    best_val_loss = float('inf')
    best_state = None

    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # L1 regularization; skipped entirely when disabled to avoid
            # summing over every parameter for a zero contribution.
            if l1_lambda:
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss = loss + l1_lambda * l1_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()

        train_losses.append(running_loss / len(train_loader))
        val_losses.append(val_loss / len(val_loader))

        print(f"Epoch {epoch+1}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")

        # Snapshot the weights whenever the validation loss is at least as good
        # as the best seen so far. Tensors are cloned because state_dict()
        # returns references to the live parameters.
        if val_losses[-1] <= best_val_loss:
            best_val_loss = val_losses[-1]
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}

        # Early stopping is driven by the same mean loss that is logged above.
        early_stopping(val_losses[-1], model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, train_losses, val_losses


In [None]:
# Test model 
def test_model(model):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            # Do forward prop
            # Get predictions

            #all_preds.append(preds)
            #all_labels.append(labels)
    
    # Check out https://pytorch.org/docs/stable/generated/torch.cat.html
    all_preds = torch.cat(all_preds)    # Concatenates the given sequence of seq tensors in the given dimension to compute accuracy score
    all_labels = torch.cat(all_labels)  # Concatenates the given sequence of seq tensors in the given dimension to compute accuracy score

    # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
    test_acc = # calcuate accuracy #accuracy_score(all_labels.cpu(), all_preds.cpu())
    print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# Plot the losses
def plot_losses(train_losses, val_losses):
    """Plot per-epoch training and validation loss curves on one figure.

    Args:
        train_losses: Sequence of training losses, one value per epoch.
        val_losses: Sequence of validation losses, one value per epoch.
    """
    fig, ax = plt.subplots()

    # Epochs are numbered from 1 to match the training log output.
    ax.plot(range(1, len(train_losses) + 1), train_losses, label='Train loss')
    ax.plot(range(1, len(val_losses) + 1), val_losses, label='Validation loss')

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Training vs. validation loss')
    ax.legend()
    plt.show()

In [None]:
# Running the experiment

# Seed every RNG in use so weight initialization, dropout masks and batch
# shuffling are repeatable across Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

l1_lambda = 0.01  # L1 regularization strength
l2_lambda = 0.01  # L2 regularization strength
dropout_rate = 0.0  # Dropout probability (0.0 disables dropout)
weight_init = 'xavier'  # Weight initialization method passed to initialize_weights

model, train_losses, val_losses = train_model(
    l1_lambda=l1_lambda,
    l2_lambda=l2_lambda,
    dropout_rate=dropout_rate,
    weight_init=weight_init,
)

# Plot the losses
plot_losses(train_losses, val_losses)

# Test the model
test_acc = test_model(model)

# Hyperparameter Tuning 
## Grid Search

In [None]:
# Search space: each key is a hyperparameter name, each value the list of
# candidates to try for it. Key order determines the order of the Cartesian
# product built from this dict.
param_grid = dict(
    lr=[0.001, 0.0001, 0.01],            # learning rates
    dropout_rate=[0.3, 0.5, 0.7],        # dropout probabilities
    l1_lambda=[0.0, 0.001, 0.01],        # L1 regularization strengths
    l2_lambda=[0.0, 0.001, 0.01],        # L2 regularization strengths
    weight_init=['xavier', 'he'],        # weight initialization methods
)

# Grid search
def grid_search():
    """Train one model per hyperparameter combination and keep the best.

    Every key of ``param_grid`` that ``train_model`` accepts as a keyword
    argument is searched and forwarded. Keys it does not accept are reported
    and left out of the search, because varying them would only repeat
    identical training runs.

    Configurations are ranked by their lowest per-epoch validation loss, since
    ``train_model`` returns the weights of its best checkpoint rather than the
    weights of the final epoch.

    Returns:
        ``(best_model, best_params)``: the best model and the dict of
        hyperparameters it was trained with. Both are ``None`` if no
        configuration produced a validation loss.
    """
    import inspect  # local import: only needed to introspect train_model

    signature_params = inspect.signature(train_model).parameters
    accepts_var_kwargs = any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in signature_params.values()
    )
    search_keys = [key for key in param_grid
                   if accepts_var_kwargs or key in signature_params]
    ignored_keys = [key for key in param_grid if key not in search_keys]
    if ignored_keys:
        print(f"Ignoring hyperparameters not accepted by train_model: {ignored_keys}")

    best_val_loss = float('inf')
    best_params = None
    best_model = None

    # All combinations of the searched hyperparameters (Cartesian product).
    param_combinations = list(product(*(param_grid[key] for key in search_keys)))

    for i, combination in enumerate(param_combinations):
        params = dict(zip(search_keys, combination))
        print(f"Testing combination {i + 1}/{len(param_combinations)}: {params}")

        # Train the model with the current combination of hyperparameters
        model, _, val_losses = train_model(**params)

        if not val_losses:
            # No epoch was run, so there is nothing to rank this run by.
            continue

        val_loss = min(val_losses)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = params
            best_model = model
            print(f"New best validation loss: {best_val_loss:.4f}")

    return best_model, best_params

# Run the grid search, then report and evaluate the winning configuration.
best_model_grid, best_params_grid = grid_search()
print("Best Hyperparameters found by Grid Search:", best_params_grid, sep="\n")

print("Testing model from Grid Search:")
test_model(best_model_grid)

## Random Search

Implement random search and find the best parameters :)