# Homework 1. Part 3

In [1]:
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
from torch.utils.data import DataLoader

In [2]:
# Setting default device

if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

display(f'{device} is available')

dtype = torch.float
torch.set_default_device(device)

'cuda is available'

### Modifiable model

In [3]:
class ChangeableNetwork(nn.Module):
    def __init__(self, modification=512):
        super().__init__()
        self.linear_relu = nn.Sequential(
            nn.Linear(28*28, modification),
            nn.ReLU(),
            nn.Linear(modification, modification),
            nn.ReLU(),
            nn.Linear(modification, 10),
        )

    def forward(self, x):
        flat = nn.Flatten()
        x = flat(x)
        logits = self.linear_relu(x)
        return logits

### Utility Functions

In [4]:
def cnt_model_params(model):
    """Count model parameters"""
    count = 0
    with torch.no_grad():
        for param in model.parameters():
            count+=param.numel()
    return count

def display_model_info(model_name, model):
    """ Display model information"""
    count = 0
    for module in model.modules():
        if isinstance(module, nn.Module):
            count+=1
    display(model)
    display(f"{model_name}. parameters: {cnt_model_params(model)}")

In [5]:
# Randomly changed the lavels on the training dataset. Kept labels the same on test dataset.
training_ds = datasets.MNIST(root="data", train=True, download=True, transform=ToTensor(), target_transform=lambda label: torch.randint(0, 10, (1,)).item())
test_ds = datasets.MNIST(root='data', train=False, download=True, transform=ToTensor())


### Training & eval loops

In [6]:
def nmist_train_loop(dataloader, model, loss_fn, optimize):
    """Training loop funciton for non-linear function"""
    model.train()
    num_batches = len(dataloader)
    train_loss, correct = 0,0
    size = len(dataloader.dataset)

    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        optimize.zero_grad()
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        correct += (y_pred.argmax(dim=1) == y).type(torch.float).sum().item()

        loss.backward()
        optimize.step()
    
    train_loss /= num_batches
    correct /=size
    return train_loss, correct
        
def nmist_val_loop(dataloader, model, loss_fn):
    """Eval loop function for non-linear function"""
    model.eval()
    size= len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0,0

    with torch.no_grad():
        for X,y in dataloader:
            X = X.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y).item()
            correct += (y_pred.argmax(dim=1) == y).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /=size
    return test_loss, correct

## Can network fit random labels?

In [7]:
# model
random_model = ChangeableNetwork()

# hyperparameters
loss_fn = torch.nn.CrossEntropyLoss()
epochs = 50
lr = .01
batch_size=2000

random_label_df = pd.DataFrame()

# Dataloader
training_dl = DataLoader(training_ds, batch_size=batch_size)
test_dl = DataLoader(test_ds, batch_size=5000)

# convergence vals
patience = 10
best_val_loss = float('inf')
epochs_without_improvement = 0

optimizer1 = torch.optim.SGD(random_model.parameters(), lr=lr)
display("Training & eval: Model with random labels")
for epoch in range(epochs):
    train_loss, correct = nmist_train_loop(training_dl, random_model, loss_fn, optimizer1)
    val_loss, _ = nmist_val_loop(test_dl, random_model, loss_fn)

    if random_label_df.empty:
        random_label_df = pd.DataFrame([[epoch, train_loss, val_loss]], columns=['epoch','train_loss','val_loss'])
    else:
        random_label_df = pd.concat([random_label_df, pd.DataFrame([[epoch, train_loss, val_loss]], columns=['epoch','train_loss','val_loss'])])

    
    if epoch%10 == 0:
        display(f'Epoch {epoch}. Training_loss: {train_loss}. Val_loss: {val_loss}')
            
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement +=1

    if epochs_without_improvement >= patience:
        display(f'Convergence reached at {epoch}')
        if epoch%10 !=0:
            random_label_df = pd.concat([random_label_df, pd.DataFrame([[epoch, train_loss, val_loss]], columns=['epoch','train_loss','val_loss'])])
        break
display("Done")



'Training & eval: Model with random labels'

'Epoch 0. Training_loss: 2.3036702473958335. Val_loss: 2.3012797832489014'

'Epoch 10. Training_loss: 2.303404211997986. Val_loss: 2.3012900352478027'

'Convergence reached at 18'

'Done'

In [8]:
random_label_df

Unnamed: 0,epoch,train_loss,val_loss
0,0,2.30367,2.30128
0,10,2.303404,2.30129
0,18,2.303353,2.30132
