# Project: Optimization for Machine Learning

### Initial Setup

In [1]:
import pickle
import random
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

In [2]:
# Code reproducibility
def set_seed(seed=1):
    """Seed every random number generator this notebook relies on.

    Seeds PyTorch, NumPy's global RNG and Python's ``random`` module with the
    same value, seeds all CUDA devices when CUDA is available, and sets the
    cuDNN ``deterministic`` / ``benchmark`` flags to ``True`` / ``False``.

    Args:
        seed: Integer seed applied to all generators. Defaults to 1.
    """
    # cuDNN flags first; they are independent of the seeding calls below.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Same seed for the three CPU-side generators.
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def seed_worker(worker_id):
    """Seed NumPy and ``random`` inside a DataLoader worker process.

    Meant to be passed as ``DataLoader(worker_init_fn=seed_worker)``. The seed
    is derived from ``torch.initial_seed()`` rather than from ``worker_id``:
    inside a worker PyTorch sets that value to ``base_seed + worker_id``, where
    ``base_seed`` is drawn from the loader's generator, so each worker gets a
    distinct stream that still follows the generator's seed. Seeding from
    ``worker_id`` alone would replay the same NumPy / ``random`` streams on
    every epoch and every run regardless of the configured seed.

    Args:
        worker_id: Index of the worker, supplied by the DataLoader. Unused
            directly; kept because the ``worker_init_fn`` signature requires it.
    """
    # NumPy's legacy seeding only accepts 32-bit values, hence the modulo.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

In [3]:
# Global experiment settings shared by the cells below.
BATCH_SIZE = 256  # mini-batch size used by both data loaders
EPOCHS = 20       # number of training epochs per run
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Dataset Preprocessing and CNN benchmark model

In [4]:
# Convert each image to a tensor, then normalise every RGB channel with
# mean 0.5 and standard deviation 0.5.
_channel_stats = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_channel_stats, _channel_stats),
])

# CIFAR-10 train / test splits, stored under ./data (downloaded if needed).
_cifar_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.CIFAR10(train=True, **_cifar_kwargs)
test_dataset = datasets.CIFAR10(train=False, **_cifar_kwargs)

# A single explicitly seeded generator is shared by both loaders; only the
# training loader shuffles.
g = torch.Generator().manual_seed(42)
_loader_kwargs = dict(batch_size=BATCH_SIZE, generator=g, worker_init_fn=seed_worker)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class CNN_model(nn.Module):
    """Small convolutional classifier that outputs 10 scores per image.

    ``features`` stacks two Conv(3x3, padding=1) -> ReLU -> MaxPool(2) stages
    (3 -> 64 -> 128 channels). ``classifier`` flattens the feature map and
    maps it through a 256-unit hidden layer to 10 outputs. The first linear
    layer expects 128 * 8 * 8 inputs, which corresponds to 32x32 images after
    the two 2x poolings.
    """

    def __init__(self):
        super().__init__()

        # Build the two convolutional stages in order so that layer indices
        # inside the Sequential stay 0..5.
        conv_layers = []
        for in_channels, out_channels in ((3, 64), (64, 128)):
            conv_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*conv_layers)

        hidden_units = 256
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        return self.classifier(self.features(x))

### Training and testing functions

In [6]:
def test(model, test_loader):
    """Compute the classification accuracy of ``model`` on ``test_loader``.

    The model is put in evaluation mode for the duration of the pass and is
    then returned to whichever mode (train or eval) it was in on entry, so
    evaluating between training epochs does not leave the model in eval mode.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Fraction of samples whose highest-scoring class equals the target.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
                outputs = model(inputs)
                # Predicted class = index of the largest score along dim 1.
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)
    accuracy = correct / total
    return accuracy

In [7]:
def train(model, optimizer, criterion, epochs, train_loader, eval_loader=None):
    """Train ``model`` and record the loss / accuracy trajectory.

    Before any parameter update, the average training loss and the evaluation
    accuracy are recorded as a baseline. Then, for each epoch, the model is
    trained on ``train_loader`` and evaluated with ``test``.

    Args:
        model: Module to optimise (expected to already live on ``DEVICE``).
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, targets)``.
        epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        eval_loader: Iterable of ``(inputs, targets)`` batches used for the
            accuracy measurements. Defaults to the module-level
            ``test_loader`` when omitted.

    Returns:
        ``(losses, acc_history)``: two lists of length ``epochs + 1``. Index 0
        holds the pre-training baseline; index ``i`` holds the mean per-batch
        training loss and the evaluation accuracy after epoch ``i``.
    """
    if eval_loader is None:
        # Backward-compatible default: the notebook's global test loader.
        eval_loader = test_loader

    losses = []
    acc_history = []

    # Baseline loss before any update. No gradients are needed here, so skip
    # building autograd graphs for the whole pass over the training set.
    model.train()
    initial_loss = 0
    with torch.no_grad():
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            outputs = model(inputs)
            initial_loss += criterion(outputs, targets).item()
    losses.append(initial_loss / len(train_loader))

    acc_history.append(test(model, eval_loader))

    for _ in range(epochs):
        # test() switches the model to eval mode; re-enter train mode at the
        # start of every epoch so layers such as dropout / batch norm behave
        # correctly during optimisation.
        model.train()
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        losses.append(total_loss / len(train_loader))
        acc_history.append(test(model, eval_loader))
    return losses, acc_history


### Comparison of optimizers

In [8]:
# Learning-rate grid swept for every optimizer.
learning_rates = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

# Each (optimizer, learning rate) configuration is repeated once per seed.
seeds = list(range(3))


def _optimizer_factory(optimizer_cls, **fixed_kwargs):
    """Return a ``(model, lr) -> optimizer`` builder for ``optimizer_cls``."""
    def build(model, lr):
        return optimizer_cls(model.parameters(), lr=lr, **fixed_kwargs)
    return build


# Optimizers to compare, keyed by the label used in the results and in the
# output file names. Every value is a callable taking (model, lr).
# NOTE(review): the original cell annotated some entries with values
# (0.01, 0.001, 0.002) that look like default learning rates — confirm.
optimizers_dict = {
    'Adam': _optimizer_factory(optim.Adam),
    'RMSprop': _optimizer_factory(optim.RMSprop),
    'AdaGrad': _optimizer_factory(optim.Adagrad),
    'AdamW': _optimizer_factory(optim.AdamW),
    'AmsGrad': _optimizer_factory(optim.Adam, amsgrad=True),
    'NAdam': _optimizer_factory(optim.NAdam),
    'RAdam': _optimizer_factory(optim.RAdam),
    'SGD': _optimizer_factory(optim.SGD),
}

In [9]:
# Sweep every optimizer over the learning-rate grid, repeating each
# configuration once per seed, and save the results to disk.
RESULTS_DIR = Path('simple_studies/8model_20epoch_saved_loss')
# Create the output directory up front: otherwise the first open(..., 'wb')
# raises FileNotFoundError only after a full optimizer sweep has finished.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# results[optimizer name][learning rate] -> dict of per-seed histories.
results = defaultdict(dict)

for opt_name, opt_fn in tqdm(optimizers_dict.items()):
    print(f"\n>>> Testing optimizer: {opt_name}")
    for lr in tqdm(learning_rates):
        print(f"  - LR = {lr}")

        results[opt_name][lr] = {'accuracies': [], 'losses': []}

        for seed in seeds:
            set_seed(seed)
            # The loaders shuffle with their own generator `g`, which
            # set_seed() does not touch. Reseed it per run so the batch order
            # depends only on `seed`, not on how many runs came before; this
            # gives every optimizer / learning rate the same data order for a
            # given seed and makes a single run reproducible in isolation.
            g.manual_seed(seed)

            model = CNN_model().to(DEVICE)
            optimizer = opt_fn(model, lr)
            criterion = nn.CrossEntropyLoss()
            losses, acc_history = train(model, optimizer, criterion, EPOCHS, train_loader)

            results[opt_name][lr]['accuracies'].append(acc_history)
            results[opt_name][lr]['losses'].append(losses)

    # Checkpoint after each optimizer. Note that the whole `results` mapping
    # accumulated so far is written, not only this optimizer's entry.
    with open(RESULTS_DIR / opt_name, 'wb') as f:
        pickle.dump(results, f)


# Aggregate across seeds: axis 0 indexes the seed, so mean/std are taken
# element-wise over the per-epoch histories.
for per_lr in results.values():
    for run_stats in per_lr.values():
        accs_all_seeds = np.array(run_stats['accuracies'])
        run_stats['mean_acc'] = accs_all_seeds.mean(axis=0)
        run_stats['std_acc'] = accs_all_seeds.std(axis=0)

        losses_all_seeds = np.array(run_stats['losses'])
        run_stats['mean_losses'] = losses_all_seeds.mean(axis=0)
        run_stats['std_losses'] = losses_all_seeds.std(axis=0)

# Final dump, now including the aggregated statistics.
with open(RESULTS_DIR / 'final_losses', 'wb') as f:
    pickle.dump(results, f)

  0%|          | 0/8 [00:00<?, ?it/s]


>>> Testing optimizer: Adam


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: RMSprop


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: AdaGrad


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: AdamW


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: AmsGrad


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: NAdam


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: RAdam


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1

>>> Testing optimizer: SGD


  0%|          | 0/5 [00:00<?, ?it/s]

  - LR = 1e-05
  - LR = 0.0001
  - LR = 0.001
  - LR = 0.01
  - LR = 0.1
