# Sochastic Gradient Descent with Model Building

In [1]:
import torch
import torch.optim as optim

import time
import datetime
import numpy as np
import json
import matplotlib.pyplot as plt 

import sls
import smb
from utils import *

## Train options

In [2]:
# Epochs to train for
epochs = 20

# Dataset-Model
TrainOptions = {1:('mnist', 'mlp'), 
                2:('cifar10', 'resnet34_10'), 
                3:('cifar10', 'densenet10'), 
                4:('cifar100', 'resnet34_100'), 
                5:('cifar100', 'densenet10')
                }
dataset_name, model_name = TrainOptions[1]


# Batch Size
batch_size = 128


## Load dataset and the model

In [3]:
# Check if GPU is available
use_GPU = torch.cuda.is_available()


seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
    

# Get Dataset
train_set, test_set, train_loader = get_dataset(dataset_name, batch_size)
n_batches_per_epoch = len(train_loader)

 
# Get Model
model = get_model(model_name)
if use_GPU:
    model.cuda()
    
opt_out_list = []

## Train with SMB optimizer

SMB optimizer requires a closure function. You can its form below.

In [4]:
independent_batch = False
autoschedule = False

opt_out = {}

if independent_batch:
    opt_out.update({'name':'SMBi'})
else:
    opt_out.update({'name':'SMB'})

opt_out.update({'independent_batch':independent_batch, 
           'autoschedule':autoschedule, 
           'gamma':0.05, 
           'beta':0.9, 
           'lr':1, 
           'c':0.1, 
           'eta':0.99, 
           'data':dataset_name, 
           'model':model_name, 
           })
 
 
# loss function
criterion = softmax_loss
 
optimizer = smb.SMB(model.parameters(), 
                lr=opt_out['lr'], 
                c=opt_out['c'], 
                eta=opt_out['eta'], 
                independent_batch=opt_out['independent_batch'], 
                autoschedule=opt_out['autoschedule'],
                n_batches_per_epoch=n_batches_per_epoch)

print('\n' + 'Starting to train with {} optimizer: For {} epochs'.format(opt_out['name'], epochs))

train_loss_list = []
test_acc_list = []
run_time_list = []


for epoch in range(1, epochs+1):
    
    step_type = []
        
    begin = time.time()
    
    # training steps
    model.train()
    
    for batch_index, (data, target) in enumerate(train_loader):
        
        # moves tensors to GPU if available
        if use_GPU:
            data, target = data.cuda(), target.cuda() 
            
        # create loss closure for smb algorithm
        def closure():
            optimizer.zero_grad()
            loss = criterion(model, data, target)
            return loss
        
        # forward pass
        loss = optimizer.step(closure=closure)
        
    end = time.time()
        
    train_loss = compute_loss(model, train_set)
    test_acc = compute_accuracy(model, test_set)
        
    train_loss_list.append(train_loss)
    test_acc_list.append(test_acc)
    run_time_list.append(end-begin)
        
    # Display loss statistics
    print(f'Epoch: {epoch}   -   Training Loss: {round(train_loss, 6)}  -  Test Accuracy: {round(test_acc, 6)}  -  Time: {round(end-begin, 2)}')

    
opt_out.update({'train_loss':train_loss_list,
                 'test_acc':test_acc_list,
                 'run_time':run_time_list,
                })

opt_out_list.append(opt_out)


Starting to train with SMB optimizer: For 20 epochs
Epoch: 1   -   Training Loss: 0.534047  -  Test Accuracy: 0.8433  -  Time: 14.74
Epoch: 2   -   Training Loss: 0.287748  -  Test Accuracy: 0.9141  -  Time: 14.77
Epoch: 3   -   Training Loss: 0.262469  -  Test Accuracy: 0.919  -  Time: 14.39
Epoch: 4   -   Training Loss: 0.330394  -  Test Accuracy: 0.9036  -  Time: 14.07
Epoch: 5   -   Training Loss: 0.199763  -  Test Accuracy: 0.9392  -  Time: 13.72
Epoch: 6   -   Training Loss: 0.224365  -  Test Accuracy: 0.9315  -  Time: 13.63
Epoch: 7   -   Training Loss: 0.196397  -  Test Accuracy: 0.9393  -  Time: 13.57
Epoch: 8   -   Training Loss: 0.185515  -  Test Accuracy: 0.9379  -  Time: 13.47
Epoch: 9   -   Training Loss: 0.122668  -  Test Accuracy: 0.9562  -  Time: 13.33
Epoch: 10   -   Training Loss: 0.117434  -  Test Accuracy: 0.9605  -  Time: 13.21
Epoch: 11   -   Training Loss: 0.149203  -  Test Accuracy: 0.9499  -  Time: 13.39
Epoch: 12   -   Training Loss: 0.115166  -  Test Accura

## Train with SLS optimizer

In [None]:
opt_out = {'name':'SLS', 
           'lr':1, 
           'c':0.1, 
           'reset_option':1, 
           'data':dataset_name, 
           'model':model_name, 
           }

 
# loss function
criterion = softmax_loss


optimizer = sls.Sls(model.parameters(), 
                    init_step_size=opt_out['lr'], 
                    reset_option=opt_out['reset_option'], 
                    c=opt_out['c'], 
                    n_batches_per_epoch=n_batches_per_epoch
                   )

print('\n' + 'Starting to train with {} optimizer: For {} epochs'.format(opt_out['name'], epochs))


train_loss_list = []
train_iter_loss_list = []
test_acc_list = []
run_time_list = []

loss = None

for epoch in range(1, epochs+1):
        
    begin = time.time()
    
    # training steps
    model.train()
    
    for batch_index, (data, target) in enumerate(train_loader):
        
        # moves tensors to GPU
        if use_GPU:
            data, target = data.cuda(), target.cuda() 
            
        # create loss closure for sls algorithm
        closure = lambda :  criterion(model, data, target)  
        # clears gradients
        optimizer.zero_grad()
        
        loss = optimizer.step(closure=closure)
        
        train_iter_loss_list.append(loss.item())
        
    end = time.time()
    
    train_loss = compute_loss(model, train_set)
    test_acc = compute_accuracy(model, test_set)
        
    train_loss_list.append(train_loss)
    test_acc_list.append(test_acc)
    run_time_list.append(end-begin)
        
    # Display loss statistics
    print(f'Epoch: {epoch}   -   Training Loss: {round(train_loss, 6)}  -  Test Accuracy: {round(test_acc, 6)}  -  Time: {round(end-begin, 2)}')
    
    #print(epoch, end=' ')
    
opt_out.update({'train_loss':train_loss_list,
                 'test_acc':test_acc_list,
                 'run_time':run_time_list,
                 'train_iter_loss':train_iter_loss_list,
                })

opt_out_list.append(opt_out)


Starting to train with SLS optimizer: For 20 epochs
Epoch: 1   -   Training Loss: 0.053302  -  Test Accuracy: 0.9694  -  Time: 12.86
Epoch: 2   -   Training Loss: 0.038561  -  Test Accuracy: 0.971  -  Time: 12.79
Epoch: 3   -   Training Loss: 0.030447  -  Test Accuracy: 0.9735  -  Time: 12.94
Epoch: 4   -   Training Loss: 0.031635  -  Test Accuracy: 0.973  -  Time: 12.97
Epoch: 5   -   Training Loss: 0.020154  -  Test Accuracy: 0.9745  -  Time: 12.89
Epoch: 6   -   Training Loss: 0.017805  -  Test Accuracy: 0.9736  -  Time: 12.81
Epoch: 7   -   Training Loss: 0.017402  -  Test Accuracy: 0.9744  -  Time: 12.82
Epoch: 8   -   Training Loss: 0.013552  -  Test Accuracy: 0.9749  -  Time: 12.89
Epoch: 9   -   Training Loss: 0.014434  -  Test Accuracy: 0.9745  -  Time: 12.85
Epoch: 10   -   Training Loss: 0.011622  -  Test Accuracy: 0.9743  -  Time: 13.12
Epoch: 11   -   Training Loss: 0.008599  -  Test Accuracy: 0.9743  -  Time: 12.81
Epoch: 12   -   Training Loss: 0.007645  -  Test Accurac

## Train with ADAM optimizer

In [None]:
opt_out = {'name':'Adam', 
           'lr':0.001, 
           'data':dataset_name, 
           'model':model_name,
           } 
 
# loss function
criterion = softmax_loss

# optimizer
optimizer = optim.Adam(model.parameters(), lr = opt_out['lr'])


print('\n' + 'Starting to train with {} optimizer: For {} epochs'.format(opt_out['name'], epochs))


train_loss_list = []
test_acc_list = []
run_time_list = []
    

for epoch in range(1, epochs+1):
        
    begin = time.time()

    # training steps
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):            
            
        # moves tensors to GPU
        if use_GPU:
            data, target = data.cuda(), target.cuda()     
        # clears gradients
        optimizer.zero_grad()
        # loss in batch
        loss = criterion(model, data, target)
        # backward pass for loss gradient
        loss.backward()
            
            
        # update paremeters
        optimizer.step()
            
    end = time.time()
    
    
    # Calculate metrics
    train_loss = compute_loss(model, train_set)
    test_acc = compute_accuracy(model, test_set)
    
    train_loss_list.append(train_loss)
    test_acc_list.append(test_acc)
    run_time_list.append(end-begin)
        
    # Display loss statistics
    print(f'Epoch: {epoch}   -   Training Loss: {round(train_loss, 6)}   -   Test Accuracy: {round(test_acc, 6)}  -  Time: {round(end-begin, 2)}')


opt_out.update({'train_loss':train_loss_list,
                 'test_acc':test_acc_list,
                 'run_time':run_time_list,
                })

opt_out_list.append(opt_out)

## Train with SGD optimizer

In [None]:
opt_out = {'name':'SGD', 
           'lr':0.1, 
           'data':dataset_name, 
           'model':model_name,
           }
 
# loss function
criterion = softmax_loss

# optimizer
optimizer = optim.SGD(model.parameters(), lr = opt_out['lr'])

print('\n' + 'Starting to train with {} optimizer: For {} epochs'.format(opt_out['name'], epochs))


train_loss_list = []
test_acc_list = []
run_time_list = []
    

for epoch in range(1, epochs+1):
        
    begin = time.time()

    # training steps
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):            
            
        # moves tensors to GPU
        if use_GPU:
            data, target = data.cuda(), target.cuda()     
        # clears gradients
        optimizer.zero_grad()
        # loss in batch
        loss = criterion(model, data, target)
        # backward pass for loss gradient
        loss.backward()
            
            
        # update paremeters
        optimizer.step()
            
    end = time.time()
    
    
    # Calculate metrics
    train_loss = compute_loss(model, train_set)
    test_acc = compute_accuracy(model, test_set)
    
    train_loss_list.append(train_loss)
    test_acc_list.append(test_acc)
    run_time_list.append(end-begin)
        
    # Display loss statistics
    print(f'Epoch: {epoch}   -   Training Loss: {round(train_loss, 6)}   -   Test Accuracy: {round(test_acc, 6)}  -  Time: {round(end-begin, 2)}')


opt_out.update({'train_loss':train_loss_list,
                 'test_acc':test_acc_list,
                 'run_time':run_time_list,
                })

opt_out_list.append(opt_out)

In [None]:
show_loss_acc_graph(opt_out_list, "{}-{}".format(dataset_name, model_name))

In [None]:
show_time_graph(opt_out_list, "{}-{}".format(dataset_name, model_name))