In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split

import optuna
import warnings
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_param_importances, plot_slice

In [2]:

def _get_data(DATA_PATH, TRAIN_BATCH_SIZE, TEST_BATCH_SIZE):
    try:
        mnist_training_dataset = datasets.MNIST(root=DATA_PATH+'train', train=True, download=True, transform=ToTensor())
        mnist_testing_dataset = datasets.MNIST(root=DATA_PATH+'test', train=False, download=True, transform=ToTensor())
        
        training_dataset, validation_dataset = random_split(mnist_training_dataset, [int(0.8*len(mnist_training_dataset)), int(00.2*len(mnist_training_dataset))])
        
        train_loader = DataLoader(training_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
        validation_loader = DataLoader(training_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=False)
        test_loader = DataLoader(mnist_testing_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)
        
        return train_loader, validation_loader, test_loader
    
    except Exception as e:
        print('Unable to get data due to ', e)

        
def _network_training(model, train_loader, validation_loader, criterion, optimizer, training_params, tuning=False):
    try:
        best_accuracy = 0
        loss_history = []
        training_acc_history = []
        validation_acc_history = []
        
        for epoch in range(0, training_params['NUM_EPOCHS']):
            model.train()
            loss_scores = []
            training_acc_scores = []
            correct_predictions = 0
            
            for batch_index, (images, targets) in enumerate(train_loader):
                images = images.to(training_params['DEVICE'])
                targets = targets.to(training_params['DEVICE'])
                
                outputs = model(images)
                loss = criterion(outputs, targets)
                loss_scores.append(loss.item())
                
                _, preds = torch.max(outputs, 1)
                correct_predictions = (preds==targets).sum().item()
                training_acc_scores.append(correct_predictions/targets.shape[0])
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                if not tuning:
                    if (batch_index+1) % 100 == 0:
                        print(f"Epoch : [{epoch+1}/{training_params['NUM_EPOCHS']}] | Step : [{batch_index+1}/{len(train_loader)}] | Loss : {loss.item()} ")
            
            loss_history.append((sum(loss_scores)/len(loss_scores)))
            training_acc_history.append((sum(training_acc_scores)/len(training_acc_scores))*100)      
            print(f'Epoch : {epoch+1} | Loss : {loss_history[-1]} | Training Accuracy : {training_acc_history[-1]}%')
       
            model.eval()
            with torch.no_grad():
                correct_predictions = 0
                validation_acc_scores = []

                for images, targets in iter(validation_loader):
                    images = images.to(training_params['DEVICE'])
                    targets = targets.to(training_params['DEVICE'])
                                
                    outputs = model(images)

                    _, preds = torch.max(outputs, 1)
                    correct_predictions = (preds == targets).sum().item()
                    validation_acc_scores.append(correct_predictions/targets.shape[0])

                validation_acc_history.append((sum(validation_acc_scores)/len(validation_acc_scores))*100)
                print(f'Epoch {epoch+1} | Validation Accuracy {validation_acc_history[-1]}%')
                
                if not tuning: 
                    if validation_acc_history[-1]>best_accuracy:
                        best_accuracy = validation_acc_history[-1]
                        print('Saving the model...')
                        torch.save(model.state_dict(), f"Accuracy_{best_accuracy}_batchsize_{training_params['BATCH_SIZE']}_lr_{training_params['LEARNING_RATE']}.ckpt")
                
        return loss_history
    
    except Exception as e:
        print('Error occured in training method = ', e)

        
def _test_model(BEST_MODEL):
    try:
        model.load_state_dict(torch.load(BEST_MODEL))

        with torch.no_grad():
            correct_predictions = 0
            test_images = 0
            testing_acc_history = []

            for images, targets in iter(test_loader):
                images = images.to(training_params['DEVICE'])
                targets = targets.to(training_params['DEVICE'])

                outputs = network(images)
                _, preds = torch.max(outputs, 1)
                correct_predictions = (preds==targets).sum().item()
                test_images += targets.shape[0]
                testing_acc_history.append(correct_predictions/targets.shape[0])

            print(f'Accuracy of the network on the {test_images} test images: {(sum(testing_acc_history)/len(testing_acc_history))*100}')
        
    except Exception as e:
            print('Error occured in testing the model = ', e)
            

In [3]:

class CNN_Network(nn.Module):
    def __init__(self, model_params):
        try:
            super(CNN_Network, self).__init__()
            
            layers = []
            
            for input_channel, out_channel in zip([model_params['INPUT_SIZE']] + model_params['HIDDEN_LAYERS'][:-1], 
                                                     model_params['HIDDEN_LAYERS'][:len(model_params['HIDDEN_LAYERS'])]):
                layers.append(nn.Conv2d(input_channel, out_channel, model_params['KERNEL'], model_params['STRIDE'], model_params['PADDING'], bias=True))
                layers.append(nn.MaxPool2d(2, 2))
                layers.append(nn.ReLU())
            layers.append(nn.Flatten(1))      
            layers.append(nn.Linear(model_params['HIDDEN_LAYERS'][-1], model_params['OUTPUT_SIZE'], bias=True))

            self.layers = nn.Sequential(*layers)
        
        except Exception as e:
            print('initializing failed due to ', e)
    
    def forward(self, x):
        try:
            return self.layers(x)
        
        except Exception as e:
            print('forward pass failed due to ', e)
            

In [4]:
# !conda install -c conda-forge optuna

In [5]:
def _hyper_parameter_tuning(trial):
    
    DATA_PATH = 'D:/Repos/MLCS_Project_Assignments/'
    
    params = {
        'DEVICE' : torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        'TRAIN_BATCH_SIZE' :  trial.suggest_categorical('TRAIN_BATCH_SIZE', (32, 64)),
        'LEARNING_RATE' : trial.suggest_loguniform('LEARNING_RATE', 0.001, 0.01),
        'OPTIMIZER': trial.suggest_categorical('OPTIMIZER', (optim.Adam, optim.SGD)),
        'NUM_EPOCHS' : 1
    }
    
    model_params = {
        'INPUT_SIZE' : 1,
        'HIDDEN_LAYERS' : [160, 100, 64, 10],
        'OUTPUT_SIZE' : 10,
        'KERNEL' : 3,
        'STRIDE' : 1,
        'PADDING' : 1
    }
    
    train_loader, validation_loader, _ = _get_data(DATA_PATH, params['TRAIN_BATCH_SIZE'], 1000)
    model = CNN_Network(model_params).to(params['DEVICE'])
    criterion = nn.CrossEntropyLoss().to(params['DEVICE'])
    optimizer = params['OPTIMIZER'](model.parameters(), lr=params['LEARNING_RATE'])
    
    loss_history = _network_training(model, train_loader, validation_loader, criterion, optimizer, params, tuning=True)
    return np.mean(loss_history)

with warnings.catch_warnings(record=True):
    analysis_study = optuna.create_study(direction='minimize')
    analysis_study.optimize(_hyper_parameter_tuning, n_trials=2)



[32m[I 2021-11-04 04:09:32,389][0m A new study created in memory with name: no-name-c55aec76-fcbf-4853-8fbb-b443d3e8e556[0m


Epoch : 1 | Loss : 2.2018000485102336 | Training Accuracy : 19.066666666666666%


[32m[I 2021-11-04 04:09:50,201][0m Trial 0 finished with value: 2.2018000485102336 and parameters: {'TRAIN_BATCH_SIZE': 64, 'LEARNING_RATE': 0.00711612809661432, 'OPTIMIZER': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 2.2018000485102336.[0m


Epoch 1 | Validation Accuracy 52.21875%
Epoch : 1 | Loss : 0.3483588643744588 | Training Accuracy : 88.60833333333333%


[32m[I 2021-11-04 04:10:04,690][0m Trial 1 finished with value: 0.3483588643744588 and parameters: {'TRAIN_BATCH_SIZE': 64, 'LEARNING_RATE': 0.0047752154928028335, 'OPTIMIZER': <class 'torch.optim.adam.Adam'>}. Best is trial 1 with value: 0.3483588643744588.[0m


Epoch 1 | Validation Accuracy 96.72916666666667%


NameError: name 'analysis_study' is not defined

In [None]:
print("SELECTED BEST SET OF HYPER-PARAMETERS:",analysis_study.best_params)
print("LOWEST LOSS SCORE ACHEIVED USING THE BEST HYPER-PARAMTERS", analysis_study.best_value)

In [None]:
df = analysis_study.trials_dataframe().drop(['state','datetime_start','datetime_complete','number'], axis=1)
df.index.name = 'trial'
df.sort_values(by='value').head()

In [None]:
plot_optimization_history(analysis_study)

In [None]:
plot_parallel_coordinate(analysis_study)

In [None]:
plot_slice(analysis_study)

In [None]:
plot_param_importances(analysis_study)

In [None]:

if __name__ == '__main__' :
    
    print(torch.__version__)
    
    DATA_PATH = 'D:\Repos\MLCS_Project_Assignments\\'
    
    training_params = {
        'DEVICE' : torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        'TRAIN_BATCH_SIZE' : 64,
        'TEST_BATCH_SIZE' : 1000,
        'LEARNING_RATE' : 0.001,
        'NUM_EPOCHS' : 1
    }
    
    model_params = {
        'INPUT_SIZE' : 1,
        'HIDDEN_LAYERS' : [160, 100, 64, 10],
        'OUTPUT_SIZE' : 10,
        'KERNEL' : 3,
        'STRIDE' : 1,
        'PADDING' : 1
    }
    
    train_loader, validation_loader, test_loader = _get_data(DATA_PATH, training_params['TRAIN_BATCH_SIZE'], training_params['TEST_BATCH_SIZE'])
    
    model = CNN_Network(model_params).to(training_params['DEVICE'])
    print(f'Network structure is: {model.parameters}')
    print(f'Total number of parameters: {sum(p.numel() for p in model.parameters())}')
    
    criterion = nn.CrossEntropyLoss().to(training_params['DEVICE'])
    optimizer = optim.Adam(model.parameters(), lr=training_params['LEARNING_RATE'])

    loss = _network_training(training_params)
    
    _test_model(BEST_MODEL='Accuracy_98.875_batchsize_64_lr_0.001.ckpt')
        

In [None]:
images, targets = next(iter(train_loader))

In [None]:
images.shape

In [None]:
len(targets.unique())