In [1]:
# configuration cell

no_epochs = 30

act_fn_dict = {
    'conv1':'relu',
    'conv2':'relu',
    'conv3':'relu',
    'conv4':'relu',
    'conv5':'relu',
    'fc1':'relu'
}

kernel_size_list = [3,3,3,3,3]
no_kernel_list = [32,32,64,64,128]
dropout_list = [0,0,0.5]
fc1_nodes = 1024
no_classes = 10
lr = 0.0001
lr_schedule = 0.5 # per 10 epochs half the learning rate

In [2]:
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import numpy as np
import shutil
import os
import wandb
from util import*

In [3]:
# check if CUDA is available
use_cuda = torch.cuda.is_available()
if use_cuda == True:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
modelName = 'Best_CNN_5Layers_iNaturalist'

In [5]:
try:
    os.makedirs(modelName+"/checkpoint")
    os.makedirs(modelName+"/best_model")
except:
    print("directory already present")

directory already present


In [6]:
ckp_path = "./"+modelName+"/checkpoint/current_checkpoint.pt"
best_ckp_path = "./"+modelName+"/best_model/best_model.pt"

In [7]:
wandb.init(project=modelName)

wandb: Currently logged in as: rayanz (use `wandb login --relogin` to force relogin)
wandb: wandb version 0.10.26 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


In [8]:
batch_size = 64
datasetTrain, datasetVal, datasetTest = load_datasets()
loaders = data_loader(datasetTrain, datasetVal, datasetTest, batch_size)

In [9]:
def act_fn(act_name):
    if act_name=="relu":
        return nn.ReLU(inplace=True)
    
    elif act_name=="sigmoid":
        return nn.Sigmoid(inplace=True)
    
    elif act_name=="tanh":
        return nn.Tanh(inplace==True)
    

In [10]:
class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size = 3 ,BN=True , NL="relu", stride = 1, padding = 0):
        super(conv_block, self).__init__()
        self.BN=BN
        self.NL=NL
        k = kernel_size
        
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size = k, stride = stride, padding = padding, bias=False)
        
        if self.BN==True:
            self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
            
        self.act = act_fn(NL)
        
    def forward(self, x):
        x = self.conv(x)
        
        if self.BN==True:
            x = self.bn(x)
        
        return self.act(x)

In [11]:
class fc_block(nn.Module):
    def __init__(self, in_channels, out_channels, BN=False , NL="relu"):
        super(fc_block, self).__init__()
        self.BN=BN
        self.NL=NL
        self.fc = nn.Linear(in_channels, out_channels)
        
        if self.BN==True:
            self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
            
        self.act = act_fn(NL)
        
    def forward(self, x):
        x = self.fc(x)
        
        if self.BN==True:
            x = self.bn(x)
        
        x = self.act(x)
        
        return x

In [12]:
def get_fc_in(input_dim, kernel_size_list, no_kernel_list):
    H = input_dim
    fc_in = H - kernel_size_list[0] + 1 # conv1
    fc_in = (fc_in - 2) //2  + 1 # max pool 1
    fc_in = fc_in - kernel_size_list[1] + 1 # conv2
    fc_in = (fc_in - 2) //2  + 1 # max pool 2
    fc_in = fc_in - kernel_size_list[2] + 1 #conv3
    fc_in = (fc_in - 2) //2  + 1 # max pool 3
    fc_in = fc_in - kernel_size_list[3] + 1 #conv4
    fc_in = (fc_in - 2) //2  + 1 # max pool 4
    fc_in = fc_in - kernel_size_list[4] + 1 #conv5
    fc_in = (fc_in - 2) //2  + 1 # max pool 5
    #print(fc_in)
    return fc_in * fc_in * no_kernel_list[4]

In [13]:
class CNN_5layer(nn.Module):
    def __init__(self, kernel_size_list, no_kernel_list, act_fn_dict, dropout_list, fc1_nodes, no_classes):
        super(CNN_5layer, self).__init__()
        self.dropout_list = dropout_list
        self.input_dim = 224
        #self.input_dim = 128
        self.conv1 = conv_block(3, no_kernel_list[0], kernel_size=kernel_size_list[0], BN=False, NL=act_fn_dict['conv1'])
        self.maxpool1 = nn.MaxPool2d((2, 2))
        self.conv2 = conv_block(no_kernel_list[0], no_kernel_list[1], kernel_size=kernel_size_list[1], BN=True, NL=act_fn_dict['conv2'])
        self.maxpool2 = nn.MaxPool2d((2, 2))
        
        if self.dropout_list[0]!=0:
            self.dropout1 = nn.Dropout(dropout_list[0])

        self.conv3 = conv_block(no_kernel_list[1], no_kernel_list[2], kernel_size=kernel_size_list[2], BN=True, NL=act_fn_dict['conv3'])
        self.maxpool3 = nn.MaxPool2d((2, 2))
        self.conv4 = conv_block(no_kernel_list[2], no_kernel_list[3], kernel_size=kernel_size_list[3], BN=True, NL=act_fn_dict['conv4'])
        self.maxpool4 = nn.MaxPool2d((2, 2))
        
        if self.dropout_list[1]!=0:
            self.dropout2 = nn.Dropout(dropout_list[1])

        self.conv5 = conv_block(no_kernel_list[3], no_kernel_list[4], kernel_size=kernel_size_list[4], BN=True, NL=act_fn_dict['conv5'])
        self.maxpool5 = nn.MaxPool2d((2, 2))
        
        self.fc1_in_features = get_fc_in(self.input_dim, kernel_size_list, no_kernel_list)
        
        self.fc1 = fc_block(self.fc1_in_features, fc1_nodes , NL=act_fn_dict['fc1'])
        
        if self.dropout_list[2]!=0:
            self.dropout3 = nn.Dropout(dropout_list[2])
        
        self.fc2 = nn.Linear(fc1_nodes, no_classes)
    
    
    def forward(self, x):
        if x.shape[2]!=self.input_dim:
            print("input dim not matched")
            return
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        
        if self.dropout_list[0]!=0:
            x = self.dropout1(x)
        
        x = self.conv3(x)
        x = self.maxpool3(x)
        x = self.conv4(x)
        x = self.maxpool4(x)
        
        if self.dropout_list[1]!=0:
            x = self.dropout2(x)
        
        x = self.conv5(x)
        x = self.maxpool5(x)
        
        x = x.view(x.shape[0], -1)
        
        x = self.fc1(x)
        if self.dropout_list[2]!=0:
            x = self.dropout3(x)
        
        x = self.fc2(x)
        
        return x

In [14]:
model = CNN_5layer(kernel_size_list, no_kernel_list, act_fn_dict, dropout_list, fc1_nodes, no_classes)

In [15]:
model = model.to(device)

In [16]:
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
scheduler = StepLR(optimizer, step_size=10, gamma=lr_schedule)
criterion = nn.CrossEntropyLoss()

In [17]:
def train(start_epochs, n_epochs, valid_loss_min_input, loaders, model, optimizer, criterion,scheduler, use_cuda, checkpoint_path, best_model_path):
    
    valid_loss_min = valid_loss_min_input 
    
    for epoch in range(start_epochs, start_epochs+n_epochs):
        
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        tnum_correct = 0
        tnum_examples = 0
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
                
            optimizer.zero_grad()
            
            output = model(data)
            loss = criterion(output, target)
            
            correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1],target).view(-1)
            tnum_correct += torch.sum(correct).item()
            tnum_examples += correct.shape[0]
            loss.backward()
            optimizer.step()
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        train_acc = tnum_correct / tnum_examples
        
        ######################    
        # validate the model #
        ######################
        model.eval()
        num_correct = 0
        num_examples = 0
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
            correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1],target).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
            
        train_loss = train_loss/len(loaders['train'].dataset)
        valid_loss = valid_loss/len(loaders['valid'].dataset)
        valid_acc = num_correct / num_examples
        scheduler.step()
        
        print('Epoch: {}\tTraining Loss: {:.6f}\tTrain Accuracy: {:.2f}\tValidation Loss: {:.6f}\tvalidation Accuracy: {:.2f}'.format(
            epoch, 
            train_loss,
            train_acc,
            valid_loss,
            valid_acc
            ))
        
        wandb.log({'epoch': epoch,'train loss': train_loss,'train accuracy': train_acc,
                   'val loss': valid_loss, 'val accuracy': valid_acc})
        
        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss': valid_loss,
            'valid_acc': valid_acc,
            'valid_loss_min': valid_loss_min,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            # save checkpoint as best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = valid_loss
            
    # return trained model
    return model

In [18]:
trained_model = train(start_epochs = 1,
                      n_epochs = no_epochs,
                      valid_loss_min_input = np.Inf,
                      loaders = loaders,
                      model = model,
                      optimizer = optimizer,
                      criterion = criterion,
                      scheduler = scheduler,
                      use_cuda = use_cuda,
                      checkpoint_path = ckp_path,
                      best_model_path = best_ckp_path
                     )

Epoch: 1	Training Loss: 0.000242	Train Accuracy: 0.21	Validation Loss: 0.002002	validation Accuracy: 0.30
Validation loss decreased (inf --> 0.002002).  Saving model ...
Epoch: 2	Training Loss: 0.000231	Train Accuracy: 0.25	Validation Loss: 0.001939	validation Accuracy: 0.32
Validation loss decreased (0.002002 --> 0.001939).  Saving model ...
Epoch: 3	Training Loss: 0.000226	Train Accuracy: 0.27	Validation Loss: 0.001886	validation Accuracy: 0.33
Validation loss decreased (0.001939 --> 0.001886).  Saving model ...
Epoch: 4	Training Loss: 0.000223	Train Accuracy: 0.28	Validation Loss: 0.001879	validation Accuracy: 0.36
Validation loss decreased (0.001886 --> 0.001879).  Saving model ...
Epoch: 5	Training Loss: 0.000219	Train Accuracy: 0.30	Validation Loss: 0.001857	validation Accuracy: 0.35
Validation loss decreased (0.001879 --> 0.001857).  Saving model ...
Epoch: 6	Training Loss: 0.000217	Train Accuracy: 0.30	Validation Loss: 0.001848	validation Accuracy: 0.34
Validation loss decrease

## Training accuracy for best

In [20]:
# load the saved last checkpoint
best_trained_model, optimizer, start_epoch, valid_loss, valid_acc, valid_loss_min = load_ckp(best_ckp_path, model, optimizer)

In [21]:
best_trained_model.eval()
test_acc = 0.0
test_num_correct = 0
test_num_examples = 0
for data, target in loaders['test']:
    with torch.no_grad():
        data, target = data.cuda(), target.cuda()
        output = best_trained_model(data)
        # calculate accuracy
        correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1],target).view(-1)
        test_num_correct += torch.sum(correct).item()
        test_num_examples += correct.shape[0]
        
test_acc = test_num_correct / test_num_examples
print('Test Accuracy of the model is : {}%'.format(round(test_acc*100.0), 2))

Test Accuracy of the model is : 42%


In [22]:
wandb.log({"Test Accuracy": test_acc})