<h1><center>CONVOLUTIONAL NEURAL NETWORKS | ML FINAL PROJECT</center></h1>
This notebook contains the model architecture, training, testing, and visualization code for the two neural networks I coded from scratch for this project.

In [None]:
###########################
######   LIBRARIES   ######
###########################

# ------ STANDARD ------ #
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import statistics
%matplotlib inline

# ------ TORCH MODULES ------ #
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as tt
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch_lr_finder import LRFinder
import torch.optim as optim
from torchsummary import summary


# ------ SKLEARN MODULES ------ #
import scipy.io
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix

###### IMAGE DISPLAY ######
from IPython.display import Image
from IPython.core.display import HTML 


In [None]:
# ----- PATH -----
# All three splits live under one data root. The resulting path strings are
# identical to the previous hard-coded ones; only the duplication is removed.
data_root = 'C:\\Users\\17725\\dataScienceNCF\\Spring2020_MLProject\\data'
training_directory = data_root + '\\train'
validation_directory = data_root + '\\validation'
test_directory = data_root + '\\test'


# ----- TRANSFORMS / STANDARDIZATION -----

imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training transform: resize, random horizontal flip (augmentation),
# tensor conversion and per-channel normalization.
standard_transform = tt.Compose([
                        tt.Resize((150, 150)),
                        tt.RandomHorizontalFlip(p = 0.5),
                        tt.ToTensor(),
                        tt.Normalize(mean = imagenet_mean,
                                     std = imagenet_std)
                    ])

# Evaluation transform: identical preprocessing WITHOUT the random flip, so
# validation and test metrics are computed on un-augmented images and are
# repeatable from run to run.
eval_transform = tt.Compose([
                        tt.Resize((150, 150)),
                        tt.ToTensor(),
                        tt.Normalize(mean = imagenet_mean,
                                     std = imagenet_std)
                    ])

# ----- BUILD IMAGE FOLDERS -----

training_data = ImageFolder(training_directory,
                            transform = standard_transform)

validation_data = ImageFolder(validation_directory,
                              transform = eval_transform)

test_data = ImageFolder(test_directory,
                        transform = eval_transform)

# ----- DATA LOADERS -----

# Batch size used for training; evaluation loaders use twice this size.
batch_size = 10

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(training_data,
                          batch_size,
                          shuffle=True,
                          num_workers=4,
                          pin_memory=True)

val_loader = DataLoader(validation_data,
                        batch_size*2,
                        num_workers=4,
                        pin_memory=True)

test_loader = DataLoader(test_data,
                         batch_size*2,
                         num_workers=4,
                         pin_memory=True)


<center><b>HELPER FUNCTIONS</b></center>
Functions provided in the Jovian lectures that expedite the training process. The hyperparameters and data are passed into the fit function, which proceeds to fit the model.

In [None]:

#   ----- MOVE TO DEVICE -----  #
def to_device(data, device):
    """Move tensor(s) to chosen device.

    Args:
        data: Any object exposing ``.to(device, non_blocking=...)`` (e.g. a
            tensor or an ``nn.Module``), or an arbitrarily nested list/tuple
            of such objects.
        device: Target device, passed straight through to ``.to``.

    Returns:
        The moved object. Lists and tuples are both returned as a ``list``
        whose elements have been moved recursively.

    Note:
        This helper is intentionally NOT wrapped in ``torch.no_grad()``;
        moving data between devices does not need gradient tracking disabled.
    """
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

#   ----- DATA LOADER WRAPPER  -----  #
class DeviceDataLoader():
    """Pair a dataloader with a target device.

    Iterating yields every batch of the wrapped loader after it has been
    passed through ``to_device``; ``len()`` is forwarded to the wrapped loader.
    """

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped loader"""
        return len(self.dl)

    def __iter__(self):
        """Lazily yield each batch, moved onto ``self.device``"""
        yield from (to_device(batch, self.device) for batch in self.dl)

#   ----- EVALUATE -----  #
@torch.no_grad()
def evaluate(model, val_loader):
    """Score ``model`` on every batch of ``val_loader``.

    The model is switched to eval mode and the whole pass runs with gradient
    tracking disabled, so no autograd graph is built during validation.

    Args:
        model: Object providing ``eval()``, ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch ``validation_step`` results.
    """
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

#  ----- FIT THE MODEL  -----  #

def fit(epochs, model, train_loader, val_loader, optimizer, name):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs one pass over ``train_loader`` (forward, backward,
    optimizer step), then scores the model with ``evaluate`` on ``val_loader``.
    Whenever the validation accuracy beats the best value seen so far, the
    model's ``state_dict`` is saved to ``name``.

    Args:
        epochs: Number of epochs to train for.
        model: Model providing ``training_step``, ``epoch_end`` and the
            methods required by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        name: Destination passed to ``torch.save`` for the best checkpoint.

    Returns:
        A list with one result dict per epoch: the dict returned by
        ``evaluate`` (which must contain ``'val_acc'``) extended with
        ``'train_loss'`` and ``'train_acc'``.
    """
    history = []
    # Start below any attainable accuracy so the first epoch always writes a
    # checkpoint; with a start value of 0 a model stuck at 0.0 validation
    # accuracy would never be saved and the later load would fail.
    best_accuracy = float('-inf')

    for epoch in range(epochs):

        # -- Training -- #
        model.train()
        train_losses = []
        train_accuracies = []
        for batch in train_loader:
            model_out = model.training_step(batch)
            loss = model_out['loss']
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Log a detached copy so the stored losses do not keep references
            # to each batch's autograd history for the rest of the epoch.
            train_losses.append(loss.detach())
            train_accuracies.append(model_out['acc'])

        # -- Validation -- #
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['train_acc'] = torch.stack(train_accuracies).mean().item()
        model.epoch_end(epoch, result)

        # -- Checkpoint on improvement -- #
        cur_accuracy = result['val_acc']
        if cur_accuracy > best_accuracy:
            torch.save(model.state_dict(), name)
            best_accuracy = cur_accuracy
        history.append(result)

    return history

<h2><center>IMAGE CLASSIFICATION BASE</center></h2>

In [None]:

class ImageClassificationBase(nn.Module):

    '''
    Base class on top of nn.Module that bundles the per-batch and per-epoch
    bookkeeping used during training: a training step, a validation step,
    aggregation of validation results, and a one-line progress printout
    of loss and accuracy for each epoch.
    '''

    def training_step(self, batch):
        """Forward one training batch; return its cross-entropy loss and accuracy."""
        images, labels = batch
        logits = self(images)
        return {'loss': F.cross_entropy(logits, labels),
                'acc': accuracy(logits, labels)}

    def validation_step(self, batch):
        """Forward one validation batch; return its detached loss and accuracy."""
        images, labels = batch
        logits = self(images)
        return {'val_loss': F.cross_entropy(logits, labels).detach(),
                'val_acc': accuracy(logits, labels)}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation results into plain Python floats."""
        return {
            key: torch.stack([step[key] for step in outputs]).mean().item()
            for key in ('val_loss', 'val_acc')
        }

    def epoch_end(self, epoch, result):
        """Print the train/validation loss and accuracy recorded for ``epoch``."""
        metrics = [result[key]
                   for key in ('train_loss', 'train_acc', 'val_loss', 'val_acc')]
        print("Epoch [{}], train_loss: {:.4f}, train_acc: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, *metrics))
        
def accuracy(outputs, labels):
    """Fraction of rows in ``outputs`` whose highest-scoring column equals the label.

    The result is returned as a freshly created 0-dim tensor.
    """
    preds = torch.max(outputs, dim=1)[1]
    num_correct = torch.sum(preds == labels).item()
    return torch.tensor(num_correct / len(preds))


<h2><center>CLASS OBJECT FOR TINYNET</center></h2>

In [None]:
class TinyNet(ImageClassificationBase):

    '''
    Small convolutional neural network for classifying cat breeds (hence "TinyNet").

    Two convolutional stages (Conv -> BatchNorm -> ReLU -> Dropout -> MaxPool)
    feed a two-layer fully connected classifier that emits 15 logits.
    '''

    @staticmethod
    def _conv_stage(in_channels, out_channels, drop_p):
        """Return the layers of one 5x5 convolutional stage, in forward order."""
        kernel = 5
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size = kernel, padding = kernel // 2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.MaxPool2d((3, 3), stride = 2),
        ]

    def __init__(self):
        super().__init__()
        layers = []

        # ----- LAYER 1 ----- #
        # input: 3 x 150 x 150
        layers += self._conv_stage(3, 30, drop_p = 0.8)

        # ----- LAYER 2 ----- #
        layers += self._conv_stage(30, 30, drop_p = 0.2)

        # ----- CLASSIFIER----- #
        # 38880 = 30 channels x 36 x 36 after the two stride-2 pools on a 150 x 150 input
        layers += [
            nn.Flatten(),
            nn.Linear(38880, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Linear(1024, 15),
        ]

        # Layers are registered in the same order as before, so the
        # Sequential indices (and state_dict keys) are unchanged.
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run a batch of images through the network and return the logits."""
        return self.network(xb)

<h2><center>CLASS OBJECT FOR BABYNET</center></h2>

In [None]:
 
class BabyNet(ImageClassificationBase):
    '''
    Convolutional neural network created to classify cat breeds. Named BabyNet because its
    accuracy is no better than how I expect a baby would perform when asked to classify
    cat breeds.

    Five 5x5 convolutions arranged in three blocks, followed by a two-layer
    fully connected classifier that emits 15 logits.
    '''

    @staticmethod
    def _conv_bn(in_channels, out_channels):
        """Return a 5x5 size-preserving convolution followed by its batch norm."""
        kernel = 5
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size = kernel, padding = kernel // 2),
            nn.BatchNorm2d(out_channels),
        ]

    def __init__(self):
        super().__init__()
        layers = [
            # ----- BLOCK 1 ----- #
            # input: 3 x 150 x 150
            *self._conv_bn(3, 16),
            nn.ReLU(),
            nn.Dropout(0.8),
            nn.MaxPool2d((3, 3), stride = 2),
            # output: 16 x 74 x 74

            # ----- BLOCK 2 ----- #
            *self._conv_bn(16, 36),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.MaxPool2d((3, 3), stride = 2),
            # output: 36 x 36 x 36

            # ----- BLOCK 3 ----- #
            # NOTE(review): there is no activation between these three
            # convolutions -- confirm this is intentional.
            *self._conv_bn(36, 72),
            *self._conv_bn(72, 100),
            *self._conv_bn(100, 75),
            nn.Dropout(0.2),
            nn.MaxPool2d((3, 3), stride = 2),
            nn.AvgPool2d((3, 3), stride = 2),
            # output: 75 x 8 x 8 (= 4800 features)

            # ----- CLASSIFIER----- #
            nn.Flatten(),
            nn.Linear(4800, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, 15),
        ]

        # Layers are registered in the same order as before, so the
        # Sequential indices (and state_dict keys) are unchanged.
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run a batch of images through the network and return the logits."""
        return self.network(xb)

<h2><center>BABYNET MODEL</center></h2>
01 / Convolutional neural network with five convolutional layers and two linear layers. 
  
02 / No softmax function because softmax is implemented within cross-entropy loss and the Pytorch documentation specifically states not to use softmax as the final activation when also using cross-entropy loss.     
  
03 / Performs badly, but better than the null model (which would be about 7% accuracy)

04 / References the AlexNet architecture (https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf) for the blocking design. I decided to use AlexNet as a base because it is a relatively shallow design (small dataset --> deeper architecture would be more prone to over-fitting and is not appropriate) and is a classic that I wanted to understand better. 

In [None]:
# ------ LEARNING-RATE RANGE TEST (BabyNet) ------ #
model = BabyNet()
criterion = nn.CrossEntropyLoss()

# The optimizer must hold the parameters of the SAME model that is handed to
# LRFinder. Building it from a second BabyNet() instance would step weights
# that are never used, leaving `model` unchanged during the sweep.
optimizer = optim.SGD(model.parameters(), lr = 1e-7, weight_decay = 0.001)

# Sweep the learning rate from the optimizer's lr (1e-7) up to end_lr over
# num_iter training iterations, then plot the recorded loss.
lr_finder = LRFinder(model, optimizer, criterion, device = "cuda")
lr_finder.range_test(train_loader, end_lr = 1, num_iter = 100)
lr_finder.plot()

In [None]:
# ------ HYPERPARAMETERS ------ #
# Adjustments to the hyperparameters made VERY little difference in overall model performance

# Optimizers tried: 
#      Adam and SGD ✔️
# Learning Rate:
#      Determined by the LR finder; tried several iterations, little difference in results
# Weight Decay Tried:
#      0.0001, 0.0003, 0.001 ✔️, 0.003, 0.01
# Kernel Sizes Tried:
#      3x3, 5x5✔️, 7x7
# Epochs trained for: 
#      Up to 150; no additional benefit to validation accuracy gained from letting the model
#      train beyond ~50 epochs

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = to_device(BabyNet(), device)
epochs = 50
optimizer = optim.SGD(model.parameters(), lr = 0.002, weight_decay = 0.001)

# Wrap the loaders so every batch is moved to `device` as it is yielded.
train_dl = DeviceDataLoader(train_loader, device)
val_dl = DeviceDataLoader(val_loader, device)

# Train; the best-validation-accuracy weights are written to 'bestBabyNet.pt'.
history = fit(epochs, model, train_dl, val_dl, optimizer, 'bestBabyNet.pt')


<b>TEST BABYNET</b>

In [None]:
# ------ Load in the Best BabyNet ------ #

best_BabyNet = BabyNet()
# Inference mode: disables dropout and makes batch norm use its running
# statistics. A freshly constructed module starts in training mode.
best_BabyNet.eval()
# The checkpoint was written from the training device. This model stays on
# the CPU, so map the stored tensors there; the file then also loads on
# machines without a GPU.
best_BabyNet.load_state_dict(torch.load('bestBabyNet.pt', map_location = 'cpu'))

In [None]:

# ------ Predict ------ #

def predict(model, images):
    """Return the predicted class index for every image in ``images``.

    The model is put in eval mode (no dropout, batch norm uses running
    statistics) and the forward pass runs without gradient tracking.

    Args:
        model: Network mapping a batch of images to per-class scores.
        images: Batch of images on the same device as ``model``.

    Returns:
        1-D tensor with the index of the highest score per image.
    """
    model.eval()
    with torch.no_grad():
        outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    return predicted

acc_list = []      # per-batch accuracy, kept for inspection
num_correct = 0
num_seen = 0
for images, labels in test_loader:
    preds = predict(best_BabyNet, images)
    batch_correct = torch.sum(preds == labels).item()
    acc_list.append(batch_correct / len(preds))
    num_correct += batch_correct
    num_seen += len(preds)

# Overall test accuracy, weighted by batch size. A plain mean of the per-batch
# accuracies would over-weight a smaller final batch.
num_correct / num_seen


<b><center>Accuracy/Loss Visualization</center></b>

In [None]:
# Pull each metric out of the per-epoch result dicts returned by fit().
# The length comes from `history` itself rather than a hard-coded 50, so the
# cell keeps working when the number of training epochs changes.
val_loss_values = [epoch_result['val_loss'] for epoch_result in history]
val_acc_values = [epoch_result['val_acc'] for epoch_result in history]
train_loss_values = [epoch_result['train_loss'] for epoch_result in history]
train_acc_values = [epoch_result['train_acc'] for epoch_result in history]

# One integer x value per recorded epoch (0-based, matching the printed
# "Epoch [n]" lines). A separate name is used so the `epochs` training
# setting is not overwritten.
epoch_axis = np.arange(len(history))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2)  # 1 row, 2 columns

acc_ax.plot(epoch_axis, val_acc_values, label = 'validation')
acc_ax.plot(epoch_axis, train_acc_values, label = 'training')
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(epoch_axis, val_loss_values, label = 'validation')
loss_ax.plot(epoch_axis, train_loss_values, label = 'training')
loss_ax.set_title("Loss")
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()

<h2><center>TINYNET MODEL</center></h2>
01 / Convolutional neural network with two convolutional layers, two max-pools, and two linear layers. Combines batch normalization and drop-out in an attempt to avoid over-fitting. According to the relevant literature, regularization is not typically necessary for shallow neural networks. However, the model did no better than chance without regularization and as such I decided to try it (because it couldn't hurt).  
  
02 / No softmax function because softmax is implemented within cross-entropy loss and the Pytorch documentation specifically states not to use softmax as the final activation when also using cross-entropy loss.     
  
03 / Performs badly, but better than the null model 

In [None]:
# ------ LEARNING-RATE RANGE TEST (TinyNet) ------ #
model = TinyNet()
criterion = nn.CrossEntropyLoss()

# The optimizer must hold the parameters of the SAME model that is handed to
# LRFinder. Building it from a second TinyNet() instance would step weights
# that are never used, leaving `model` unchanged during the sweep.
optimizer = optim.SGD(model.parameters(), lr = 1e-7, weight_decay = 0.001)

# Sweep the learning rate from the optimizer's lr (1e-7) up to end_lr over
# num_iter training iterations, then plot the recorded loss.
lr_finder = LRFinder(model, optimizer, criterion, device = "cuda")
lr_finder.range_test(train_loader, end_lr = 1, num_iter = 100)
lr_finder.plot()

In [None]:
##### HYPERPARAMETERS #####
# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = to_device(TinyNet(), device)
epochs = 50
optimizer = optim.SGD(model.parameters(), lr = 0.003)

# Wrap the loaders so every batch is moved to `device` as it is yielded.
train_dl = DeviceDataLoader(train_loader, device)
val_dl = DeviceDataLoader(val_loader, device)

# Train; the best-validation-accuracy weights are written to 'bestTinyNet.pt'.
history = fit(epochs, model, train_dl, val_dl, optimizer, 'bestTinyNet.pt')


In [None]:
# Pull each metric out of the per-epoch result dicts returned by fit().
# The length comes from `history` itself rather than a hard-coded 50, so the
# cell keeps working when the number of training epochs changes.
val_loss_values = [epoch_result['val_loss'] for epoch_result in history]
val_acc_values = [epoch_result['val_acc'] for epoch_result in history]
train_loss_values = [epoch_result['train_loss'] for epoch_result in history]
train_acc_values = [epoch_result['train_acc'] for epoch_result in history]

# One integer x value per recorded epoch (0-based, matching the printed
# "Epoch [n]" lines). A separate name is used so the `epochs` training
# setting is not overwritten.
epoch_axis = np.arange(len(history))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2)  # 1 row, 2 columns

acc_ax.plot(epoch_axis, val_acc_values, label = 'validation')
acc_ax.plot(epoch_axis, train_acc_values, label = 'training')
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(epoch_axis, val_loss_values, label = 'validation')
loss_ax.plot(epoch_axis, train_loss_values, label = 'training')
loss_ax.set_title("Loss")
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# ------ Load in the Best TinyNet ------ #

best_TinyNet = TinyNet()
# Inference mode: disables dropout and makes batch norm use its running
# statistics. A freshly constructed module starts in training mode.
best_TinyNet.eval()
# The checkpoint was written from the training device. This model stays on
# the CPU, so map the stored tensors there; the file then also loads on
# machines without a GPU.
best_TinyNet.load_state_dict(torch.load('bestTinyNet.pt', map_location = 'cpu'))

In [None]:

# ------ Predict ------ #

def predict(model, images):
    """Return the predicted class index for every image in ``images``.

    The model is put in eval mode (no dropout, batch norm uses running
    statistics) and the forward pass runs without gradient tracking.

    Args:
        model: Network mapping a batch of images to per-class scores.
        images: Batch of images on the same device as ``model``.

    Returns:
        1-D tensor with the index of the highest score per image.
    """
    model.eval()
    with torch.no_grad():
        outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    return predicted

acc_list = []      # per-batch accuracy, kept for inspection
num_correct = 0
num_seen = 0
for images, labels in test_loader:
    preds = predict(best_TinyNet, images)
    batch_correct = torch.sum(preds == labels).item()
    acc_list.append(batch_correct / len(preds))
    num_correct += batch_correct
    num_seen += len(preds)

# Overall test accuracy, weighted by batch size. A plain mean of the per-batch
# accuracies would over-weight a smaller final batch.
num_correct / num_seen


<b><center>Accuracy/Loss Visualization</center></b>

In [None]:
# Pull each metric out of the per-epoch result dicts returned by fit().
# The length comes from `history` itself rather than a hard-coded 50, so the
# cell keeps working when the number of training epochs changes.
val_loss_values = [epoch_result['val_loss'] for epoch_result in history]
val_acc_values = [epoch_result['val_acc'] for epoch_result in history]
train_loss_values = [epoch_result['train_loss'] for epoch_result in history]
train_acc_values = [epoch_result['train_acc'] for epoch_result in history]

# One integer x value per recorded epoch (0-based, matching the printed
# "Epoch [n]" lines). A separate name is used so the `epochs` training
# setting is not overwritten.
epoch_axis = np.arange(len(history))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2)  # 1 row, 2 columns

acc_ax.plot(epoch_axis, val_acc_values, label = 'validation')
acc_ax.plot(epoch_axis, train_acc_values, label = 'training')
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(epoch_axis, val_loss_values, label = 'validation')
loss_ax.plot(epoch_axis, train_loss_values, label = 'training')
loss_ax.set_title("Loss")
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()