In [None]:
#Code Reference: https://github.com/myconcordia/COMP478/blob/main/SampeProject.ipynb
#Provided by COMP478 course
#Title: COMP478/SampeProject
#Author: myconcordia
#Date: May 4, 2021
#Code Version: NA
#Availability: https://github.com/myconcordia/COMP478/blob/main/SampeProject.ipynb



#importing tools and dependencies
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision.datasets import ImageFolder
from torchvision.models import vgg16_bn
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.data import Subset
from torch.optim import Adam

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score 


from collections import Counter



In [None]:
#preprocessing applied to every image before it reaches the model:
#resize to 100x100, convert to a tensor, then normalise each of the three
#channels with mean 0.5 and standard deviation 0.5
preprocessing_steps = [
    transforms.Resize((100,100)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transformData = transforms.Compose(preprocessing_steps)

#ImageFolder derives one class per sub-directory of DATA_DIR
DATA_DIR = "C:/Users/ineke/OneDrive/Desktop/VSProjects/COMP478/Dataset"
dataset = ImageFolder(root=DATA_DIR, transform=transformData)

#class names, in the order of the class -> index mapping built by ImageFolder
CLASSES = dataset.class_to_idx.keys()
print('Total images in dataset',len(dataset))

In [None]:
#divide the dataset indices into a training split (75%) and a held-out test split (25%);
#stratifying on the targets keeps the class proportions the same in both splits
all_dataset= np.arange(len(dataset))
trained, tested = train_test_split(all_dataset, test_size=0.25, random_state=1, stratify=dataset.targets)

#class index of every image in each split (used for the distribution plots)
train_label = np.array(dataset.targets)[trained]
test_label = np.array(dataset.targets)[tested]

#loader over the held-out test images.
#shuffle=False / drop_last=False: evaluation must see every test image exactly once,
#in a deterministic order; dropping the last partial batch would silently discard
#up to 31 images from the reported metrics.
loader = DataLoader(Subset(dataset, tested), batch_size=32, shuffle=False, drop_last=False)



In [None]:
#plot the class distribution of the training and testing splits side by side
train_labels_dist = Counter(train_label)
test_labels_dist = Counter(test_label)

#one tick per class, derived from the dataset instead of a hard-coded class count
class_ticks = np.arange(len(CLASSES))

plt.figure(figsize=(14,7))
for panel, (split_name, labels_dist) in enumerate(
        [('Training', train_labels_dist), ('Testing', test_labels_dist)], start=1):
    plt.subplot(1,2,panel)
    plt.title(f'Distribution of Classes in {split_name}')
    plt.xlabel('Classes')
    plt.ylabel('Number of images in class')
    plt.xticks(class_ticks, CLASSES, rotation = 60)
    #bar height = number of images whose label equals the class index on the x axis
    plt.bar(labels_dist.keys(), labels_dist.values())

In [None]:
#setting the pre-trained VGG16 model

#gpu: use the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

class VGG16Model (nn.Module):
    """Pretrained VGG16 (batch-norm variant) with a new, trainable output layer.

    All weights loaded with the pretrained network are frozen
    (``requires_grad = False``); only the replacement final ``nn.Linear``
    layer of the classifier receives gradients.

    Parameters
    ----------
    num_classes : int, optional
        Number of output units of the new final layer. Defaults to 66, the
        value this notebook was written for.
    """

    def __init__(self, num_classes=66):
        super().__init__()
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is so the notebook still runs on the
        # torchvision version it was written against - confirm before upgrading.
        self.nn = vgg16_bn(pretrained=True)

        # Freeze every pretrained parameter first ...
        for param in self.nn.parameters():
            param.requires_grad = False

        # ... then swap the last classifier layer for a fresh one. The new layer
        # is created after the freeze, so it keeps the default requires_grad=True
        # and is the only part of the network that is trained.
        features = self.nn.classifier[6].in_features
        self.nn.classifier[6] = nn.Linear(features, num_classes)

    def forward(self, temp):
        """Return the raw class scores of the wrapped network for the batch ``temp``."""
        return self.nn(temp)


In [None]:
#loss, cross-validation splitter, model and optimizer shared by the training/evaluation cells
criteria = nn.CrossEntropyLoss()

#random_state pins the shuffled fold assignment so the folds are the same on every run,
#matching the seeded train/test split (random_state=1) above
KF = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

model = VGG16Model()
model.to(device)

#frozen parameters never receive a gradient, so Adam only ever updates the trainable ones
optimizer = Adam(model.parameters(), lr=0.001)


In [None]:
@torch.no_grad()
def evaluateModel (loader):
    """Evaluate the module-level ``model`` on every batch yielded by ``loader``.

    Inference runs with gradients disabled and with the model in eval mode.
    The model stays on the device it already lives on, and its previous
    train/eval mode is restored before returning, so a subsequent training
    run is not affected by the evaluation.

    Parameters
    ----------
    loader : iterable
        Yields ``(image, label)`` batches, e.g. a ``DataLoader``.

    Returns
    -------
    list
        ``[accuracy, recall, precision]`` where accuracy is a percentage
        (``accuracy_score * 100``) and recall/precision are the
        macro-averaged scores returned by scikit-learn.
    """
    # Use the model's current device instead of moving the model to the CPU:
    # moving it (and never moving it back) left the model on the CPU while the
    # training loop keeps sending batches to `device`.
    model_device = next(model.parameters()).device
    was_training = model.training
    model.eval()

    # Seed both lists with an empty tensor so torch.cat also works when the
    # loader yields no batches.
    predictions = [torch.empty(0, dtype=torch.long)]
    targets = [torch.empty(0, dtype=torch.long)]
    try:
        for image, label in tqdm(loader):
            out = model(image.to(model_device))
            # Index of the highest score = predicted class; collect on the CPU
            # because scikit-learn works on host arrays.
            predictions.append(out.argmax(dim=1).cpu())
            targets.append(label.cpu())
    finally:
        # Restore the mode the model was in (eval() would otherwise stick).
        model.train(was_training)

    target = torch.cat(targets).numpy()
    prediction = torch.cat(predictions).numpy()

    accuracyRatio = accuracy_score(target, prediction) * 100
    recallRatio = recall_score(target, prediction, average='macro')
    precisionRatio = precision_score(target, prediction, average='macro')
    # f-strings: the metrics are floats, so "text" + value raised a TypeError.
    print(f"Accuracy: {accuracyRatio}")
    print(f"Recall: {recallRatio}")
    print(f"Precision: {precisionRatio}")

    return [accuracyRatio, recallRatio, precisionRatio]
    

In [None]:
def trainModel (trainLoader, step, num_epochs=5):
    """Train the module-level ``model`` on ``trainLoader`` and save its weights.

    Uses the module-level ``criteria`` (loss), ``optimizer`` and ``device``.
    After every batch the loss and the batch accuracy are printed. When all
    epochs are done the model's ``state_dict`` is written to
    ``./<ModelClassName>-model.pt`` (overwriting any previous file).

    Parameters
    ----------
    trainLoader : iterable
        Yields ``(images, labels)`` batches.
    step : int
        Number of batches per epoch; only used in the progress message.
    num_epochs : int, optional
        Number of passes over ``trainLoader``. Defaults to 5.
    """
    # Make sure the model is in training mode: evaluateModel switches it to
    # eval mode, which would otherwise persist into the next training run.
    model.train()

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(trainLoader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criteria(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Batch accuracy: share of samples whose highest-scoring class
            # matches the label.
            total = labels.size(0)
            predicted = outputs.argmax(dim=1)
            correct = (predicted == labels).sum().item()

            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{step}], Loss: {loss.item():.4f}, Accuracy: {(correct / total) * 100:.2f}')

    torch.save(model.state_dict(), f'./{model.__class__.__name__}-model.pt')
    return

In [None]:
def runtTest ():
    """Run stratified k-fold cross-validation over the training split.

    For every fold produced by the module-level ``KF`` splitter, the
    module-level ``model`` is trained on the fold's training part with
    ``trainModel`` and scored on the fold's validation part with
    ``evaluateModel``.

    Returns
    -------
    list
        One entry per fold, each being the value returned by
        ``evaluateModel`` for that fold's validation part.
    """
    # NOTE(review): `model` and `optimizer` are module-level objects and are not
    # re-created here, so weights carry over from one fold to the next - confirm
    # whether each fold is meant to start from a fresh model.
    fold_labels = np.array(dataset.targets)[trained]
    fold_metrics = []

    for k_fold_num, (train_index, test_index) in enumerate(KF.split(trained, fold_labels)):
        # KF yields positions into `trained`; map them back to dataset indices.
        trainFold = Subset(dataset, trained[train_index])
        testFold = Subset(dataset, trained[test_index])

        trainLoader = DataLoader(trainFold, batch_size=32, shuffle=True, drop_last=True)
        # Validation must score every image of the fold: no shuffling needed and
        # the last partial batch must not be dropped.
        testLoader = DataLoader(testFold, batch_size=32, shuffle=False, drop_last=False)
        step = len(trainLoader)

        print(f'Fold [{k_fold_num + 1}/{KF.get_n_splits()}]')
        trainModel (trainLoader, step)
        fold_metrics.append(evaluateModel (testLoader))
    return fold_metrics


#run the k-fold training / evaluation loop defined in runtTest
runtTest(); 
