# VGG-16 Deep Learning Network Implementation in PyTorch

We train the model on the IEEE CovidChestXRay dataset.

# Importing Libraries

In [None]:
%matplotlib inline

import os
import shutil
import random
import torch
import torchvision
import numpy as np

from PIL import Image
from matplotlib import pyplot as plt
from IPython.display import clear_output

# Seed every random number generator this notebook draws from so runs are
# repeatable: PyTorch is seeded for tensor-level randomness, and Python's
# `random` module is seeded because the test-split sampling and the dataset's
# per-item class selection both use it.
torch.manual_seed(0)
random.seed(0)
print('Using PyTorch Version', torch.__version__)

# Preparing Training and Test Sets

In [None]:
class_names = ['Covid', 'Non-Covid']
root_dir = 'Dataset2'
source_dirs = ['Covid', 'Non-Covid']


def prepare_test_split(root_dir, source_dirs, class_names, test_size=30):
    """Hold out ``test_size`` random images per class under ``<root_dir>/test``.

    Each folder in ``source_dirs`` is first renamed to the matching entry of
    ``class_names`` (skipped when the two names are equal). Then ``test_size``
    images of every class are moved into ``<root_dir>/test/<class>``.

    The split is skipped, and nothing on disk is touched, when any source
    folder is missing or when ``<root_dir>/test`` already exists. This makes
    re-running the cell safe even when source and class names are identical.

    Args:
        root_dir: Dataset root containing one folder per source directory.
        source_dirs: Folder names as they currently exist on disk.
        class_names: Final class folder names, parallel to ``source_dirs``.
        test_size: Number of images to hold out per class.

    Returns:
        True if the split was performed, False if it was skipped.

    Raises:
        ValueError: If a class has fewer than ``test_size`` images. Raised
            before any image is moved or any test folder is created.
    """
    test_root = os.path.join(root_dir, 'test')
    have_sources = all(
        os.path.isdir(os.path.join(root_dir, d)) for d in source_dirs
    )
    if not have_sources or os.path.isdir(test_root):
        return False

    for source, name in zip(source_dirs, class_names):
        if source != name:
            os.rename(os.path.join(root_dir, source), os.path.join(root_dir, name))

    # Choose every class's hold-out set first so a too-small class fails
    # before the test folders exist.
    selected = {}
    for name in class_names:
        # Sorted so the sample depends only on the RNG state, not on the
        # arbitrary order returned by os.listdir.
        images = sorted(
            f for f in os.listdir(os.path.join(root_dir, name))
            if f.lower().endswith(('png', 'jpg', 'jpeg'))
        )
        if len(images) < test_size:
            raise ValueError(
                f'Class {name!r} has only {len(images)} images; '
                f'cannot hold out {test_size} for testing'
            )
        selected[name] = random.sample(images, test_size)

    for name, chosen in selected.items():
        target_dir = os.path.join(test_root, name)
        os.makedirs(target_dir)
        for image in chosen:
            shutil.move(
                os.path.join(root_dir, name, image),
                os.path.join(target_dir, image),
            )
    return True


prepare_test_split(root_dir, source_dirs, class_names)

# Creating Custom Dataset

In [None]:
class ChestXRayDataset(torch.utils.data.Dataset):
    """Two-class chest X-ray dataset backed by one image folder per class.

    ``image_dirs`` maps each class name ('Covid', 'Non-Covid') to the folder
    holding its images; ``transform`` is applied to every loaded image.
    """

    def __init__(self, image_dirs, transform):
        self.images = {}
        self.class_names = ['Covid', 'Non-Covid']

        for label in self.class_names:
            found = []
            for filename in os.listdir(image_dirs[label]):
                short_tail = filename[-3:].lower()
                long_tail = filename[-4:].lower()
                # Keep files whose name ends in png / jpg / jpeg (any case).
                if short_tail.endswith(('png', 'jpg')) or long_tail.endswith('jpeg'):
                    found.append(filename)
            print(f'Found {len(found)} {label} examples')
            self.images[label] = found

        self.image_dirs = image_dirs
        self.transform = transform

    def __len__(self):
        # Total number of images across all classes.
        total = 0
        for label in self.class_names:
            total += len(self.images[label])
        return total

    def __getitem__(self, index):
        # The class is drawn at random for every request; the incoming index
        # is then wrapped into that class's image list.
        label = random.choice(self.class_names)
        candidates = self.images[label]
        filename = candidates[index % len(candidates)]
        path = os.path.join(self.image_dirs[label], filename)
        picture = Image.open(path).convert('RGB')
        return self.transform(picture), self.class_names.index(label)

# Image Transformations

In [None]:
# Both pipelines resize to 224x224 and normalise with the ImageNet channel
# statistics (mean 0.485/0.456/0.406, std 0.229/0.224/0.225). These are the
# same values show_images uses to undo the normalisation.

# Training pipeline: additionally applies a random horizontal flip.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(224, 224)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: no random augmentation.
test_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(224, 224)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Prepare DataLoader

In [None]:
# Training images sit in one folder per class directly under the dataset root.
train_dirs = dict(zip(
    ('Covid', 'Non-Covid'),
    ('Dataset2/Covid', 'Dataset2/Non-Covid'),
))

train_dataset = ChestXRayDataset(image_dirs=train_dirs, transform=train_transform)

In [None]:
# Held-out images sit in one folder per class under the dataset's test folder.
test_dirs = dict(zip(
    ('Covid', 'Non-Covid'),
    ('Dataset2/test/Covid', 'Dataset2/test/Non-Covid'),
))

test_dataset = ChestXRayDataset(image_dirs=test_dirs, transform=test_transform)

In [None]:
batch_size = 6

# Both loaders share the same batch size and shuffle on every pass.
_loader_kwargs = {'batch_size': batch_size, 'shuffle': True}
dl_train = torch.utils.data.DataLoader(train_dataset, **_loader_kwargs)
dl_test = torch.utils.data.DataLoader(test_dataset, **_loader_kwargs)

for _title, _loader in (('Number of Training Batches', dl_train),
                        ('Number of Test Batches', dl_test)):
    print(_title, len(_loader))

# Data Visualization

In [None]:
# Reuse the dataset's class list so that integer labels map back to the same
# class names when captions are drawn.
class_names = train_dataset.class_names

def show_images(images, labels, preds):
    """Plot a batch of normalised images in one row with label captions.

    The true class is written under each image (x-label) and the predicted
    class beside it (y-label), green when it matches the label and red
    otherwise.

    Args:
        images: Batch of image tensors; each is transposed from
            channel-first to channel-last before plotting.
        labels: Per-image true class indices into ``class_names``.
        preds: Per-image predicted class indices into ``class_names``.
    """
    # Same statistics used to normalise the images; hoisted out of the loop.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Keep the six-slot layout for small batches, but grow the grid when the
    # batch holds more than six images instead of failing on the seventh.
    n_cols = max(6, len(images))

    plt.figure(figsize=(10, 4))
    for i, image in enumerate(images):
        plt.subplot(1, n_cols, i + 1, xticks=[], yticks=[])
        image = image.numpy().transpose((1, 2, 0))
        # Undo the normalisation, then clamp to the displayable [0, 1] range.
        image = np.clip(image * std + mean, 0., 1.)
        plt.imshow(image)
        col = 'green' if preds[i] == labels[i] else 'red'
        plt.xlabel(f'{class_names[int(labels[i])]}')
        plt.ylabel(f'{class_names[int(preds[i])]}', color=col)
    plt.tight_layout()
    plt.show()

In [None]:
# Pull a single batch from the training loader and display it. The labels are
# passed as the "predictions" too, so every caption is drawn in green.
images, labels = next(iter(dl_train))
show_images(images, labels, labels)

In [None]:
# Same preview for a single batch from the test loader; labels again stand in
# for the predictions.
images, labels = next(iter(dl_test))
show_images(images, labels, labels)

# Creating the Model

In [None]:
# Build torchvision's VGG-16 with pretrained weights requested, then print the
# layer structure.
vgg16 = torchvision.models.vgg16(pretrained=True)
print(vgg16)

In [None]:
# Swap the final classifier layer for a freshly initialised 2-output layer
# (Covid / Non-Covid). Assigning to `out_features` alone only changes that
# attribute; the existing weight matrix is not resized, so the layer would
# keep producing its original number of outputs.
vgg16.classifier[6] = torch.nn.Linear(vgg16.classifier[6].in_features, 2)

# Freeze the `features` sub-module so its parameters receive no gradients.
for params in vgg16.features.parameters():
    params.requires_grad = False

In [None]:
# Cross-entropy loss on the model outputs; SGD with momentum is given only the
# classifier parameters to update.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg16.classifier.parameters(), lr = 0.001, momentum=0.9)

In [None]:
def show_preds():
    """Run the model on one training batch and plot its predictions.

    Switches ``vgg16`` to evaluation mode (and leaves it there), takes the
    first batch yielded by a fresh iterator over ``dl_train``, and hands the
    images, true labels and predicted classes to ``show_images``.
    """
    vgg16.eval()
    images, labels = next(iter(dl_train))
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = vgg16(images)
    _, preds = torch.max(outputs, 1)
    show_images(images, labels, preds)

In [None]:
# NOTE(review): this is a bare reference, not a call. It only evaluates to the
# function object, so no predictions are drawn here. `show_preds()` would be
# needed to actually display a batch.
show_preds

# Define Train function

In [None]:
def _validate():
    """Return (mean batch loss, number of correct predictions) over ``dl_test``.

    Runs the model in evaluation mode without gradient tracking and switches
    it back to training mode before returning.
    """
    total_loss = 0.0
    correct = 0
    num_batches = 0
    vgg16.eval()
    with torch.no_grad():
        for val_images, val_labels in dl_test:
            outputs = vgg16(val_images)
            total_loss += loss_fn(outputs, val_labels).item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == val_labels).sum().item()
            num_batches += 1
    vgg16.train()
    # max(..., 1) avoids a division by zero when the test loader is empty.
    return total_loss / max(num_batches, 1), correct


def train(epochs):
    """Train the global ``vgg16`` model, validating every 20 training steps.

    Uses the module-level ``dl_train``, ``dl_test``, ``test_dataset``,
    ``loss_fn`` and ``optimizer``. Training stops early as soon as a
    validation pass reaches an accuracy of at least 0.99.

    Args:
        epochs: Number of passes over ``dl_train``.

    Returns:
        A 5-tuple of lists with one entry per validation pass:
        ``accuracy_yaxis`` (fraction of correct validation predictions),
        ``val_loss_yaxis`` (mean validation loss per batch),
        ``train_loss_yaxis`` (mean training loss so far in the current epoch),
        ``val_accuracy_yaxis`` (raw count of correct validation predictions),
        ``steps_xaxis`` (global training step of the validation pass).
    """
    print("Starting Training ...")
    accuracy_yaxis = []
    val_loss_yaxis = []
    train_loss_yaxis = []
    val_accuracy_yaxis = []
    steps_xaxis = []
    steps_per_epoch = len(dl_train)

    for e in range(epochs):
        print('_'*20)
        print(f'Starting epoch {e + 1} / {epochs}')
        print('_'*20)

        train_loss = 0.0
        vgg16.train()   # We set the vgg16 model to train mode

        for train_step, (images, labels) in enumerate(dl_train):
            optimizer.zero_grad()
            outputs = vgg16(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            if train_step % 20 != 0:
                continue

            print('Evaluating at step', train_step)
            # Each validation pass starts from zero, so its loss is not
            # contaminated by earlier passes in the same epoch.
            val_loss, correct = _validate()
            accuracy = correct / len(test_dataset)
            print(f'Validation Loss : {val_loss:.4f} Accuracy : {accuracy:.4f}')

            val_accuracy_yaxis.append(correct)
            # Running mean, so values are comparable across steps.
            train_loss_yaxis.append(train_loss / (train_step + 1))
            accuracy_yaxis.append(accuracy)
            val_loss_yaxis.append(val_loss)
            # Global step: full epochs completed plus the position in this one.
            steps_xaxis.append(steps_per_epoch * e + train_step)

            if accuracy >= 0.99:
                print('Performance Condition Satisfied. Stopping ... ')
                return (accuracy_yaxis, val_loss_yaxis, train_loss_yaxis, val_accuracy_yaxis, steps_xaxis)

        train_loss /= max(steps_per_epoch, 1)
        print(f'Training Loss : {train_loss:.4f}')
    print('Training Complete ...')
    return (accuracy_yaxis, val_loss_yaxis, train_loss_yaxis, val_accuracy_yaxis, steps_xaxis)

In [None]:
%%time
# Train for two epochs and keep the returned metric histories for plotting.
(accuracy_yaxis, val_loss_yaxis, train_loss_yaxis, val_accuracy_yaxis, steps_xaxis)= train(epochs = 2)

# Final Results

In [None]:
# Display the model's predictions for one batch after training.
show_preds()

# Plotting the Results

In [None]:
plt.style.use("ggplot")
plt.figure()
# Both curves come from the periodic evaluation on the test loader inside
# train(), so the accuracy shown here is validation accuracy.
plt.plot(steps_xaxis, val_loss_yaxis, label = 'validation loss')
plt.plot(steps_xaxis, accuracy_yaxis, label = 'validation accuracy')
#plt.plot(steps_xaxis, train_loss_yaxis, label = 'train loss')
#plt.plot(steps_xaxis, val_accuracy_yaxis, label = 'validation correct count')
plt.legend()
plt.title('(Validation Loss , Accuracy) VS Train Step')