


### **COVID-19 Classification using pretrained VGG16 architecture**

**1. Mount google drive to download the images**

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package) so
# files stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

**2. Importing required modules and libraries**

In [None]:
from __future__ import print_function, division

import os
import zipfile
import random
import numpy as np
import time
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

**3. Using CUDA**

In [None]:
# True when a CUDA device is available; later cells check this flag before
# moving the model and each batch to the GPU with .cuda().
use_gpu = torch.cuda.is_available()

**4. Download and Unzip the data file**

In [None]:
def data_download_unzip(local_zip='/content/drive/MyDrive/DS-Data/Data.zip',
                        extract_to='/content'):
  """Extract the dataset archive into a target directory.

  Args:
    local_zip: Path of the zip archive to read. Defaults to the archive
      location on the mounted Google Drive.
    extract_to: Directory the archive members are extracted into.

  Raises:
    FileNotFoundError: If `local_zip` does not exist.
    zipfile.BadZipFile: If `local_zip` is not a valid zip archive.
  """
  # The context manager closes the archive even when extraction fails part-way,
  # which the previous explicit close() call did not guarantee.
  with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Extraction is disabled here (the call is commented out); uncomment it to
# unpack the archive before the dataset folders are read.
#data_download_unzip()
# Dataset root; the loader cell joins it with the train/val/test folder names.
DATA_PATH = "Data"

**5. Data augmentation and statistics**

In [None]:
# Names of the three dataset splits; they double as sub-folder names under
# DATA_PATH and as keys of every dictionary built in this cell.
TRAIN, VAL, TEST = 'train', 'val', 'test'

# Every split uses the same deterministic pipeline: resize the shorter side to
# 256, take the central 224x224 crop, convert to a float tensor.
# NOTE(review): no transforms.Normalize step is applied; pretrained torchvision
# models are usually fed ImageNet-normalised inputs — confirm this is intended.
data_transforms = {
  split: transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
  ])
  for split in (TRAIN, VAL, TEST)
}

# One ImageFolder per split, rooted at DATA_PATH/<split>.
image_datasets = {
  split: datasets.ImageFolder(
      os.path.join(DATA_PATH, split),
      transform=data_transforms[split]
  )
  for split in (TRAIN, VAL, TEST)
}

# NOTE(review): shuffle=True is used for the val and test loaders as well as
# for train — confirm that is desired.
dataloaders = {
  split: torch.utils.data.DataLoader(
      dataset, batch_size=8,
      shuffle=True, num_workers=4
  )
  for split, dataset in image_datasets.items()
}

# Number of samples per split; used later to average loss and accuracy.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

for x in [TRAIN, VAL, TEST]:
  print("Loaded {} images under {}".format(dataset_sizes[x], x))


# Class labels as discovered by ImageFolder in the training directory.
class_names = image_datasets[TRAIN].classes
print("Classes: ", class_names)

def get_count_metrics(folder, data_path=DATA_PATH):
    """Count the directory entries of each class folder inside one split.

    Args:
        folder: Split sub-directory name under `data_path` (e.g. 'train').
        data_path: Dataset root directory.

    Returns:
        Tuple of ints ordered as (NORMAL, PNEUMONIA, COVID19).
    """
    split_dir = os.path.join(data_path, folder)

    # Directories are listed in the order PNEUMONIA, NORMAL, COVID19; the
    # returned tuple uses a different order (see Returns).
    entry_counts = {
        label: len(os.listdir(os.path.join(split_dir, label)))
        for label in ('PNEUMONIA', 'NORMAL', 'COVID19')
    }

    return (
        int(entry_counts['NORMAL']),
        int(entry_counts['PNEUMONIA']),
        int(entry_counts['COVID19']),
    )

# Print the (NORMAL, PNEUMONIA, COVID19) entry counts for each split.
for split_folder in ('train', 'test', 'val'):
    print(get_count_metrics(split_folder))

**6. Visualize the dataset/images**

In [None]:
def imshow(inp, title=None):
    """Render an image tensor with matplotlib, axes hidden.

    Args:
        inp: Tensor whose first axis is moved last before plotting
            (presumably channels-first image data — TODO confirm).
        title: Optional figure title; skipped when None.
    """
    # Move axis 0 to the end so matplotlib receives the layout it expects.
    image = inp.permute(1, 2, 0).numpy()
    plt.axis('off')
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so the figure is drawn before execution continues.
    plt.pause(0.001)

def show_databatch(inputs, classes):
    """Show a batch of images as one grid titled with their class names.

    Args:
        inputs: Batch of image tensors passed to torchvision's make_grid.
        classes: Iterable of class indices, each used to index `class_names`.
    """
    grid = torchvision.utils.make_grid(inputs)
    batch_titles = [class_names[idx] for idx in classes]
    imshow(grid, title=batch_titles)

# Pull one batch from the (shuffled) training loader and display it as an
# image grid titled with the corresponding class names.
inputs, classes = next(iter(dataloaders[TRAIN]))
show_databatch(inputs, classes)


**7. Define the VGG16 model**
  

*   Setting the model pre-trained parameter to True
*   Customizing the last fully connected layer with 3 output classes
*   Setting requires_grad to False for the feature-extractor (convolutional) layers; the classifier layers remain trainable





In [None]:
# Load VGG16 (batch-norm variant) with pretrained weights.
vgg16 = models.vgg16_bn(pretrained=True)

# Freeze the convolutional feature extractor so only the classifier is trained.
# The attribute must be spelled `requires_grad`; assigning to any other name
# just creates an unused attribute and leaves the parameter trainable.
for param in vgg16.features.parameters():
    param.requires_grad = False

# Swap the final classifier layer for one sized to this dataset's classes.
num_features = vgg16.classifier[6].in_features
features = list(vgg16.classifier.children())[:-1] # Remove last layer
features.append(nn.Linear(num_features, len(class_names))) # One output per class
vgg16.classifier = nn.Sequential(*features) # Replace the model classifier

if use_gpu:
  vgg16.cuda()

print(vgg16)

**7. Define loss function and optimizer**

In [None]:
# Cross-entropy loss applied to the model's raw class scores.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over the parameters returned by vgg16.parameters().
optimizer_ft = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
# StepLR scales the learning rate by gamma (0.1) every step_size (7) calls to
# scheduler.step().
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

**8. Define function to evaluate dataset and calculate accuracy**

In [None]:
def eval_model(vgg, criterion, eval_type=TEST):
    """Evaluate a model on one dataset split and print/return its metrics.

    Args:
        vgg: Model to evaluate. It is switched to eval mode and left there.
        criterion: Loss callable invoked as criterion(outputs, labels). It is
            assumed to return the batch mean (the default reduction of
            nn.CrossEntropyLoss); each batch loss is weighted by batch size.
        eval_type: Key into `dataloaders` and `dataset_sizes` selecting the
            split to evaluate.

    Returns:
        Tuple (avg_loss, avg_acc) of Python floats: the per-sample average
        loss and the fraction of correctly classified samples.
    """
    since = time.time()
    loss_test = 0.0
    acc_test = 0

    total_batches = len(dataloaders[eval_type])
    print("Evaluating model for {} data. Total {} batchs.".format(eval_type, total_batches))
    print('-' * 30)

    # Eval mode only needs to be set once, not on every batch.
    vgg.eval()

    # No gradients are needed for evaluation; disabling autograd avoids
    # building (and holding memory for) a graph on every forward pass.
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders[eval_type]):
            if i % 50 == 0:
                print("\rTest batch {}/{}\n is in progress".format(i+1, total_batches), end='', flush=True)

            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            outputs = vgg(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Weight the batch-mean loss by the batch size so that dividing by
            # the number of samples below yields a true per-sample average.
            loss_test += loss.item() * inputs.size(0)
            # .item() keeps the running count a plain int rather than a tensor.
            acc_test += (preds == labels).sum().item()

    avg_loss = loss_test / dataset_sizes[eval_type]
    avg_acc = acc_test / dataset_sizes[eval_type]

    elapsed_time = time.time() - since
    print()
    print("Evaluation completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Avg loss ({}): {:.4f}".format(eval_type,avg_loss))
    print("Avg acc ({}): {:.4f}".format(eval_type,avg_acc))
    print('-' * 30)
    return avg_loss, avg_acc

**9. Define function to train the model with train data**

In [None]:
def train_model(vgg, criterion, optimizer, scheduler, num_epochs=10):
    """Train a model and return it with the best-validation weights loaded.

    Each epoch runs one pass over the training loader, then evaluates on the
    validation split via eval_model. The weights with the highest validation
    accuracy seen so far are kept and restored before returning.

    Args:
        vgg: Model to train (modified in place).
        criterion: Loss callable invoked as criterion(outputs, labels). It is
            assumed to return the batch mean (the default reduction of
            nn.CrossEntropyLoss); each batch loss is weighted by batch size.
        optimizer: Optimizer that updates the model parameters.
        scheduler: Learning-rate scheduler stepped once per epoch, or None to
            keep the learning rate fixed.
        num_epochs: Number of training epochs.

    Returns:
        The same model object, loaded with the best-validation weights.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(vgg.state_dict())
    best_acc = 0.0

    train_batches = len(dataloaders[TRAIN])

    for epoch in range(num_epochs):
        print("Epoch {}/{} is in progress".format(epoch+1, num_epochs))
        print('-' * 30)

        loss_train = 0.0
        acc_train = 0

        # eval_model leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        vgg.train(True)

        for i, (inputs, labels) in enumerate(dataloaders[TRAIN]):
            if i % 100 == 0:
                print("\rTraining batch {}/{} is in progress".format(i+1, train_batches), end='', flush=True)

            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            optimizer.zero_grad()
            outputs = vgg(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predictions are only used for bookkeeping, so detach them.
            _, preds = torch.max(outputs.detach(), 1)

            # Weight the batch-mean loss by the batch size so that dividing by
            # the number of samples below yields a true per-sample average.
            loss_train += loss.item() * inputs.size(0)
            acc_train += (preds == labels).sum().item()

        # Advance the learning-rate schedule once per epoch, after the
        # optimizer updates; without this call the schedule never takes effect.
        if scheduler is not None:
            scheduler.step()

        avg_loss = loss_train / dataset_sizes[TRAIN]
        avg_acc = acc_train / dataset_sizes[TRAIN]

        avg_loss_val, avg_acc_val = eval_model(vgg, criterion, VAL)
        # Normalise to a Python float whether eval_model returns a float or a
        # zero-dimensional tensor.
        avg_acc_val = float(avg_acc_val)

        print()
        # Report the 1-based epoch number, matching the progress header above.
        print("Epoch {} result: ".format(epoch + 1))
        print("Avg loss (train): {:.4f}".format(avg_loss))
        print("Avg acc (train): {:.4f}".format(avg_acc))
        print("Avg loss (val): {:.4f}".format(avg_loss_val))
        print("Avg acc (val): {:.4f}".format(avg_acc_val))
        print('-' * 30)
        print()

        # Snapshot the weights whenever validation accuracy improves.
        if avg_acc_val > best_acc:
            best_acc = avg_acc_val
            best_model_wts = copy.deepcopy(vgg.state_dict())

    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))

    vgg.load_state_dict(best_model_wts)
    return vgg

**10. Define function to predict classification for few images**

In [None]:
def visualize_model(vgg, num_images=6):
    """Display test batches with their ground-truth and predicted labels.

    Whole batches are shown, so more than `num_images` images may be displayed;
    iteration stops once at least `num_images` images have been shown.

    Args:
        vgg: Model used for prediction. Its training/eval mode is restored to
            what it was on entry, even if displaying a batch raises.
        num_images: Minimum number of images to display.
    """
    was_training = vgg.training

    # Set model for evaluation
    vgg.eval()

    images_so_far = 0

    try:
        # Inference only: disable autograd so no graph is built per batch.
        with torch.no_grad():
            for inputs, labels in dataloaders[TEST]:
                size = inputs.size(0)

                if use_gpu:
                    inputs, labels = inputs.cuda(), labels.cuda()

                outputs = vgg(inputs)

                _, preds = torch.max(outputs, 1)
                # Plain Python ints index `class_names` directly.
                predicted_labels = preds.cpu().tolist()

                print("Ground truth:")
                show_databatch(inputs.cpu(), labels.cpu())
                print("Prediction:")
                show_databatch(inputs.cpu(), predicted_labels)

                images_so_far += size
                if images_so_far >= num_images:
                    break
    finally:
        vgg.train(mode=was_training) # Revert model back to original training state

**11. Test the model accuracy before training**

In [None]:
#eval_model(vgg16, criterion, TEST)

**12. Train the model (validation accuracy is reported after every epoch)**

In [None]:
# Train for 10 epochs; train_model loads the weights with the best validation
# accuracy into the model before returning it.
vgg16 = train_model(vgg16, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)

**13. Evaluate the model on test dataset**

In [None]:
# eval_type is left at its default (TEST), so these are test-split metrics.
avg_loss_test, avg_acc_test = eval_model(vgg16, criterion)

**14. Plot accuracy and loss**

In [None]:
#plt.figure(figsize=(10, 7))
#plt.plot(train_accuracy, color='green', label='train accuracy')
#plt.plot(val_accuracy, color='blue', label='validataion accuracy')
#plt.legend()
#plt.savefig('accuracy.png')
#plt.show()

**15. Predict classification for few images**

In [None]:
# Show ground-truth and predicted labels for test batches until at least 32
# images have been displayed.
visualize_model(vgg16, num_images=32)