In [1]:
import os
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
import cv2
from torch.utils.data import DataLoader
import numpy as np

In [None]:
from sklearn import metrics
def metric(y_test, y_pred):
    matrix = metrics.confusion_matrix(y_test, y_pred)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    recall = metrics.recall_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)
    return matrix,accuracy,precision,recall,f1

<h1>Steps to load data from drive to google collab

In [None]:
!gdown --id 1pfywfqKz5OYzdADGPNpMEqBpX9TX1cMu

Downloading...
From: https://drive.google.com/uc?id=1pfywfqKz5OYzdADGPNpMEqBpX9TX1cMu
To: /content/cell_images.zip
100% 367M/367M [00:01<00:00, 189MB/s]


In [None]:
!unzip cell_images
!mkdir dataset


Archive:  cell_images.zip
replace __MACOSX/._cell_images? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# os.mkdir("dataset")
os.mkdir("dataset/train")
os.mkdir("dataset/test")
os.mkdir("dataset/train/parasite")
os.mkdir("dataset/test/parasite")
os.mkdir("dataset/train/uninfected")
os.mkdir("dataset/test/uninfected")

In [None]:
def move_files(abs_dirname, train_dir, test_dir):
    """Move files into subdirectories."""

    files = [os.path.join(abs_dirname, f) for f in os.listdir(abs_dirname)]

    i = 0
    curr_subdir = None
    files.sort()

    for f in files:
        # create new subdir if necessary
        if i == 0:
            curr_subdir = train_dir
        
        if i == 11025:
            curr_subdir = test_dir

        # move file to current dir
        f_base = os.path.basename(f)
        shutil.copy(f, os.path.join(curr_subdir, f_base))
        i += 1
        if(i%1000 == 0):
            print(i)

In [None]:
abs_dirname = "cell_images/Parasitized/"
train_dir = "dataset/train/parasite"
test_dir = "dataset/test/parasite"
move_files(abs_dirname, train_dir, test_dir)

In [None]:
abs_dirname = "cell_images/Uninfected/"
train_dir = "dataset/train/uninfected"
test_dir = "dataset/test/uninfected"
move_files(abs_dirname, train_dir, test_dir)

In [None]:
import pandas as pd
train_df = pd.DataFrame(columns = ["path", "label"])
test_df = pd.DataFrame(columns = ["path", "label"])

abs_dirname  = "dataset/train/parasite"
files = [os.path.join(abs_dirname, f) for f in os.listdir(abs_dirname)]
for f in files:
    f_base = os.path.basename(f)
    if f_base.endswith('.png'):
        train_df = train_df.append({'path': f_base, "label": 1}, ignore_index=True)

abs_dirname  = "dataset/train/uninfected"
files = [os.path.join(abs_dirname, f) for f in os.listdir(abs_dirname)]
for f in files:
    f_base = os.path.basename(f)
    if f_base.endswith('.png'):
        train_df = train_df.append({'path': f_base, "label": 0}, ignore_index=True)
    
train_df.to_csv("dataset/train.csv", index = False)


abs_dirname  = "dataset/test/parasite"
files = [os.path.join(abs_dirname, f) for f in os.listdir(abs_dirname)]
for f in files:
    f_base = os.path.basename(f)
    if f_base.endswith('.png'):
        test_df = test_df.append({'path': f_base, "label": 1}, ignore_index=True)
    
abs_dirname  = "dataset/test/uninfected"
files = [os.path.join(abs_dirname, f) for f in os.listdir(abs_dirname)]
for f in files:
    f_base = os.path.basename(f)
    if f_base.endswith('.png'):
        test_df = test_df.append({'path': f_base, "label": 0}, ignore_index=True)
    
test_df.to_csv("dataset/test.csv", index = False)

In [None]:
test_df

<h1>Creating dataloader for CNN model

In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        label = self.img_labels['label'][idx]
        class_type = "parasite" if label == 1 else "uninfected"
        img_path = os.path.join(self.img_dir, class_type, self.img_labels.iloc[idx, 0])
#         print(img_path)
        image = cv2.imread(img_path)
        image = cv2.resize(image, (32, 32))
        image = np.moveaxis(image, -1, 0)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image.astype(np.float32)/255.0, label

In [None]:
train_dir = "dataset/train"
test_dir = "dataset/test"
train_csv_path = "dataset/train.csv"
test_csv_path = "dataset/test.csv"

In [None]:
!ls

<h1>CNN

In [None]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [None]:
# Hyper-parameters 
num_epochs = 100
batch_size = 256
learning_rate = 0.005

In [None]:
##train and test dataloader
training_data = CustomImageDataset(annotations_file=train_csv_path, img_dir= train_dir)
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
testing_data = CustomImageDataset(annotations_file=test_csv_path, img_dir= test_dir)
test_loader = DataLoader(testing_data, batch_size=1024, shuffle=True)

In [None]:
#Dataframe to store accuracy
train_accuracy_df = pd.DataFrame(columns = ['Accuracy', 'Precision', 'Recall','F1'], 
                   index = ['VGG11', 'VGG13', 'VGG16', 'VGG19'])

test_accuracy_df = pd.DataFrame(columns = ['Accuracy', 'Precision', 'Recall','F1'], 
                   index = ['VGG11', 'VGG13', 'VGG16', 'VGG19'])

In [2]:
# clas for CNN architecture
class ConvNet(nn.Module):
    def __init__(self, cnn_type, in_channels=3, num_classes=1):
        super(ConvNet, self).__init__()
        self.in_channels = in_channels
        self.cnn_type = cnn_type
        self.VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],}
        self.conv_layers = self.create_conv_layers(self.VGG_types[self.cnn_type])
        
        self.fcs = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            nn.Linear(512, num_classes)
            )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return torch.sigmoid(x)

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels
        
        for x in architecture:
            if type(x) == int:
                out_channels = x
                
                layers += [nn.Conv2d(in_channels=in_channels,out_channels=out_channels,
                                     kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                           nn.BatchNorm2d(x),
                           nn.ReLU()]
                in_channels = x
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]
                
        return nn.Sequential(*layers)


In [None]:
#Function to train the model
def train(cnn_type, check):
  model = ConvNet(cnn_type = cnn_type).to(device)
  PATH = f'./{model.cnn_type}.pth'
  if(check == 'save'):
    pass
  elif check == 'load':
    print("Model already present")
    model.load_state_dict(torch.load(PATH))

  model.train()
  criterion = nn.BCELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
  loss_plot = []
  condition = 0
  n_total_steps = len(train_loader)
  for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
      images = images.to(device)
      labels = labels.unsqueeze(1)
      labels = labels.float()
      labels = labels.to(device)

      # Forward pass
      outputs = model(images)
      loss = criterion(outputs, labels)

     # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      condition = loss.item()
      if (i+1) % 64 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.6f}')
        loss_plot.append(loss.item())
    print('Model saved after epoch: ', epoch)
    torch.save(model.state_dict(), PATH)
      #    if condition < 5e-7:
      #     loss_plot.append(loss.item())
      #     break
      #  if condition < 5e-7:
      #    print("Terminating at epoch level")
      #    break
  print('Finished Training Completely')
  torch.save(model.state_dict(), PATH)

In [None]:
#Function to evaluate the test dataset
def test(data_loader, model):
  with torch.no_grad():
    y_pred = []
    y_hat = []
    for images, labels in data_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        outputs = outputs.cpu().detach().numpy()
        labels = labels.cpu().detach().numpy()
        y_pred.extend(outputs)
        y_hat.extend(labels)
  y_pred = np.array(y_pred).flatten()
  y_hat = np.array(y_hat).flatten()
  y_pred =  np.where(y_pred>=0.5, 1, 0) 
  return metric(y_hat, y_pred) 

In [None]:
train('VGG11', check = 'load')

Model already present
Epoch [1/100], Loss: 0.006328
Model saved after epoch:  0
Epoch [2/100], Loss: 0.013583
Model saved after epoch:  1
Epoch [3/100], Loss: 0.005936
Model saved after epoch:  2
Epoch [4/100], Loss: 0.001391
Model saved after epoch:  3
Epoch [5/100], Loss: 0.018151
Model saved after epoch:  4
Epoch [6/100], Loss: 0.006892
Model saved after epoch:  5
Epoch [7/100], Loss: 0.007882
Model saved after epoch:  6
Epoch [8/100], Loss: 0.000130
Model saved after epoch:  7
Epoch [9/100], Loss: 0.002457
Model saved after epoch:  8
Epoch [10/100], Loss: 0.005025
Model saved after epoch:  9
Epoch [11/100], Loss: 0.007549
Model saved after epoch:  10
Epoch [12/100], Loss: 0.001089
Model saved after epoch:  11
Epoch [13/100], Loss: 0.035273
Model saved after epoch:  12
Epoch [14/100], Loss: 0.004374
Model saved after epoch:  13
Epoch [15/100], Loss: 0.002940
Model saved after epoch:  14
Epoch [16/100], Loss: 0.001914
Model saved after epoch:  15
Epoch [17/100], Loss: 0.002556
Model 

In [None]:
model = ConvNet(cnn_type = 'VGG11').to(device)
PATH = f'./VGG11.pth'
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
matrix, accuracy, precision, recall, f1 = test(test_loader, model)
test_accuracy_df.loc['VGG11'] = [accuracy,precision,recall,f1]
matrix

array([[2591,  163],
       [ 112, 2642]])

In [None]:
matrix, accuracy, precision, recall, f1 = test(train_loader, model)
train_accuracy_df.loc['VGG11'] = [accuracy,precision,recall,f1]
matrix

array([[11016,     9],
       [    1, 11024]])

VGG13

In [None]:
train('VGG13', check = 'save')

Epoch [1/100], Loss: 0.680203
Model saved after epoch:  0
Epoch [2/100], Loss: 0.612463
Model saved after epoch:  1
Epoch [3/100], Loss: 0.200373
Model saved after epoch:  2
Epoch [4/100], Loss: 0.129882
Model saved after epoch:  3
Epoch [5/100], Loss: 0.131890
Model saved after epoch:  4
Epoch [6/100], Loss: 0.152770
Model saved after epoch:  5
Epoch [7/100], Loss: 0.146547
Model saved after epoch:  6
Epoch [8/100], Loss: 0.158958
Model saved after epoch:  7
Epoch [9/100], Loss: 0.158812
Model saved after epoch:  8
Epoch [10/100], Loss: 0.170253
Model saved after epoch:  9
Epoch [11/100], Loss: 0.089195
Model saved after epoch:  10
Epoch [12/100], Loss: 0.108425
Model saved after epoch:  11
Epoch [13/100], Loss: 0.145614
Model saved after epoch:  12
Epoch [14/100], Loss: 0.078569
Model saved after epoch:  13
Epoch [15/100], Loss: 0.177840
Model saved after epoch:  14
Epoch [16/100], Loss: 0.138236
Model saved after epoch:  15
Epoch [17/100], Loss: 0.108747
Model saved after epoch:  16

KeyboardInterrupt: ignored

In [None]:
model = ConvNet(cnn_type = 'VGG13').to(device)
PATH = f'./VGG13.pth'
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
matrix, accuracy, precision, recall, f1 = test(test_loader, model)
test_accuracy_df.loc['VGG13'] = [accuracy,precision,recall,f1]
matrix

array([[2605,  149],
       [ 110, 2644]])

In [None]:
matrix, accuracy, precision, recall, f1 = test(train_loader, model)
train_accuracy_df.loc['VGG13'] = [accuracy,precision,recall,f1]
matrix

array([[10900,   125],
       [   61, 10964]])

VGG 16

In [None]:
train('VGG16', check = 'load')

In [None]:
model = ConvNet(cnn_type = 'VGG16').to(device)
PATH = f'./VGG16.pth'
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
matrix, accuracy, precision, recall, f1 = test(test_loader, model)
test_accuracy_df.loc['VGG16'] = [accuracy,precision,recall,f1]
matrix

array([[2670,   84],
       [ 150, 2604]])

In [None]:
matrix, accuracy, precision, recall, f1 = test(train_loader, model)
train_accuracy_df.loc['VGG16'] = [accuracy,precision,recall,f1]
matrix

array([[10917,   108],
       [   95, 10930]])

VGG 19

In [None]:
train('VGG19', check = 'save')

Epoch [1/100], Loss: 0.671630
Model saved after epoch:  0
Epoch [2/100], Loss: 0.394184
Model saved after epoch:  1
Epoch [3/100], Loss: 0.222454
Model saved after epoch:  2
Epoch [4/100], Loss: 0.130576
Model saved after epoch:  3
Epoch [5/100], Loss: 0.127092
Model saved after epoch:  4
Epoch [6/100], Loss: 0.093042
Model saved after epoch:  5
Epoch [7/100], Loss: 0.120667
Model saved after epoch:  6
Epoch [8/100], Loss: 0.116562
Model saved after epoch:  7
Epoch [9/100], Loss: 0.121100
Model saved after epoch:  8
Epoch [10/100], Loss: 0.107153
Model saved after epoch:  9
Epoch [11/100], Loss: 0.076220
Model saved after epoch:  10
Epoch [12/100], Loss: 0.101551
Model saved after epoch:  11
Epoch [13/100], Loss: 0.086301
Model saved after epoch:  12
Epoch [14/100], Loss: 0.089381
Model saved after epoch:  13
Epoch [15/100], Loss: 0.106028
Model saved after epoch:  14
Epoch [16/100], Loss: 0.092926
Model saved after epoch:  15
Epoch [17/100], Loss: 0.090328
Model saved after epoch:  16

In [None]:
model = ConvNet(cnn_type = 'VGG19').to(device)
PATH = f'./VGG19.pth'
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
matrix, accuracy, precision, recall, f1 = test(test_loader, model)
test_accuracy_df.loc['VGG19'] = [accuracy,precision,recall,f1]
matrix

array([[2641,  113],
       [ 120, 2634]])

In [None]:
matrix, accuracy, precision, recall, f1 = test(train_loader, model)
train_accuracy_df.loc['VGG19'] = [accuracy,precision,recall,f1]
matrix

array([[11022,     3],
       [    8, 11017]])

In [None]:
train_accuracy_df

Unnamed: 0,Accuracy,Precision,Recall,F1
VGG11,0.999546,0.999184,0.999909,0.999547
VGG13,0.991565,0.988728,0.994467,0.991589
VGG16,0.990794,0.990216,0.991383,0.990799
VGG19,0.999501,0.999728,0.999274,0.999501


In [None]:
test_accuracy_df

Unnamed: 0,Accuracy,Precision,Recall,F1
VGG11,0.950073,0.941889,0.959332,0.950531
VGG13,0.952977,0.946652,0.960058,0.953308
VGG16,0.957516,0.96875,0.945534,0.957001
VGG19,0.957698,0.958864,0.956427,0.957644
