# Two Binary Classifier

## Import necessary libraries

In [36]:
import copy
import random
import time
import os

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms

from torch.optim.lr_scheduler import _LRScheduler
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

## Prepare datasets

In [34]:
# train
dir = "dataset/train"
train_normal=len(os.listdir(os.path.join(dir,"normal")))
train_infected_covid=len(os.listdir(os.path.join(dir,"infected","covid")))
train_infected_non_covid=len(os.listdir(os.path.join(dir,"infected","non-covid")))

# val
dir = "dataset/val"
val_normal=len(os.listdir(os.path.join(dir,"normal")))
val_infected_covid=len(os.listdir(os.path.join(dir,"infected","covid")))
val_infected_non_covid=len(os.listdir(os.path.join(dir,"infected","non-covid")))

# test
dir = "dataset/test"
test_normal=len(os.listdir(os.path.join(dir,"normal")))
test_infected_covid=len(os.listdir(os.path.join(dir,"infected","covid")))
test_infected_non_covid=len(os.listdir(os.path.join(dir,"infected","non-covid")))

In [5]:
# print
print("Train")
print("Normal:", train_normal)
print("Infected:", train_infected_covid + train_infected_non_covid)
print("Infected covid:", train_infected_covid)
print("Infected non covid:", train_infected_non_covid)

print("============================================================")
print("Validation")
print("Normal:", val_normal)
print("Infected:", val_infected_covid + val_infected_non_covid)
print("Infected covid:", val_infected_covid)
print("Infected non covid:", val_infected_non_covid)

print("============================================================")
print("Test")
print("Normal:", test_normal)
print("Infected:", test_infected_covid + test_infected_non_covid)
print("Infected covid:", test_infected_covid)
print("Infected non covid:", test_infected_non_covid)

Train
Normal: 1341
Infected: 3875
Infected covid: 1345
Infected non covid: 2530
Validation
Normal: 8
Infected: 17
Infected covid: 9
Infected non covid: 8
Test
Normal: 234
Infected: 381
Infected covid: 139
Infected non covid: 242


In [6]:
# Number of images in each part of the dataset
dataset_numbers = {'train_normal': 1341,\
                   'train_infected': 3875,\
                   'train_infected_covid': 1345,\
                   'train_infected_non_covid': 2530, \
                   'val_normal': 8,\
                   'val_infected': 17, \
                   'val_infected_covid': 9,\
                   'val_infected_non_covid': 8,\
                   'test_normal': 234, \
                   'test_infected': 381, \
                   'test_infected_covid': 139, \
                   'test_infected_non_covid': 242}

## Deep Learning Model - AlexNet
We will primarily use AlexNet for both binary classifiers

In [11]:
class AlexNet(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        
        self.features = nn.Sequential(
            nn.Conv2d(1, 96, 11, 1, 1), #in_channels, out_channels, kernel_size, stride, padding
            nn.MaxPool2d(3), #kernel_size
            nn.ReLU(inplace = True),
            nn.Conv2d(96, 256, 5, padding = 0),
            nn.MaxPool2d(3),
            nn.ReLU(inplace = True),
            nn.Conv2d(256, 384, 3, padding = 0),
            nn.ReLU(inplace = True),
            nn.Conv2d(384, 384, 3, padding = 0),
            nn.ReLU(inplace = True),
            nn.Conv2d(384, 256, 3, padding = 0),
            nn.MaxPool2d(3),
            nn.ReLU(inplace = True)
        )
        
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256*2*2 , 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace = True),
            nn.Linear(4096, output_dim),
        )

    def forward(self, x):
        x = self.features(x)
        h = x.view(x.shape[0], -1)
        x = self.classifier(h)
        return x, h

In [12]:
# Initialise model
OUTPUT_DIM = 2
model = AlexNet(OUTPUT_DIM)

In [37]:
# Fix a seed
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [13]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 24,711,874 trainable parameters


In [14]:
def initialize_parameters(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight.data, nonlinearity = 'relu')
        nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight.data, gain = nn.init.calculate_gain('relu'))
        nn.init.constant_(m.bias.data, 0)

In [15]:
model.apply(initialize_parameters)

AlexNet(
  (features): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(1, 1), padding=(1, 1))
    (1): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1))
    (4): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (5): ReLU(inplace=True)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1))
    (11): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (12): ReLU(inplace=True)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=1024, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=40

In [10]:
def plot_images(images, labels, classes, normalize = False):

    n_images = len(images)

    rows = int(np.sqrt(n_images))
    cols = int(np.sqrt(n_images))

    fig = plt.figure(figsize = (10, 10))

    for i in range(rows*cols):

        ax = fig.add_subplot(rows, cols, i+1)
        
        image = images[i]

        if normalize:
            image_min = image.min()
            image_max = image.max()
            image.clamp_(min = image_min, max = image_max)
            image.add_(-image_min).div_(image_max - image_min + 1e-5)

        ax.imshow(image.permute(1, 2, 0).cpu().numpy())
        ax.set_title(classes[labels[i]])
        ax.axis('off')

## Data Loader for two binary classifier

### Binary Classifier #1

Run the cells below to pick the normal & infected (combined) data set

In [44]:
from dataloader import Binary_Lung_Dataset

# Test
bs = 16
ld_train = Binary_Lung_Dataset(groups="train",  classify="normal")
ld_val = Binary_Lung_Dataset(groups="val", classify="normal")
ld_test = Binary_Lung_Dataset(groups="test",  classify="normal")
train_loader = DataLoader(ld_train, batch_size = bs, shuffle = True)
test_loader = DataLoader(ld_test, batch_size = bs, shuffle = True)
val_loader = DataLoader(ld_val, batch_size = bs, shuffle = True)

print(train_loader)
print(val_loader)
print(test_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f7867bb4080>
<torch.utils.data.dataloader.DataLoader object at 0x7f7867bb4668>
<torch.utils.data.dataloader.DataLoader object at 0x7f7867bb42e8>


In [45]:
for k, v in enumerate(train_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

for k, v in enumerate(test_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

for k, v in enumerate(val_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

-----
0
torch.Size([16, 1, 150, 150])
-----
0
torch.Size([16, 1, 150, 150])
-----
0
torch.Size([16, 1, 150, 150])


### Training Phase

In [16]:
def train(model, iterator, optimizer, criterion, device):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for (x, y) in iterator:
        
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
                
        y_pred, _ = model(x)
        
        loss = criterion(y_pred, torch.max(y,1)[1])
        
        acc = calculate_accuracy(y_pred, torch.max(y,1)[1])
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [75]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [None]:
def confusion_matrix(y_pred,y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()

In [19]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [47]:
START_LR = 1e-7

optimizer = optim.Adam(model.parameters(), lr = START_LR)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.NLLLoss()

model = model.to(device)
criterion = criterion.to(device)

### Start Training

In [50]:
EPOCHS = 20

best_valid_loss = float('inf')

for epoch in range(EPOCHS):
    
    start_time = time.monotonic()
    
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    valid_loss, valid_acc = evaluate(model, test_loader, criterion, device)
        
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'two_binary_classifier_1_alexnet.pt')

    end_time = time.monotonic()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    print(f'Epoch: {epoch+1:02} - Epoch Duration: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 - Epoch Duration: 0m 12s
	Train Loss: -1995.647 | Train Acc: 74.29%
	 Val. Loss: -1723.940 |  Val. Acc: 61.88%
Epoch: 02 - Epoch Duration: 0m 12s
	Train Loss: -2325.312 | Train Acc: 74.29%
	 Val. Loss: -2022.510 |  Val. Acc: 62.09%
Epoch: 03 - Epoch Duration: 0m 12s
	Train Loss: -2712.055 | Train Acc: 74.29%
	 Val. Loss: -2368.600 |  Val. Acc: 61.88%
Epoch: 04 - Epoch Duration: 0m 12s
	Train Loss: -3173.330 | Train Acc: 74.29%
	 Val. Loss: -2784.061 |  Val. Acc: 62.29%
Epoch: 05 - Epoch Duration: 0m 12s
	Train Loss: -3706.755 | Train Acc: 74.29%
	 Val. Loss: -3262.450 |  Val. Acc: 62.29%
Epoch: 06 - Epoch Duration: 0m 12s
	Train Loss: -4332.899 | Train Acc: 74.29%
	 Val. Loss: -3813.598 |  Val. Acc: 61.88%
Epoch: 07 - Epoch Duration: 0m 12s
	Train Loss: -5055.871 | Train Acc: 74.29%
	 Val. Loss: -4470.702 |  Val. Acc: 62.09%
Epoch: 08 - Epoch Duration: 0m 12s
	Train Loss: -5885.456 | Train Acc: 74.29%
	 Val. Loss: -5198.708 |  Val. Acc: 61.68%
Epoch: 09 - Epoch Duration: 0m 1

### Evaluation

In [73]:
def evaluate(model, iterator, criterion, device):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
        
        for (x, y) in iterator:

            x = x.to(device)
            y = y.to(device)

            y_pred, _ = model(x)

            loss = criterion(y_pred, torch.max(y,1)[1])

            acc = calculate_accuracy(y_pred, torch.max(y,1)[1])

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [71]:
saved_model_path = 'two_binary_classifier_1_alexnet.pt'
output_dim = 2

# Load from saved model
model = AlexNet(output_dim)
model.load_state_dict(torch.load(saved_model_path))

<All keys matched successfully>

In [76]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = criterion.to(device)

# Load from saved model and evaluate
evaluation_results = evaluate(model, test_loader, criterion, device)
print(evaluation_results)

(-27526.669759114582, 0.6356169879436493)


### Binary Classifier #2

Run the cells below to pick the infected data set consisting of covid & non-covid

In [55]:
from dataloader import Binary_Lung_Dataset

# Test
bs = 16
ld_train = Binary_Lung_Dataset(groups="train",  classify="infected")
ld_val = Binary_Lung_Dataset(groups="val", classify="infected")
ld_test = Binary_Lung_Dataset(groups="test",  classify="infected")
train_loader = DataLoader(ld_train, batch_size = bs, shuffle = True)
test_loader = DataLoader(ld_test, batch_size = bs, shuffle = True)
val_loader = DataLoader(ld_val, batch_size = bs, shuffle = True)

print(train_loader)
print(val_loader)
print(test_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f78676b37b8>
<torch.utils.data.dataloader.DataLoader object at 0x7f7867bb44e0>
<torch.utils.data.dataloader.DataLoader object at 0x7f78676b3550>


In [56]:
for k, v in enumerate(train_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

for k, v in enumerate(test_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

for k, v in enumerate(val_loader):
    print("-----")
    print(k)
    print(v[0].shape)
    # Forced stop
    break

-----
0
torch.Size([16, 1, 150, 150])
-----
0
torch.Size([16, 1, 150, 150])
-----
0
torch.Size([16, 1, 150, 150])


### Training Phase

Some functions are written above in Binary Classfier #1 already

In [58]:
START_LR = 1e-7

optimizer = optim.Adam(model.parameters(), lr = START_LR)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.NLLLoss()

model = model.to(device)
criterion = criterion.to(device)

### Start Training

In [60]:
EPOCHS = 20

best_valid_loss = float('inf')

for epoch in range(EPOCHS):
    
    start_time = time.monotonic()
    
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    valid_loss, valid_acc = evaluate(model, test_loader, criterion, device)
        
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'two_binary_classifier_2_alexnet.pt')

    end_time = time.monotonic()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    print(f'Epoch: {epoch+1:02} - Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 - Epoch Time: 0m 10s
	Train Loss: -141920.464 | Train Acc: 65.18%
	 Val. Loss: -138267.469 |  Val. Acc: 63.62%
Epoch: 02 - Epoch Time: 0m 9s
	Train Loss: -152703.510 | Train Acc: 65.29%
	 Val. Loss: -148712.132 |  Val. Acc: 63.62%
Epoch: 03 - Epoch Time: 0m 9s
	Train Loss: -164244.611 | Train Acc: 65.29%
	 Val. Loss: -159828.820 |  Val. Acc: 63.56%
Epoch: 04 - Epoch Time: 0m 9s
	Train Loss: -176023.972 | Train Acc: 65.29%
	 Val. Loss: -171474.725 |  Val. Acc: 63.38%
Epoch: 05 - Epoch Time: 0m 9s
	Train Loss: -189103.850 | Train Acc: 65.29%
	 Val. Loss: -184232.952 |  Val. Acc: 63.50%
Epoch: 06 - Epoch Time: 0m 9s
	Train Loss: -202725.753 | Train Acc: 65.29%
	 Val. Loss: -197547.882 |  Val. Acc: 63.38%
Epoch: 07 - Epoch Time: 0m 9s
	Train Loss: -217177.383 | Train Acc: 65.18%
	 Val. Loss: -211912.485 |  Val. Acc: 63.56%
Epoch: 08 - Epoch Time: 0m 9s
	Train Loss: -232423.012 | Train Acc: 65.41%
	 Val. Loss: -226580.790 |  Val. Acc: 63.32%
Epoch: 09 - Epoch Time: 0m 8s
	Train Lo

### Evaluation

In [68]:
## Evaluation
output_dim = 2
saved_model_path = 'two_binary_classifier_2_alexnet.pt'


# Load from saved model
model = AlexNet(output_dim)
model.load_state_dict(torch.load(saved_model_path))

<All keys matched successfully>

In [119]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = criterion.to(device)

# Load from saved model and evaluate
evaluation_results = evaluate(model, test_loader, criterion, device)
print(evaluation_results)

(-27567.50244140625, 0.6374198744694392)


## Combined Evaluation

In [116]:
## Evaluation
output_dim = 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.NLLLoss()

criterion = criterion.to(device)



# Model 1
saved_model_path_1 = 'two_binary_classifier_1_alexnet.pt'
# Load from saved model
model1 = AlexNet(output_dim)
model1.load_state_dict(torch.load(saved_model_path_1))
model1 = model1.to(device)


# Model 2
saved_model_path_2 = 'two_binary_classifier_2_alexnet.pt'
# Load from saved model
model2 = AlexNet(output_dim)
model2.load_state_dict(torch.load(saved_model_path_2))
model2 = model2.to(device)