In [1]:
import numpy as np
import pandas as pd
import torch
import os
import matplotlib.pyplot as plt
import torch.nn.functional as F 
from torch import nn,optim
from torchvision import transforms as T,datasets,models
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
from collections import OrderedDict
from tqdm import tqdm
from torch import nn, optim
from torch.autograd import Variable

In [2]:
def data_transforms(phase = None):
    if (phase == TRAIN):
        data_T = T.Compose([
                T.Resize(size = (256,256)),
                T.RandomRotation(degrees = (-20,+20)),
                T.CenterCrop(size=224),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])
    
    elif (phase == TEST or phase == VAL):
        data_T = T.Compose([
                T.Resize(size = (224,224)),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])
        
    return data_T

In [3]:
from sklearn.model_selection import train_test_split

data_dir = "dataset/chest_xray/chest_xray/"
TRAIN = 'train'
VAL ='val_2'
TEST = 'test'

train_set = datasets.ImageFolder(os.path.join(data_dir, TRAIN), transform = data_transforms(TRAIN))
val_set = datasets.ImageFolder(os.path.join(data_dir, VAL), transform = data_transforms(VAL))
test_set = datasets.ImageFolder(os.path.join(data_dir, TEST), transform = data_transforms(TEST))

In [4]:
class_names = train_set.classes
print(class_names)
print(train_set.class_to_idx)

['NORMAL', 'PNEUMONIA']
{'NORMAL': 0, 'PNEUMONIA': 1}


In [5]:
train_dl = DataLoader(train_set,batch_size = 64, shuffle = True)
val_dl = DataLoader(val_set,batch_size = 64, shuffle = True)
test_dl = DataLoader(test_set,batch_size = 64, shuffle = True)

images, labels = next(iter(train_dl))
print(images.shape)
print(labels.shape)

torch.Size([64, 3, 224, 224])
torch.Size([64])


In [6]:
# for i, (images,labels) in enumerate(trainloader):
#     if torch.cuda.is_available():
#         images=Variable(images.cuda())
#         labels=Variable(labels.cuda())
print(f"train: {len(train_set)} \t val: {len(val_set)} \t test: {len(test_set)}")

train: 5216 	 val: 1514 	 test: 624


In [7]:
images.shape, labels.shape

(torch.Size([64, 3, 224, 224]), torch.Size([64]))

In [8]:
class classify(nn.Module):
    def __init__(self,num_classes=2):
        super(classify,self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 12, kernel_size = 3, stride = 1, padding = 1)
        self.bn1 = nn.BatchNorm2d(num_features = 12)
        self.relu1 = nn.ReLU()        
        self.pool = nn.MaxPool2d(kernel_size = 2)
        self.conv2 = nn.Conv2d(in_channels = 12, out_channels = 20, kernel_size = 3, stride = 1, padding = 1)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(in_channels = 20, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
        self.bn3 = nn.BatchNorm2d(num_features = 32)
        self.relu3 = nn.ReLU()
        self.fc = nn.Linear(in_features = 32 * 112 * 112, out_features=num_classes)
        
    def forward(self,input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        output = self.pool(output)
        output = self.conv2(output)
        output = self.relu2(output)
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)            
        output = output.view(-1, 32 * 112 * 112)
        output = self.fc(output)
            
        return output

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = classify()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# if torch.cuda.is_available():
#     model = model.cuda()
#     criterion = criterion.cuda()

In [10]:
import torch.nn as nn
import torch.optim as optim
from sklearn import metrics
import numpy as np

losses = []
num_epochs = 10
model.to(device)

for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct = 0
    val_loss = 0.0
    val_correct = 0
    # running_loss = 0

    # Training
    model.train()
    for images, labels in train_dl:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        _, preds = torch.max(outputs, 1)
        train_loss += loss.item() * images.size(0)
        train_correct += torch.sum(preds == labels.data)

        losses.append(loss)
        
        # running_loss += loss.item()

    train_loss = train_loss / len(train_set)
    train_acc = train_correct.double() / len(train_set)
    # else:
    #     print("Epoch {} - Training loss: {}".format(i+1, running_loss/len(trainloader)))

    # Validation
    model.eval()
    val_preds = []
    val_labels = []
    
    with torch.no_grad():
        for images, labels in val_dl:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            # prob = torch.sigmoid(outputs)

            # preds = (prob > 0.5).long()
            _, preds = torch.max(outputs, 1)  # Convert logits to class predictions
            loss = criterion(outputs, labels)  # Use CrossEntropyLoss()

            # loss = criterion(outputs, labels)
            # loss = criterion(outputs.squeeze(), labels.float())

            val_loss += loss.item() * images.size(0)
            
            val_preds.extend(preds.cpu().detach().numpy().tolist())
            val_labels.extend(labels.cpu().detach().numpy().tolist())
            
    val_loss = val_loss / len(val_set)
    val_acc = metrics.accuracy_score(val_labels, val_preds)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}")
    print(f"Val Acc: {val_acc:.4f}")
    print("--------------------------------------")

# Save the trained model
torch.save(model.state_dict(), "pneumonia_model.pth")

Epoch 1/10
Train Loss: 0.9352
Train Acc: 0.8712
Val Loss: 0.4221
Val Acc: 0.9075
--------------------------------------
Epoch 2/10
Train Loss: 0.1720
Train Acc: 0.9521
Val Loss: 0.1602
Val Acc: 0.9531
--------------------------------------
Epoch 3/10
Train Loss: 0.1967
Train Acc: 0.9467
Val Loss: 0.2770
Val Acc: 0.9313
--------------------------------------
Epoch 4/10
Train Loss: 0.1554
Train Acc: 0.9530
Val Loss: 0.1601
Val Acc: 0.9538
--------------------------------------
Epoch 5/10
Train Loss: 0.1374
Train Acc: 0.9611
Val Loss: 0.2341
Val Acc: 0.9386
--------------------------------------
Epoch 6/10
Train Loss: 0.2065
Train Acc: 0.9475
Val Loss: 0.2314
Val Acc: 0.9498
--------------------------------------
Epoch 7/10
Train Loss: 0.1497
Train Acc: 0.9613
Val Loss: 0.2407
Val Acc: 0.9505
--------------------------------------
Epoch 8/10
Train Loss: 0.1755
Train Acc: 0.9576
Val Loss: 0.3557
Val Acc: 0.9306
--------------------------------------
Epoch 9/10
Train Loss: 0.1397
Train Acc:

In [11]:
from sklearn.metrics import confusion_matrix

# Evaluate the model on the test dataset
model.load_state_dict(torch.load("pneumonia_model.pth"))
model.to(device)
model.eval()

test_loss = 0.0
test_correct = 0
test_preds = []
test_labels = []
image_names = []

with torch.no_grad():
    for images, labels in test_dl:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * images.size(0)

        _, preds = torch.max(outputs, 1)
        test_correct += torch.sum(preds == labels.data)

        test_preds.extend(preds.cpu().detach().numpy().tolist())
        test_labels.extend(labels.cpu().detach().numpy().tolist())
        image_names.extend(labels)

test_loss = test_loss / len(test_set)
test_acc = metrics.accuracy_score(test_labels, test_preds)

# Calculate confusion matrix
cm = confusion_matrix(test_labels, test_preds)
tn = cm[0, 0]  # True negatives
fp = cm[0, 1]  # False positives

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print("\n--------------------------------------\n")

test_precision = metrics.precision_score(test_labels, test_preds, average='weighted')
test_recall = metrics.recall_score(test_labels, test_preds, average='weighted')
test_f1_score = metrics.f1_score(test_labels, test_preds, average='weighted')
specificity = tn / (tn + fp)

print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test F1 Score: {test_f1_score:.4f}")
print(f"Test Specificity: {specificity:.4f}")
print("\n--------------------------------------\n")

Test Loss: 1.2769
Test Accuracy: 0.8269

--------------------------------------

Test Precision: 0.8601
Test Recall: 0.8269
Test F1 Score: 0.8124
Test Specificity: 0.5470

--------------------------------------



In [12]:
# Print the predictions and corresponding image names
match = 0
mismatch = 0
for i in range(len(test_preds)):
    if (image_names[i] == test_preds[i]):
        match += 1
    elif (image_names[i] != test_preds[i]):
        mismatch += 1

    predicted_class = class_names[test_preds[i]]
    actual_class = class_names[image_names[i]]
    print(f"Actual class: {actual_class}, Predicted Label: {predicted_class}")

print("\n--------------------------------------\n")
print(f"Matches: {match}")
print(f"Mismatches: {mismatch}")

Actual class: NORMAL, Predicted Label: NORMAL
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: NORMAL
Actual class: NORMAL, Predicted Label: NORMAL
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: NORMAL
Actual class: PNEUMONIA, Predicted Label: PNEUMONIA
Actual class: NORMAL, Predicted Label: NORMAL