In [2]:
!pip install -r requirements.txt



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
!pip install split-folders


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
# Data split to test, train and validation subfolders
# import os
# import shutil
# import random
#
# random.seed(42) # for reproducibility
#
# # Paths
# input_dir = 'all_data'              # your source directory
# output_base = 'data_split'          # where splits will go
# splits = ['train', 'val', 'test']   # split folder names
# split_ratios = {'train': 0.7, 'val': 0.15, 'test': 0.15}
# classes = ['Covid', 'Normal']       # subfolder names
#
# # Create output directories
# for split in splits:
#     for cls in classes:
#         split_path = os.path.join(output_base, split, cls)
#         os.makedirs(split_path, exist_ok=True)
#
# for cls in classes:
#     imgs = os.listdir(os.path.join(input_dir, cls))
#     random.shuffle(imgs)
#     n_total = len(imgs)
#     n_train = int(n_total * split_ratios['train'])
#     n_val = int(n_total * split_ratios['val'])
#     n_test = n_total - n_train - n_val  # the rest
#
#     train_imgs = imgs[:n_train]
#     val_imgs = imgs[n_train:n_train + n_val]
#     test_imgs = imgs[n_train + n_val:]
#
#     for img_name, split in zip([train_imgs, val_imgs, test_imgs], splits):
#         dst_dir = os.path.join(output_base, split, cls)
#         for img in img_name:
#             src = os.path.join(input_dir, cls, img)
#             dst = os.path.join(dst_dir, img)
#             shutil.copy2(src, dst)



In [12]:
# Importing the required libraries

import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import models


In [6]:
# Defining the transformations : image size and augmentation will depend on pre-trained model. Since, I am using VGG/ResNet, they expect 224 X 224 px inputs

train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [7]:
# Creating ImageFolders for each split

train_dataset = datasets.ImageFolder('data_split/train', transform=train_transforms)
val_dataset = datasets.ImageFolder('data_split/val', transform=val_test_transforms)
test_dataset = datasets.ImageFolder('data_split/test', transform=val_test_transforms)


In [8]:
# Creating DataLoaders for each split

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
# import os
# print(os.listdir('data_split/train/Covid'))
# from PIL import Image
# img = Image.open('data_split/train/Covid/COVID-19(134).png')
# img.show()


In [10]:
# check to see if batch is loading correctly
images, labels = next(iter(train_loader))
print('Image batch shape:', images.shape)
print('Label batch shape:', labels.shape)


Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])


In [13]:
# Loading the pre-trained model and adapting the classifier

model = models.resnet50(pretrained=True)

# Freeze backbone parametersâ€”optional for transfer learning
for param in model.parameters():
    param.requires_grad = False

# Replace the FC layer (classifier) for binary (or multi-class) output
num_classes = 2  # covid/normal
model.fc = nn.Sequential(
    nn.Dropout(),
    nn.Linear(model.fc.in_features, num_classes)
)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/rashila-lamichhane/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
24.0%IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

65.7%IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

100.0%


In [15]:
# Move model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [16]:
# setting up loss function and optimizer : Binary cross entropy loss works for image classification
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)

In [17]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/total}, Acc: {correct/total}')

    # Validation step
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    print(f'  Val Loss: {val_loss/val_total}, Val Acc: {val_correct/val_total}')


Epoch 1, Loss: 0.47577372704838305, Acc: 0.7901008249312558
  Val Loss: 0.4001378178148822, Val Acc: 0.8626609442060086
Epoch 2, Loss: 0.365009196355297, Acc: 0.8597616865261228
  Val Loss: 0.3192114137400885, Val Acc: 0.8626609442060086
Epoch 3, Loss: 0.3147606367397265, Acc: 0.8606782768102658
  Val Loss: 0.2685484538262494, Val Acc: 0.8626609442060086
Epoch 4, Loss: 0.26924076873047653, Acc: 0.8716773602199817
  Val Loss: 0.22671085078061395, Val Acc: 0.8798283261802575
Epoch 5, Loss: 0.23852976924448685, Acc: 0.8835930339138405
  Val Loss: 0.1901414079727533, Val Acc: 0.9012875536480687
Epoch 6, Loss: 0.20626676492358872, Acc: 0.916590284142988
  Val Loss: 0.16839470330609785, Val Acc: 0.9227467811158798
Epoch 7, Loss: 0.18042185357630963, Acc: 0.9275893675527039
  Val Loss: 0.14837407685272683, Val Acc: 0.9313304721030042
Epoch 8, Loss: 0.16720448700006024, Acc: 0.9422548120989918
  Val Loss: 0.12948200925225353, Val Acc: 0.9613733905579399
Epoch 9, Loss: 0.1475535592731809, Acc: 

In [18]:
# Put model in evaluation mode
# 1. Put model in evaluation mode
model.eval()

test_loss = 0.0
correct = 0
total = 0

# 2. Disable gradient calculation (for efficiency)
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

avg_loss = test_loss / total
acc = correct / total
print(f'Test Loss: {avg_loss:.4f}, Test Accuracy: {acc:.4f}')



Test Loss: 0.1211, Test Accuracy: 0.9661


In [19]:
torch.save(model.state_dict(), 'covid_classifier.pth')


In [20]:
model.load_state_dict(torch.load('covid_classifier.pth'))
model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 