In [13]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm
import matplotlib.pyplot as plt
from data_loader import get_dataloader
from PIL import Image
from pytorch_grad_cam import GradCAM, EigenCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image

### Configurations

In [None]:
data_dir = 'COVID19CTS224/S224/'
batch_size = 32
num_classes = 2
num_epochs = 10
lr = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

loader_train, loader_val, loader_test = get_dataloader()

In [15]:
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

<torch._C.Generator at 0x19800aae410>

In [16]:

num_classes = 2


In [None]:
def get_model(model_name='resnet18', pretrained=False):
    if model_name == 'resnet18':
        model = models.resnet18(pretrained=pretrained)
    else:
        model = models.resnet50(pretrained=pretrained)

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    return model

model = get_model('resnet18', pretrained=False).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, loader, optimizer, criterion):
    model.train()
    running_loss, running_corrects = 0.0, 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        labels = labels.view(-1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    
    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = running_corrects.double() / len(loader.dataset)
    return epoch_loss, epoch_acc.item()

def evaluate_model(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = labels.view(-1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = running_corrects.double() / len(loader.dataset)
    return epoch_loss, epoch_acc.item()



In [18]:
num_epochs = 10
best_val_acc = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}:")
    train_loss, train_acc = train_model(model, loader_train, optimizer, criterion)
    val_loss, val_acc = evaluate_model(model, loader_val, criterion)
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"  Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.4f}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")

# Load best model and evaluate on test
model.load_state_dict(torch.load("best_model.pth"))
test_loss, test_acc = evaluate_model(model, loader_test, criterion)
print(f"Test Accuracy: {test_acc*100:.2f}%")

Epoch 1/10:
  Train Loss: 0.4577, Train Acc: 0.7957
  Val Loss:   0.6912, Val Acc:   0.7000
Epoch 2/10:
  Train Loss: 0.2995, Train Acc: 0.8759
  Val Loss:   0.6199, Val Acc:   0.8500
Epoch 3/10:
  Train Loss: 0.2948, Train Acc: 0.8759
  Val Loss:   0.6497, Val Acc:   0.7500
Epoch 4/10:
  Train Loss: 0.2388, Train Acc: 0.9006
  Val Loss:   3.2005, Val Acc:   0.5833
Epoch 5/10:
  Train Loss: 0.1914, Train Acc: 0.9219
  Val Loss:   1.1179, Val Acc:   0.5667
Epoch 6/10:
  Train Loss: 0.1704, Train Acc: 0.9377
  Val Loss:   1.2764, Val Acc:   0.7333
Epoch 7/10:
  Train Loss: 0.1882, Train Acc: 0.9233
  Val Loss:   0.5841, Val Acc:   0.8667
Epoch 8/10:
  Train Loss: 0.1196, Train Acc: 0.9540
  Val Loss:   0.3260, Val Acc:   0.9167
Epoch 9/10:
  Train Loss: 0.1075, Train Acc: 0.9575
  Val Loss:   0.4536, Val Acc:   0.9000
Epoch 10/10:
  Train Loss: 0.0956, Train Acc: 0.9639
  Val Loss:   0.9052, Val Acc:   0.7500


  model.load_state_dict(torch.load("best_model.pth"))


Test Accuracy: 90.75%


Train from Scratch

In [19]:
model_scratch = get_model('resnet18', pretrained=False).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_scratch.parameters(), lr=lr)

print("==== Training from Scratch ====")
best_val_acc_scratch = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_model(model_scratch, loader_train, optimizer, criterion)
    val_loss, val_acc = evaluate_model(model_scratch, loader_val, criterion)
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    if val_acc > best_val_acc_scratch:
        best_val_acc_scratch = val_acc
        torch.save(model_scratch.state_dict(), "model_scratch_best.pth")

model_scratch.load_state_dict(torch.load("model_scratch_best.pth"))
test_loss_scratch, test_acc_scratch = evaluate_model(model_scratch, loader_test, criterion)
print(f"Test Accuracy (Scratch): {test_acc_scratch*100:.2f}%")

==== Training from Scratch ====
Epoch 1/10
Train Loss: 0.4576, Train Acc: 0.8027
Val Loss: 1.7228, Val Acc: 0.5667
Epoch 2/10
Train Loss: 0.3048, Train Acc: 0.8783
Val Loss: 0.7015, Val Acc: 0.6833
Epoch 3/10
Train Loss: 0.3014, Train Acc: 0.8749
Val Loss: 0.4543, Val Acc: 0.7667
Epoch 4/10
Train Loss: 0.2557, Train Acc: 0.8976
Val Loss: 0.4183, Val Acc: 0.8667
Epoch 5/10
Train Loss: 0.2540, Train Acc: 0.8922
Val Loss: 1.0038, Val Acc: 0.6333
Epoch 6/10
Train Loss: 0.1986, Train Acc: 0.9228
Val Loss: 4.2517, Val Acc: 0.5000
Epoch 7/10
Train Loss: 0.1611, Train Acc: 0.9367
Val Loss: 0.3003, Val Acc: 0.9167
Epoch 8/10
Train Loss: 0.1305, Train Acc: 0.9496
Val Loss: 0.2324, Val Acc: 0.9333
Epoch 9/10
Train Loss: 0.1095, Train Acc: 0.9590
Val Loss: 0.5351, Val Acc: 0.8333
Epoch 10/10
Train Loss: 0.1172, Train Acc: 0.9604
Val Loss: 0.3617, Val Acc: 0.9167


  model_scratch.load_state_dict(torch.load("model_scratch_best.pth"))


Test Accuracy (Scratch): 91.50%


In [20]:
model_transfer = get_model('resnet18', pretrained=True).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_transfer.parameters(), lr=lr)

print("==== Training with Transfer Learning ====")
best_val_acc_transfer = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_model(model_transfer, loader_train, optimizer, criterion)
    val_loss, val_acc = evaluate_model(model_transfer, loader_val, criterion)
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    if val_acc > best_val_acc_transfer:
        best_val_acc_transfer = val_acc
        torch.save(model_transfer.state_dict(), "model_transfer_best.pth")

model_transfer.load_state_dict(torch.load("model_transfer_best.pth"))
test_loss_transfer, test_acc_transfer = evaluate_model(model_transfer, loader_test, criterion)
print(f"Test Accuracy (Transfer): {test_acc_transfer*100:.2f}%")



==== Training with Transfer Learning ====
Epoch 1/10
Train Loss: 0.3295, Train Acc: 0.8694
Val Loss: 0.2765, Val Acc: 0.8833
Epoch 2/10
Train Loss: 0.1337, Train Acc: 0.9456
Val Loss: 4.3701, Val Acc: 0.6333
Epoch 3/10
Train Loss: 0.1274, Train Acc: 0.9481
Val Loss: 0.2163, Val Acc: 0.9333
Epoch 4/10
Train Loss: 0.0806, Train Acc: 0.9718
Val Loss: 0.6702, Val Acc: 0.8667
Epoch 5/10
Train Loss: 0.1110, Train Acc: 0.9604
Val Loss: 0.2214, Val Acc: 0.9667
Epoch 6/10
Train Loss: 0.0619, Train Acc: 0.9787
Val Loss: 0.0356, Val Acc: 1.0000
Epoch 7/10
Train Loss: 0.0337, Train Acc: 0.9921
Val Loss: 0.6045, Val Acc: 0.8500
Epoch 8/10
Train Loss: 0.1535, Train Acc: 0.9416
Val Loss: 1.0786, Val Acc: 0.7667
Epoch 9/10
Train Loss: 0.0596, Train Acc: 0.9862
Val Loss: 0.8082, Val Acc: 0.6833
Epoch 10/10
Train Loss: 0.1384, Train Acc: 0.9515
Val Loss: 0.7605, Val Acc: 0.7000


  model_transfer.load_state_dict(torch.load("model_transfer_best.pth"))


Test Accuracy (Transfer): 97.25%


In [25]:
test_csv_path = 'COVID19CTS224/S224/test.csv'
test_data = pd.read_csv(test_csv_path)
class_names = test_data['label'].unique()
base_path = 'COVID19CTS224/S224/'

covid_images = []
noncovid_images = []

for class_name in class_names:
    images_in_class = test_data['filename'][test_data['label'] == class_name].tolist()
    if class_name == '1':
        covid_images.extend([os.path.join(base_path, img) for img in images_in_class])
    else:
        noncovid_images.extend([os.path.join(base_path, img) for img in images_in_class])

random.shuffle(covid_images)
random.shuffle(noncovid_images)

selected_covid = covid_images[:10]
selected_noncovid = noncovid_images[:10]
selected_images = selected_covid + selected_noncovid

# Prepare CAMs
target_layers = [model_scratch.layer4[-1]]  # last convolutional layer for ResNet18
cam_methods = {
    'gradcam': GradCAM(model=model_scratch, target_layers=target_layers, reshape_transform=None),
    'eigencam': EigenCAM(model=model_scratch, target_layers=target_layers, reshape_transform=None)
}

def apply_cam(image_path, model, cam_method):
    rgb_img = np.array(Image.open(image_path).convert('RGB'))
    rgb_img = np.float32(rgb_img) / 255.0
    input_tensor = preprocess_image(rgb_img, mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
    input_tensor = input_tensor.to(device)

    # Forward pass to get prediction
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        prediction = torch.argmax(output, dim=1).item()

    grayscale_cam = cam_method(input_tensor=input_tensor, targets=[ClassifierOutputTarget(prediction)])
    # grayscale_cam is [N, H, W], here N=1
    visualization = show_cam_on_image(rgb_img, grayscale_cam[0,:], use_rgb=True)
    return visualization, prediction

# Create directories to save CAM images
os.makedirs("cam_results_scratch", exist_ok=True)
os.makedirs("cam_results_transfer", exist_ok=True)

# Visualize using both methods for both models
# We'll do this for the from-scratch model first
print("Generating CAM visualizations for Scratch model...")
for i, img_path in enumerate(selected_images):
    for method_name, cam_method in cam_methods.items():
        visualization, pred_class = apply_cam(img_path, model_scratch, cam_method)
        plt.figure(figsize=(6,6))
        plt.title(f"{method_name.upper()} - Pred: {class_names[pred_class]}")
        plt.imshow(visualization)
        plt.axis('off')
        fname = f"scratch_{method_name}_img{i}.png"
        plt.savefig(os.path.join("cam_results_scratch", fname))
        plt.close()

# For the transfer model, we just change the model in CAM methods
target_layers_transfer = [model_transfer.layer4[-1]]
cam_methods_transfer = {
    'gradcam': GradCAM(model=model_transfer, target_layers=target_layers_transfer, reshape_transform=None),
    'eigencam': EigenCAM(model=model_transfer, target_layers=target_layers_transfer, reshape_transform=None)
}

print("Generating CAM visualizations for Transfer model...")
for i, img_path in enumerate(selected_images):
    for method_name, cam_method in cam_methods_transfer.items():
        visualization, pred_class = apply_cam(img_path, model_transfer, cam_method)
        plt.figure(figsize=(6,6))
        plt.title(f"{method_name.upper()} - Pred: {class_names[pred_class]}")
        plt.imshow(visualization)
        plt.axis('off')
        fname = f"transfer_{method_name}_img{i}.png"
        plt.savefig(os.path.join("cam_results_transfer", fname))
        plt.close()

Generating CAM visualizations for Scratch model...
Generating CAM visualizations for Transfer model...


### Transfer Learning Discussion

Transfer learning can provide several benefits over training from scratch, especially when dealing with limited training data or complex tasks. In this scenario, using a pre-trained ResNet model that has already learned rich feature representations from a large and diverse dataset (such as ImageNet) can significantly speed up the convergence and improve the final accuracy. 

When we train from scratch, the model starts with random weights and must learn all relevant low-level and high-level features directly from our COVID CT dataset. This often requires more data and epochs to achieve comparable performance. If our dataset is small or less diverse, the model might fail to generalize well and underperform.

On the other hand, transfer learning starts from a model that already has a good understanding of generic features (edges, textures, shapes). Therefore, it only needs to fine-tune these features to our specific task of identifying COVID vs Non-COVID CT images. Typically, we see faster convergence, reduced training time, and potentially higher final accuracy, as shown in the test results.

In conclusion, transfer learning usually brings extra benefits over training from scratch by leveraging previously learned representations, leading to improved generalization and performance in fewer training iterations.