In [48]:
import pandas as pd 
import numpy as np

In [49]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os

In [50]:
# Notebook-wide settings. Use the GPU when one is visible, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training hyper-parameters and input geometry.
num_epochs, batch_size, image_size = 10, 4, 224

# Folder holding the training images (relative to the working directory).
data_dir = "cnn_training_data"

In [51]:
from torchvision import models

# Load ResNet-18 with its ImageNet weights. The `pretrained=True` keyword is
# deprecated since torchvision 0.13 in favour of `weights=`; fall back to the
# legacy keyword on older releases that do not ship the weights enums.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)  # pretrained on ImageNet

# Freeze every pretrained parameter *before* attaching the new head, so the
# backbone is used as a fixed feature extractor.
for param in model.parameters():
    param.requires_grad = False

# Replace the ImageNet classifier with a fresh 2-way head. A newly created
# nn.Linear has requires_grad=True, so it is the only trainable part.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)    # your 2 classes

# Show the final architecture once (the original head is not dumped as well,
# to keep the cell output readable).
print(model)

model = model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [52]:
import torch.optim as optim

# Give Adam only the parameters that are still marked trainable; anything with
# requires_grad=False is left out of the optimizer entirely.
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-3,
)


In [53]:
import torch.nn as nn

# Cross-entropy over raw logits and integer class labels, averaged over the
# batch (the default reduction, spelled out here).
criterion = nn.CrossEntropyLoss(reduction="mean")


### Defining the Train loader for the data

In [54]:
# import torch
# from torchvision import datasets, transforms
# from torch.utils.data import DataLoader
# import torch
# from torchvision import datasets, transforms
# from torch.utils.data import DataLoader


# data_dir = '/Users/pavithrasenthilkumar/documents/my_codes/projects/lung_cancer_classification_CT_scan/artifacts/cnn_training_data' 

# # Define transforms (resize, to tensor, normalize)
# transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     # Normalize with ImageNet mean/std if using pretrained models
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], 
#                          std=[0.229, 0.224, 0.225]),
# ])

# dataset = datasets.ImageFolder(root='/Users/pavithrasenthilkumar/documents/my_codes/projects/lung_cancer_classification_CT_scan/artifacts/cnn_training_data', transform=transform)


# class_to_idx = {'lungs_ct': 1, 'not_lungs_ct': 0}
# dataset.class_to_idx = class_to_idx


# dataset.targets = [class_to_idx[dataset.classes[t]] for t in dataset.targets]
# dataset.classes = list(class_to_idx.keys())


### Visual inspection of created dataset

In [55]:
# print("Classes:", dataset.classes)
# print("Class to index:", dataset.class_to_idx)
# print("Number of samples:", len(dataset))

# for i in range(5):
#     print(dataset.imgs[i], dataset.targets[i])


#### Creating DataLoaders for the dataset (train/test split)

In [56]:
# from torch.utils.data import random_split, DataLoader

# train_size = int(0.8 * len(dataset))
# test_size = len(dataset) - train_size

# train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# # Create DataLoaders
# train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
# test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=2)

# print(f"Train samples: {len(train_dataset)}")
# print(f"Test samples: {len(test_dataset)}")


In [57]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch

# Make sure this directory has subfolders 'lungs_ct' and 'not_lungs_ct'
data_dir = "/Users/pavithrasenthilkumar/Documents/My_Codes/PROJECTS/Lung_Cancer_Classification_CT_Scan/artifacts/cnn_training_data"

# Resize every image to 224x224, convert to a tensor and normalise with the
# ImageNet channel statistics expected by the pretrained backbone.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# DO NOT override class_to_idx manually
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

print("Classes:", dataset.classes)
print("Class to index:", dataset.class_to_idx)

# Enforce the expected folder layout instead of only stating it in a comment:
# the classifier head has exactly two outputs, so any other mapping would
# silently mislabel samples or fail much later during training.
expected_class_to_idx = {'lungs_ct': 0, 'not_lungs_ct': 1}
assert dataset.class_to_idx == expected_class_to_idx, (
    f"Unexpected class mapping {dataset.class_to_idx}; "
    f"expected {expected_class_to_idx}"
)

# Split 80/20. The seed is set immediately before the split so the partition
# is reproducible.
torch.manual_seed(42)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# Both parts must be non-empty, otherwise the per-epoch accuracy computation
# in the training cell divides by zero.
assert train_size > 0 and test_size > 0, (
    f"Dataset too small to split: {len(dataset)} sample(s)"
)
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Settings shared by both loaders so they cannot drift apart.
# NOTE(review): the config cell defines batch_size = 4 but the loaders use 8 —
# confirm which value is intended.
loader_kwargs = dict(batch_size=8, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

Classes: ['lungs_ct', 'not_lungs_ct']
Class to index: {'lungs_ct': 0, 'not_lungs_ct': 1}


In [58]:
import torch

# Reset the global RNG and redo the split with the sizes computed earlier.
# NOTE(review): train_loader / test_loader were built from the previous split
# objects and are not rebuilt here, so they do not use the datasets bound
# below — confirm whether this second split is still needed.
torch.manual_seed(42)
train_dataset, test_dataset = random_split(dataset, lengths=[train_size, test_size])


### Training 

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    # ---------- training pass ----------
    # NOTE(review): model.train() also puts the BatchNorm layers of the frozen
    # backbone into training mode, so their running statistics keep updating
    # even though their weights do not — confirm this is intended.
    model.train()
    running_loss, correct_train, total_train = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure below
        # is a per-sample average.
        running_loss += images.size(0) * loss.item()
        # Predicted class = index of the largest logit in each row.
        _, preds = outputs.max(dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    epoch_loss = running_loss / total_train
    epoch_acc = 100 * correct_train / total_train

    # ---------- evaluation pass ----------
    model.eval()
    correct_test, total_test = 0, 0

    # Gradients are not needed for scoring the held-out split.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(dim=1)
            total_test += labels.size(0)
            correct_test += (preds == labels).sum().item()

    test_acc = 100 * correct_test / total_test

    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {epoch_loss:.4f} - Train Acc: {epoch_acc:.2f}% - Test Acc: {test_acc:.2f}%")

### Saving the model

In [60]:

# Write the trained weights into the directory that contains the training
# data folder. Deriving the location from data_dir keeps the path casing
# consistent with the dataset path; the previously hard-coded lowercase path
# only resolves on a case-insensitive filesystem.
model_path = os.path.join(os.path.dirname(data_dir), "lung_ct_resnet_model1.pth")
torch.save(model.state_dict(), model_path)
