In [None]:
from numpy import test
from torch.jit import load
from sklearn.utils import np
import kagglehub
import pandas as pd
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset, Dataset
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch
from tqdm import tqdm

import os

!pip install kaggle

# Mount Google Drive at /content/drive; the dataset is extracted beneath it.
from google.colab import files, drive
drive.mount('/content/drive')
from zipfile import ZipFile

# Archive to unpack and the folder it is expected to produce.
zip_path = "/content/archive.zip"
extract_dir = "/content/drive/MyDrive/lung_cancer_data/Lung Cancer Dataset"

# Only unpack when the target folder is not there yet.
if os.path.exists(extract_dir):
    print("Folder already exists. Skipping unzip.")
else:
    # Make sure the parent directory of the dataset folder exists first.
    os.makedirs(os.path.dirname(extract_dir), exist_ok=True)
    with ZipFile(zip_path) as zip_ref:
        zip_ref.extractall(path="/content/drive/MyDrive/lung_cancer_data")
    print("Unzipped successfully!")

class ImageDataset(Dataset):
    """Dataset that delegates storage and indexing to an ``ImageFolder``.

    The wrapped folder dataset is kept in ``self.data``; length, item access
    and the class-name list are all forwarded to it.
    """

    def __init__(self, data_dir, transform=None):
        """Create the underlying ``ImageFolder``.

        Args:
            data_dir: Root directory handed to ``ImageFolder`` as ``root``.
            transform: Optional transform forwarded to ``ImageFolder``.
        """
        folder = ImageFolder(root=data_dir, transform=transform)
        self.data = folder

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        size = len(self.data)
        return size

    def __getitem__(self, idx):
        """Return the wrapped dataset's item at position *idx*."""
        sample = self.data[idx]
        return sample

    def get_classes(self):
        """Return the ``classes`` attribute of the wrapped dataset."""
        class_names = self.data.classes
        return class_names

# Preprocessing: resize every image to 224x224 and convert it to a tensor.
# LungCancerModel's first fully connected layer is sized for this resolution.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = ImageDataset(data_dir=extract_dir, transform=transform)
print(dataset.get_classes())

# Fetch the first sample once so a broken image or transform fails here,
# before any training starts. `image` and `label` stay available for
# inspection in the notebook.
image, label = dataset[0]

num_classes = len(dataset.get_classes())

# 80/20 train/test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so re-running the cell yields the same train/test
# membership; an unseeded split reshuffles samples between the two sets on
# every run, which makes validation numbers incomparable across runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Never request more loader workers than the machine has CPUs.
num_workers = min(8, os.cpu_count() or 1)

train_loader = DataLoader(train_dataset, batch_size=16, num_workers=num_workers, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, num_workers=num_workers, shuffle=False)

class LungCancerModel(nn.Module):
    """Small CNN classifier: two conv + max-pool stages, then two linear layers.

    Each 3x3 convolution uses padding 1 (spatial size preserved) and is
    followed by a 2x2 max-pool (spatial size halved, rounding down), so the
    feature map entering ``fc1`` has 64 channels at a quarter of the input
    height and width.
    """

    def __init__(self, num_classes, input_size=224):
        """Build the layers.

        Args:
            num_classes: Number of output logits.
            input_size: Spatial size of the input images, either a single
                int for square images or a ``(height, width)`` pair. The
                default of 224 reproduces the original ``64 * 56 * 56``
                feature size.

        Raises:
            ValueError: If the input is too small to survive two 2x2 pools.
        """
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Two stride-2 pools with floor rounding reduce each side to size // 4.
        feat_h, feat_w = height // 4, width // 4
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small; each side must be at least 4"
            )

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * feat_h * feat_w, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images.

        The images must have the spatial size given as ``input_size`` at
        construction, otherwise ``fc1`` receives the wrong feature count.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse channels and spatial dims into one feature vector per sample.
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

model = LungCancerModel(num_classes)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 5

# Per-epoch loss histories. Both lists are created once, before the loop, so
# each ends up with one entry per epoch (creating the validation list inside
# the loop would discard all but the last epoch).
train_loss_per = []
val_loss_per = []

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # a smaller final batch is averaged correctly below.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    train_loss_per.append(epoch_loss)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    # ---- evaluation pass on the held-out split ----
    model.eval()
    running_loss = 0.0

    # Sample counts, kept as integers.
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)

            predicted = torch.argmax(outputs, dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    val_loss = running_loss / len(test_loader.dataset)
    val_loss_per.append(val_loss)

    print(f"Training Loss: {train_loss_per[-1]:.4f}")
    # Report the validation loss as well; it was computed but never shown.
    print(f"Validation Loss: {val_loss:.4f}")

    # Accuracy as a percentage of correctly classified held-out samples.
    accuracy = 100 * correct / total
    print(f"Validation: {accuracy:.4f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder already exists. Skipping unzip.
['Benign cases', 'Normal cases', 'adenocarcinoma', 'large cell carcinoma', 'squamous cell carcinoma']


Epoch 1/5: 100%|██████████| 77/77 [01:32<00:00,  1.20s/it]


Epoch 1, Loss: 1.0838


Testing: 100%|██████████| 20/20 [00:29<00:00,  1.48s/it]


Training Loss: 1.0838
Validation: 72.3127


Epoch 2/5: 100%|██████████| 77/77 [00:11<00:00,  6.75it/s]


Epoch 2, Loss: 0.7248


Testing: 100%|██████████| 20/20 [00:02<00:00,  8.40it/s]


Training Loss: 0.7248
Validation: 75.8958


Epoch 3/5: 100%|██████████| 77/77 [00:11<00:00,  6.91it/s]


Epoch 3, Loss: 0.5668


Testing: 100%|██████████| 20/20 [00:02<00:00,  7.52it/s]


Training Loss: 0.5668
Validation: 81.7590


Epoch 4/5: 100%|██████████| 77/77 [00:10<00:00,  7.38it/s]


Epoch 4, Loss: 0.4297


Testing: 100%|██████████| 20/20 [00:02<00:00,  6.93it/s]


Training Loss: 0.4297
Validation: 85.0163


Epoch 5/5: 100%|██████████| 77/77 [00:10<00:00,  7.47it/s]


Epoch 5, Loss: 0.3302


Testing: 100%|██████████| 20/20 [00:03<00:00,  5.84it/s]

Training Loss: 0.3302
Validation: 87.2964



