In [None]:
#Importing libraries

import os
import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import pandas as pd
from torch.utils.data import Dataset

In [None]:
# The dataset was downloaded from Kaggle and unpacked into the directory below.
# To fetch it programmatically instead, replace the `dataset_path` assignment with:
#   dataset_path = kagglehub.dataset_download("kmader/skin-cancer-mnist-ham10000")
#
# Set the HAM10000_DIR environment variable to point at another copy of the
# dataset without editing this cell; the original location is the fallback.
dataset_path = os.environ.get(
    "HAM10000_DIR",
    "D:/MS/Sem3/CSE575_StatisticalMachineLearning/Project/archive",
)

# The images in the download were spread over two folders; they were merged
# into the single "HAM10000_images" folder referenced here.
image_dir = os.path.join(dataset_path, "HAM10000_images")
csv_path = os.path.join(dataset_path, "HAM10000_metadata.csv")

# Class for the dataset
class SkinCancerDataset(Dataset):
    """Map-style dataset pairing skin-lesion images with integer-encoded diagnoses.

    The metadata CSV must provide an ``image_id`` column (image file name
    without the ``.jpg`` extension) and a ``dx`` column (diagnosis). The
    diagnoses are encoded with a ``LabelEncoder`` and stored in a new
    ``label`` column of ``self.data``; the fitted encoder is kept on
    ``self.label_encoder``.

    Args:
        image_dir: Directory containing the ``<image_id>.jpg`` files.
        csv_file: Path to the metadata CSV.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, image_dir, csv_file, transform=None):
        self.image_dir = image_dir
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.label_encoder = LabelEncoder()
        self.data['label'] = self.label_encoder.fit_transform(self.data['dx'])

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            An ``(image, label)`` tuple: the RGB image (after ``transform``
            when one was given) and the encoded diagnosis as a plain ``int``.

        Raises:
            IndexError: If ``idx`` is outside the metadata table.
        """
        # Read the two scalars straight from their columns; `iloc[idx]` on the
        # whole frame would build a full row Series for every lookup.
        image_id = self.data['image_id'].iat[idx]
        # Plain int so the default collate function builds an int64 tensor,
        # the dtype nn.CrossEntropyLoss expects for class indices.
        label = int(self.data['label'].iat[idx])

        img_name = os.path.join(self.image_dir, image_id + ".jpg")
        # convert() returns a new, fully loaded image, so the file can be
        # closed as soon as the conversion is done.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert("RGB")

        # Compare against None: a callable that defines __len__ could be falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
# Resize every image to 224x224, the input size used for the ResNet model,
# and normalize each channel with the standard ImageNet mean and std.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(preprocessing_steps)


In [None]:
# Build the full dataset and split it 80/20 into training and validation sets.
# The split uses a seeded generator so that re-running the notebook yields the
# same partition and the validation accuracy stays comparable between runs.
# NOTE(review): the split is made per image; if the metadata holds several
# images of the same lesion, related images may end up on both sides of the
# split -- confirm whether a grouped split is needed.
SPLIT_SEED = 42

full_dataset = SkinCancerDataset(image_dir, csv_path, transform=transform)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
# Pretrained ResNet50 with all pretrained layers frozen; only a new final
# layer is trained.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of the
# `weights` argument. IMAGENET1K_V1 is the checkpoint `pretrained=True`
# selects, so the loaded weights stay the same. Older torchvision releases
# without the weights enum fall back to the legacy flag.
if hasattr(models, "ResNet50_Weights"):
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet50(pretrained=True)

# Freeze every pretrained parameter. The replacement layer created below is a
# fresh nn.Linear, whose parameters require gradients by default.
for param in resnet.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with one output per diagnosis class.
# The class count comes from the fitted label encoder (7 for the full dataset)
# so the head always matches the labels the dataset produces.
num_classes = len(full_dataset.label_encoder.classes_)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, num_classes)
resnet = resnet.to(device)

# Cross-entropy loss for multiclass classification; Adam updates only the
# parameters of the new final layer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.fc.parameters(), lr=0.001)




In [None]:
# Train the model for 10 epochs.
# Each step runs a forward pass, computes the loss, backpropagates it and
# applies an optimizer update. The loss printed per epoch is the mean over all
# training samples seen in that epoch.
# NOTE(review): resnet.train() switches the whole network to training mode,
# including the frozen layers' BatchNorm modules, whose running statistics
# therefore keep updating -- confirm this is intended.

epochs = 10
for epoch in range(epochs):
    resnet.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The criterion returns the mean loss of the batch. Weighting it by the
        # batch size keeps a smaller final batch from being over-represented
        # in the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    print(f"Epoch {epoch+1}/{epochs} - Loss: {running_loss/samples_seen:.4f}")


Epoch 1/10 - Loss: 0.8679
Epoch 2/10 - Loss: 0.7014
Epoch 3/10 - Loss: 0.6503
Epoch 4/10 - Loss: 0.6386
Epoch 5/10 - Loss: 0.6201
Epoch 6/10 - Loss: 0.6062
Epoch 7/10 - Loss: 0.5849
Epoch 8/10 - Loss: 0.5899
Epoch 9/10 - Loss: 0.5693
Epoch 10/10 - Loss: 0.5502


In [None]:
# Evaluation
# Run the model over the validation set without gradient tracking, take the
# highest-scoring class as the prediction and report the share of predictions
# that match the labels.
resnet.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = resnet(images)
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Validation Accuracy: {100 * correct / total:.2f}%")

Validation Accuracy: 77.68%
