In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
from PIL import Image
import os


In [6]:
# Pick the compute device once; later cells move the model and every batch onto it.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


Using device: cpu


In [7]:
class SkinCancerDataset(Dataset):
    """Binary (cancerous vs. non-cancerous) image dataset driven by a metadata CSV.

    The CSV must provide a ``dx`` column (diagnosis code) and an ``image_id``
    column; each image is read from ``<img_dir>/<image_id>.jpg``.

    Args:
        csv_file: Path to the metadata CSV.
        img_dir: Directory containing the ``.jpg`` images.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: If the CSV contains a ``dx`` value that has no entry in
            ``LABEL_MAP`` (previously such rows silently received a NaN label).
    """

    # Map diagnosis to cancerous (1) or non-cancerous (0)
    LABEL_MAP = {
        'nv': 0,      # Non-cancerous
        'bkl': 0,     # Non-cancerous
        'df': 0,      # Non-cancerous
        'vasc': 0,    # Non-cancerous
        'mel': 1,     # Cancerous
        'bcc': 1,     # Cancerous
        'akiec': 1    # Cancerous
    }

    def __init__(self, csv_file, img_dir, transform=None):
        self.metadata = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

        self.metadata['label'] = self.metadata['dx'].map(self.LABEL_MAP)

        # Series.map yields NaN for codes missing from the mapping; a NaN target
        # would propagate into the loss, so fail loudly at construction instead.
        unmapped = self.metadata.loc[self.metadata['label'].isna(), 'dx']
        if not unmapped.empty:
            unknown_codes = sorted(unmapped.astype(str).unique())
            raise ValueError(
                f"Unrecognised diagnosis code(s) in 'dx' column: {unknown_codes}"
            )
        self.metadata['label'] = self.metadata['label'].astype('int64')

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at positional index ``idx``.

        ``image`` is the (optionally transformed) RGB image and ``label`` is a
        scalar ``float32`` tensor holding 0.0 or 1.0.
        """
        row = self.metadata.iloc[idx]  # single positional lookup for both fields
        img_path = os.path.join(self.img_dir, f"{row['image_id']}.jpg")

        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the with-block exits.
        with Image.open(img_path) as img_file:
            image = img_file.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(float(row['label']), dtype=torch.float32)


In [8]:
# Preprocessing applied to every image: resize to 64x64, then convert to a tensor.
# The 64x64 size must stay in sync with whatever input size the network expects.
transform = transforms.Compose(
    [transforms.Resize(size=(64, 64)), transforms.ToTensor()]
)


In [12]:
# Location of the dataset. Set the HAM10000_DATA_DIR environment variable to point
# elsewhere; the default is the original local path.
DATA_DIR = os.environ.get(
    "HAM10000_DATA_DIR",
    "C:/Users/Sujani/Desktop/FinalYearProject/training/data",
)
csv_path = f"{DATA_DIR}/HAM10000_metadata.csv"
images_path = f"{DATA_DIR}/HAM10000_images"

# Fixed seed so the train/test partition (and therefore the reported test
# accuracy) is the same after every kernel restart.
SPLIT_SEED = 42

dataset = SkinCancerDataset(csv_file=csv_path, img_dir=images_path, transform=transform)

# Split into train (80%) and test (remainder)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# NOTE(review): if the metadata holds several images of the same lesion, a purely
# random split can place them on both sides of the split — confirm whether a
# grouped split is required.
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Training batches are reshuffled each epoch (with a seeded generator);
# test batches keep a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [13]:
class SkinCancerCNN(nn.Module):
    """Small two-convolution CNN that outputs one probability per image.

    Expects input of shape ``(batch, 3, 64, 64)``: each of the two 2x2 max-pools
    halves the spatial size, giving the ``64 * 16 * 16`` features ``fc1`` is
    sized for. The forward pass returns a ``(batch, 1)`` tensor of sigmoid
    outputs in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-sample probabilities for a batch of 64x64 RGB images.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                ``64 * 16 * 16`` features expected by ``fc1``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. Reshaping with
        # view(-1, 64*16*16) would fold surplus spatial elements of a wrongly
        # sized input into extra "samples" instead of failing.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Build the network, then move its parameters onto the selected device.
model = SkinCancerCNN()
model = model.to(device)


In [14]:
# Binary cross-entropy computed on the model's outputs.
criterion = nn.BCELoss()
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [15]:
num_epochs = 10

# One pass over train_loader per epoch; the printed loss is the mean of the
# per-batch losses for that epoch.
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        # Add a trailing dimension so targets line up with the (batch_size, 1) output.
        batch_targets = batch_targets.to(device).unsqueeze(1)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

print("Training completed!")


Epoch [1/10], Loss: 0.4450
Epoch [2/10], Loss: 0.4091
Epoch [3/10], Loss: 0.3853
Epoch [4/10], Loss: 0.3752
Epoch [5/10], Loss: 0.3572
Epoch [6/10], Loss: 0.3465
Epoch [7/10], Loss: 0.3445
Epoch [8/10], Loss: 0.3360
Epoch [9/10], Loss: 0.3262
Epoch [10/10], Loss: 0.3234
Training completed!


In [18]:
# Stage 1 Accuracy Checking
model.eval()  # switch the Stage 1 model to evaluation mode

correct = 0
total = 0

# Gradients are not needed for scoring, so disable autograd bookkeeping.
with torch.no_grad():
    for batch_images, batch_targets in test_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device).unsqueeze(1)

        probabilities = model(batch_images)
        # Outputs strictly above 0.5 count as class 1, everything else as class 0.
        hard_predictions = (probabilities > 0.5).float()

        total += batch_targets.size(0)
        correct += (hard_predictions == batch_targets).sum().item()

# Percentage of test samples whose thresholded prediction equals the label.
accuracy_stage1 = 100 * correct / total
print(f"Stage 1 Model Test Accuracy: {accuracy_stage1:.2f}%")


Stage 1 Model Test Accuracy: 84.22%


In [17]:
# Persist only the learned parameters (the state_dict), not the module object.
stage1_checkpoint_path = "C:/Users/Sujani/Desktop/FinalYearProject/backend/model_stage1.pt"
torch.save(model.state_dict(), stage1_checkpoint_path)
print("Model saved successfully!")


Model saved successfully!
