In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split # Needed if you didn't run the split yet

# --- Configuration ---
DATA_DIR = '/Users/tejii.03/Documents/Projects/Skin Cancer Detection/Skin-Cancer-Detection/data'
IMAGE_FOLDER = os.path.join(DATA_DIR, '/Users/tejii.03/Documents/Projects/Skin Cancer Detection/Skin-Cancer-Detection/data/images')
TRAIN_METADATA_PATH = os.path.join(DATA_DIR, 'train_metadata.csv')
VAL_METADATA_PATH = os.path.join(DATA_DIR, 'val_metadata.csv')
LABEL_MAP = {'nv': 0, 'mel': 1, 'bkl': 2, 'bcc': 3, 'akiec': 4, 'vasc': 5, 'df': 6}
NUM_CLASSES = len(LABEL_MAP)
BATCH_SIZE = 64
IMAGE_SIZE = 224

# --- 1. M4 GPU (MPS) Check ---
DEVICE = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"✅ Training device set to: {DEVICE}")

# --- 2. PyTorch Dataset Class (Reuse the one we discussed) ---
class HAM10000Dataset(torch.utils.data.Dataset):
    def __init__(self, metadata_path, image_folder, transform=None):
        self.data_frame = pd.read_csv(metadata_path)
        self.image_folder = image_folder
        self.transform = transform
        self.image_paths = {row['image_id']: os.path.join(image_folder, row['image_id'] + '.jpg')
                            for index, row in self.data_frame.iterrows()}
        self.data_frame['label'] = self.data_frame['dx'].map(LABEL_MAP)

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        row = self.data_frame.iloc[idx]
        img_id = row['image_id']
        label = row['label']
        img_path = self.image_paths.get(img_id)
        
        try:
            image = Image.open(img_path).convert('RGB')
        except:
            return self.__getitem__((idx + 1) % self.__len__()) # Skip corrupt images

        if self.transform:
            image = self.transform(image)
        
        return image, torch.tensor(label, dtype=torch.long)

# --- 3. Data Transforms ---
# Ensure you have your data augmentation for training!
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --- 4. Load DataLoaders ---
# Assuming you have run the split and the CSVs exist:
train_dataset = HAM10000Dataset(TRAIN_METADATA_PATH, IMAGE_FOLDER, train_transform)
val_dataset = HAM10000Dataset(VAL_METADATA_PATH, IMAGE_FOLDER, val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

✅ Training device set to: mps


In [9]:
def setup_model(num_classes, device):
    # 1. Load a pre-trained ResNet50 model
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    
    # 2. Freeze all layers (optional but good for initial training)
    # This prevents the pre-trained weights from changing much
    for param in model.parameters():
        param.requires_grad = False

    # 3. Replace the final classification layer
    # ResNet50's classifier is called 'fc' (fully connected)
    num_ftrs = model.fc.in_features # Get the number of features from the layer before 'fc'
    model.fc = nn.Linear(num_ftrs, num_classes)
    
    # 4. Move the model to the M4 GPU
    model = model.to(device)
    
    return model

model = setup_model(NUM_CLASSES, DEVICE)
print("Model loaded and moved to GPU.")

Model loaded and moved to GPU.


In [10]:
from sklearn.utils import class_weight
import numpy as np

# 1. Get the list of all labels from the training set
train_labels = train_dataset.data_frame['label'].values

# 2. Compute the balanced weights using scikit-learn
# 'balanced' mode automatically adjusts weights inversely proportional to class frequencies
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels
)

# 3. Convert weights to a PyTorch tensor and move to GPU
weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(DEVICE)
print("Calculated Class Weights (on GPU):", weights_tensor)

# --- Define Loss and Optimizer ---
criterion = nn.CrossEntropyLoss(weight=weights_tensor) # <-- Using the weighted loss!

# Only optimize the parameters of the newly added final layer (model.fc)
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# A scheduler to gradually decrease the learning rate is also a good idea
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Calculated Class Weights (on GPU): tensor([ 0.2134,  1.2860,  1.3021,  2.7848,  4.3686, 10.0401, 12.4410],
       device='mps:0')


In [11]:
def train_model(model, train_loader, criterion, optimizer, scheduler, device):
    model.train() # Set the model to training mode
    running_loss = 0.0
    correct_predictions = 0
    
    for inputs, labels in train_loader:
        # Move inputs and labels to the M4 GPU (MPS)
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad() # Zero the parameter gradients
        
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1) # Get the predicted class
            loss = criterion(outputs, labels)
            
            # Backpropagation
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        correct_predictions += torch.sum(preds == labels.data)

    scheduler.step() # Update the learning rate
    
    epoch_loss = running_loss / len(train_loader.dataset)
    
    # FIX APPLIED: Use .float() instead of .double() for MPS compatibility
    epoch_acc = correct_predictions.float() / len(train_loader.dataset)
    
    return epoch_loss, epoch_acc.item()

In [12]:
@torch.no_grad() # Tells PyTorch not to calculate gradients (saves memory/time)
def validate_model(model, val_loader, criterion, device):
    model.eval() # Set the model to evaluation mode
    running_loss = 0.0
    correct_predictions = 0
    
    for inputs, labels in val_loader:
        # Move inputs and labels to the M4 GPU (MPS)
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        running_loss += loss.item() * inputs.size(0)
        correct_predictions += torch.sum(preds == labels.data)
        
    epoch_loss = running_loss / len(val_loader.dataset)
    
    # FIX APPLIED: Use .float() instead of .double() for MPS compatibility
    epoch_acc = correct_predictions.float() / len(val_loader.dataset)
    
    return epoch_loss, epoch_acc.item()

In [14]:
# --- Main Training Loop ---
NUM_EPOCHS = 50 # Start with a small number to test the setup

history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

print("\nStarting Training...")
for epoch in range(NUM_EPOCHS):
    # Train
    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, scheduler, DEVICE)
    
    # Validate
    val_loss, val_acc = validate_model(model, val_loader, criterion, DEVICE)
    
    # Record history
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_acc'].append(train_acc)
    history['val_acc'].append(val_acc)
    
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

print("Training Complete!")

# Save the trained model
torch.save(model.state_dict(), 'skin_cancer_resnet50_final.pth')
print("Model saved.")


Starting Training...
Epoch 1/50 - Train Loss: 0.9445, Train Acc: 0.6564 | Val Loss: 1.0091, Val Acc: 0.6370
Epoch 2/50 - Train Loss: 0.9166, Train Acc: 0.6563 | Val Loss: 1.0199, Val Acc: 0.6535
Epoch 3/50 - Train Loss: 0.9220, Train Acc: 0.6631 | Val Loss: 1.0046, Val Acc: 0.6445
Epoch 4/50 - Train Loss: 0.9407, Train Acc: 0.6616 | Val Loss: 1.0071, Val Acc: 0.6440
Epoch 5/50 - Train Loss: 0.9044, Train Acc: 0.6639 | Val Loss: 0.9948, Val Acc: 0.6575
Epoch 6/50 - Train Loss: 0.9116, Train Acc: 0.6659 | Val Loss: 1.0006, Val Acc: 0.6420
Epoch 7/50 - Train Loss: 0.9015, Train Acc: 0.6625 | Val Loss: 0.9915, Val Acc: 0.6535
Epoch 8/50 - Train Loss: 0.9038, Train Acc: 0.6620 | Val Loss: 0.9982, Val Acc: 0.6545
Epoch 9/50 - Train Loss: 0.9222, Train Acc: 0.6633 | Val Loss: 1.0035, Val Acc: 0.6455
Epoch 10/50 - Train Loss: 0.9201, Train Acc: 0.6667 | Val Loss: 0.9992, Val Acc: 0.6560
Epoch 11/50 - Train Loss: 0.8955, Train Acc: 0.6651 | Val Loss: 0.9993, Val Acc: 0.6555
Epoch 12/50 - Train

KeyboardInterrupt: 