In [None]:
# Install Kaggle package if not installed
!pip install -q kaggle

# Upload your kaggle.json (API token) manually or mount Drive where it's saved
from google.colab import files
files.upload()  # upload kaggle.json

# Make kaggle.json file accessible
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download HAM10000 dataset from Kaggle
!kaggle datasets download kmader/skin-cancer-mnist-ham10000

# Unzip the dataset
!unzip -q skin-cancer-mnist-ham10000.zip -d HAM10000

Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.19G/5.20G [02:17<00:00, 48.2MB/s]
100% 5.20G/5.20G [02:17<00:00, 40.6MB/s]


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T
import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2

import pandas as pd
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

# Device set
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Paths
data_dir = "HAM10000"
csv_path = os.path.join(data_dir, "HAM10000_metadata.csv")
df = pd.read_csv(csv_path)

# Image path load function
def load_image_path(row):
    folders = ['HAM10000_images_part_1', 'HAM10000_images_part_2']
    for folder in folders:
        path = os.path.join(data_dir, folder, row['image_id'] + ".jpg")
        if os.path.exists(path):
            return path
    return None

df['path'] = df.apply(load_image_path, axis=1)
df = df.dropna(subset=['path'])  # Drop rows where path not found

In [None]:
# Label mapping
label2idx = {label: idx for idx, label in enumerate(df['dx'].unique())}
idx2label = {idx: label for label, idx in label2idx.items()}
df['label'] = df['dx'].map(label2idx)

# Train-val split
train_df, val_df = train_test_split(df, stratify=df['label'], test_size=0.2, random_state=42)

print(label2idx)

{'bkl': 0, 'nv': 1, 'df': 2, 'mel': 3, 'vasc': 4, 'bcc': 5, 'akiec': 6}


In [None]:
# mel - 90  ---- 10% 90%   50%  oversampling
# nv - 90   ----- 90%     50% oversampling  contrastive learning

In [None]:
# Transformations
train_transform = A.Compose([
    A.SmallestMaxSize(max_size=256),
    A.RandomCrop(height=224, width=224),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.MotionBlur(p=0.3),
    A.ISONoise(p=0.3),
    A.HueSaturationValue(p=0.3),
    A.Normalize(),
    ToTensorV2()
])

val_transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(),
    ToTensorV2()
])


In [None]:
# Dataset
class HAMDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = cv2.imread(row['path'])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image']
        label = row['label']
        return image, label

train_ds = HAMDataset(train_df, train_transform)
val_ds = HAMDataset(val_df, val_transform)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)


# phase 1 -> img -> augment -> encoder -> projection [128/256] -> Supervised contrastive Loss -> [1792]
# phase 2  -> encoder (saved) -> freeze/Fine tune(full encdoer Or Some layer) + MLP (classification) -> prediction

In [None]:
# Encoder with EfficientNet + Projection Head
class EncoderWithProjection(nn.Module):
    def __init__(self, backbone='efficientnet_b0'):  # EfficientNetB0 used CNN pretrained model
        super().__init__()
        self.encoder = timm.create_model(backbone, pretrained=True, num_classes=0)
        self.projection = nn.Sequential(
            nn.Linear(self.encoder.num_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128)
        )

    def forward(self, x):
        feat = self.encoder(x)
        proj = self.projection(feat)
        proj = nn.functional.normalize(proj, dim=1)
        return proj

In [None]:
#  Supervised Contrastive Loss
class SupConLoss(nn.Module):
    def __init__(self, temperature=0.07):
        super().__init__()
        self.temperature = temperature

    def forward(self, features, labels):
        device = features.device
        labels = labels.contiguous().view(-1, 1)
        mask = torch.eq(labels, labels.T).float().to(device)

        dot_product = torch.matmul(features, features.T) / self.temperature
        logits_max, _ = torch.max(dot_product, dim=1, keepdim=True)
        logits = dot_product - logits_max.detach()

        logits_mask = torch.ones_like(mask) - torch.eye(mask.size(0)).to(device)
        mask = mask * logits_mask

        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True) + 1e-9)

        mean_log_prob_pos = (mask * log_prob).sum(1) / (mask.sum(1) + 1e-9)
        loss = -mean_log_prob_pos.mean()
        return loss

In [None]:
#  Pretraining (Phase 1: Contrastive Learning)
model = EncoderWithProjection().to(device)
criterion = SupConLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

def train():
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        features = model(images)
        loss = criterion(features, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()
    return total_loss / len(train_loader)

print("Phase 1: Contrastive Pretraining Started...")
for epoch in range(100):
    loss = train()
    print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

🔁 Phase 1: Contrastive Pretraining Started...
Epoch 1, Loss: 3.9550
Epoch 2, Loss: 3.8208
Epoch 3, Loss: 3.7549
Epoch 4, Loss: 3.6845
Epoch 5, Loss: 3.6104
Epoch 6, Loss: 3.5680
Epoch 7, Loss: 3.4909
Epoch 8, Loss: 3.4372
Epoch 9, Loss: 3.3779
Epoch 10, Loss: 3.3196
Epoch 11, Loss: 3.2664
Epoch 12, Loss: 3.2271
Epoch 13, Loss: 3.1695
Epoch 14, Loss: 3.1401
Epoch 15, Loss: 3.1422
Epoch 16, Loss: 3.0971
Epoch 17, Loss: 3.1019
Epoch 18, Loss: 3.0819
Epoch 19, Loss: 3.0918
Epoch 20, Loss: 3.0804
Epoch 21, Loss: 3.0825
Epoch 22, Loss: 3.0879
Epoch 23, Loss: 3.0851
Epoch 24, Loss: 3.0808
Epoch 25, Loss: 3.0743
Epoch 26, Loss: 3.0879
Epoch 27, Loss: 3.0919
Epoch 28, Loss: 3.0818
Epoch 29, Loss: 3.1029
Epoch 30, Loss: 3.0989
Epoch 31, Loss: 3.1138
Epoch 32, Loss: 3.1218
Epoch 33, Loss: 3.1514
Epoch 34, Loss: 3.1818
Epoch 35, Loss: 3.1482
Epoch 36, Loss: 3.1688
Epoch 37, Loss: 3.1693
Epoch 38, Loss: 3.1526
Epoch 39, Loss: 3.1639
Epoch 40, Loss: 3.1415
Epoch 41, Loss: 3.1528
Epoch 42, Loss: 3.12

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')


# # save_dir = "/content/drive/MyDrive/models"
# # os.makedirs(save_dir, exist_ok=True)

# # # Encoder Save
# # torch.save(model.state_dict(), os.path.join(save_dir, "efficientNet_encoder_pretrained.pth"))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#  Classifier with Fine-tuned Encoder (Phase 2)
class Classifier(nn.Module):
    def __init__(self, encoder, num_classes):
        super().__init__()
        self.encoder = encoder.encoder
        #  Unfreeze encoder for fine-tuning
        for param in self.encoder.parameters():
            param.requires_grad = True
        self.fc = nn.Linear(self.encoder.num_features, num_classes)

    def forward(self, x):
        feat = self.encoder(x)
        return self.fc(feat)

In [None]:
classifier = Classifier(model, len(label2idx)).to(device)

#  Focal Loss with tuned gamma=3
class FocalLoss(nn.Module):
    def __init__(self, weights=None, alpha=1, gamma=3):  #  gamma increased
        super(FocalLoss, self).__init__()
        self.weights = weights
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none', weight=self.weights)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()

# Class weight for imbalance
weights = compute_class_weight(class_weight='balanced',
                                classes=np.unique(df['label']),
                                y=df['label'])
weights = torch.tensor(weights, dtype=torch.float).to(device)

loss_fn = FocalLoss(weights=weights)
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-4)  # Lower LR for fine-tuning


In [None]:
#  Training loop (Phase 2)
print("Phase 2: Fine-tuning Classifier Started...")
for epoch in range(40):
    classifier.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        out = classifier(x)
        loss = loss_fn(out, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)

    #  Validation
    classifier.eval()
    val_loss = 0
    correct, total = 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            out = classifier(x)
            loss = loss_fn(out, y)
            val_loss += loss.item()
            pred = out.argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    val_acc = 100 * correct / total
    print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_acc:.2f}%")

🔁 Phase 2: Fine-tuning Classifier Started...
Epoch 1, Train Loss: 0.0418, Val Loss: 0.4333, Val Accuracy: 83.28%
Epoch 2, Train Loss: 0.0267, Val Loss: 0.4490, Val Accuracy: 84.62%
Epoch 3, Train Loss: 0.0096, Val Loss: 0.4571, Val Accuracy: 84.97%
Epoch 4, Train Loss: 0.0296, Val Loss: 0.4297, Val Accuracy: 84.42%
Epoch 5, Train Loss: 0.0197, Val Loss: 0.4547, Val Accuracy: 83.97%
Epoch 6, Train Loss: 0.0115, Val Loss: 0.4551, Val Accuracy: 85.07%
Epoch 7, Train Loss: 0.0084, Val Loss: 0.4573, Val Accuracy: 86.32%
Epoch 8, Train Loss: 0.0164, Val Loss: 0.4526, Val Accuracy: 86.32%
Epoch 9, Train Loss: 0.0124, Val Loss: 0.4514, Val Accuracy: 86.52%
Epoch 10, Train Loss: 0.0047, Val Loss: 0.4835, Val Accuracy: 86.27%
Epoch 11, Train Loss: 0.0042, Val Loss: 0.4815, Val Accuracy: 87.32%
Epoch 12, Train Loss: 0.0082, Val Loss: 0.4563, Val Accuracy: 86.97%
Epoch 13, Train Loss: 0.0041, Val Loss: 0.5026, Val Accuracy: 87.42%
Epoch 14, Train Loss: 0.0077, Val Loss: 0.5078, Val Accuracy: 86.42

In [None]:
torch.save(classifier.state_dict(), os.path.join(save_dir, "final_fine_tuning_classifier.pth"))

In [None]:
# Classification report
cm = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:\n", cm)
report = classification_report(all_labels, all_preds, target_names=[idx2label[i] for i in range(len(idx2label))])
print("Classification Report:\n", report)

Confusion Matrix:
 [[ 181   11    3    7    0    3   15]
 [  43 1193    6   66    1   29    3]
 [   0    1   15    3    0    1    3]
 [  21   26    2  166    0    3    5]
 [   0    2    0    0   21    5    0]
 [   2    1    0    1    0   96    3]
 [   9    1    2    2    0    4   47]]
Classification Report:
               precision    recall  f1-score   support

         bkl       0.71      0.82      0.76       220
          nv       0.97      0.89      0.93      1341
          df       0.54      0.65      0.59        23
         mel       0.68      0.74      0.71       223
        vasc       0.95      0.75      0.84        28
         bcc       0.68      0.93      0.79       103
       akiec       0.62      0.72      0.67        65

    accuracy                           0.86      2003
   macro avg       0.73      0.79      0.75      2003
weighted avg       0.87      0.86      0.86      2003



In [None]:
# Ensemble Accuracy: 87.56864702945582
#               precision    recall  f1-score   support

#          bkl       0.72      0.89      0.80       220
#           nv       0.97      0.91      0.94      1341
#           df       0.62      0.57      0.59        23
#          mel       0.71      0.74      0.73       223
#         vasc       0.76      0.79      0.77        28
#          bcc       0.76      0.87      0.81       103
#        akiec       0.69      0.71      0.70        65

#     accuracy                           0.88      2003
#    macro avg       0.75      0.78      0.76      2003
# weighted avg       0.88      0.88      0.88      2003

In [None]:
# Classification Report:
#                precision    recall  f1-score   support

#          bkl       0.66      0.89      0.76       220
#           nv       0.96      0.91      0.93      1341
#           df       0.65      0.65      0.65        23
#          mel       0.77      0.69      0.73       223
#         vasc       0.80      0.86      0.83        28
#          bcc       0.66      0.90      0.77       103
#        akiec       0.70      0.57      0.63        65

#     accuracy                           0.87      2003
#    macro avg       0.74      0.78      0.76      2003
# weighted avg       0.88      0.87      0.87      2003

In [None]:
# For predict without training
encoder = EncoderWithProjection()
encoder.load_state_dict(torch.load("/content/drive/MyDrive/models/efficientNet_encoder_pretrained.pth", map_location=device))

# classifier = Classifier(encoder, num_classes=len(label2idx))
classifier = Classifier(encoder, num_classes=7)
classifier.load_state_dict(torch.load("/content/drive/MyDrive/models/final_fine_tuning_classifier.pth", map_location=device))
classifier.to(device)
classifier.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Classifier(
  (encoder): EfficientNet(
    (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNormAct2d(
            32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Con

In [None]:
def predict_image(path, topk=7):

    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    transform = A.Compose([
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ])
    image_tensor = transform(image=image)['image'].unsqueeze(0).to(device)

    classifier.eval()
    with torch.no_grad():
        out = classifier(image_tensor)
        probs = torch.softmax(out, dim=1)

        top_probs, top_idxs = torch.topk(probs, k=topk, dim=1)
        top_probs = top_probs.cpu().numpy().flatten()
        top_idxs = top_idxs.cpu().numpy().flatten()

        print("Top Predictions:")
        for i in range(topk):
            label = idx2label[top_idxs[i]]
            prob = top_probs[i] * 100
            print(f"{label}: {prob:.2f}%")

        if top_probs[0] < 0.6:
            print(" Low confidence prediction. This image may not be a valid skin lesion.")

# Test on ham10000 dataset image
# predict_image("HAM10000/HAM10000_images_part_1/ISIC_0026754.jpg")
# idx2label = {'bkl': 0, 'nv': 1, 'df': 2, 'mel': 3, 'vasc': 4, 'bcc': 5, 'akiec': 6}

# Test on real image
predict_image("/content/images (1).jpg")


✅ Top Predictions:
mel: 98.36%
bkl: 1.14%
nv: 0.25%
akiec: 0.08%
vasc: 0.07%
df: 0.05%
bcc: 0.05%


In [None]:
# ✅ Real Image Prediction





