In [1]:
#finding the test accuracy
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split, Dataset
from torch.utils.data import Subset
import wandb
from PIL import Image
from tabulate import tabulate
import numpy as np
import random
import shutil
import matplotlib.pyplot as plt

In [2]:


#  Load just color and texture
color_df   = pd.read_csv('/kaggle/input/color-features/color_features_full.csv')
texture_df = pd.read_csv('/kaggle/input/texture-features/glcm_ngtdm_features_full.csv')

#  Drop any “Class” column
for df in (color_df, texture_df):
    if 'Class' in df.columns:
        df.drop(columns=['Class'], inplace=True)

#  Merge on Image_ID and set index
features_df = (
    color_df
    .merge(texture_df, on='Image_ID')
    .set_index('Image_ID')
)

#  Collapse duplicates if any
features_df = features_df.groupby(level=0).first()

#  Keep only numeric columns
features_df = features_df.select_dtypes(include=[np.number])

# Record feature dimension
feature_dim = features_df.shape[1]

In [3]:
#  CUSTOM DATASET THAT RETURNS (img, tab_feats, label) 
class SkinDataset(Dataset):
    def __init__(self, root, transform, features_df):
        self.folder   = datasets.ImageFolder(root=root, transform=transform)
        self.features = features_df

    def __len__(self):
        return len(self.folder)

    def __getitem__(self, idx):
        # 1. load image+label
        img, label = self.folder[idx]

        # 2. extract Image_ID from the filepath
        path, _   = self.folder.samples[idx]
        image_id  = os.path.splitext(os.path.basename(path))[0]

        # 3. look up and convert the numeric features
        row = self.features.loc[image_id]              # pandas Series of floats/ints
        tab = torch.tensor(row.values, dtype=torch.float32)

        # 4. return the triplet
        return img, tab, label


In [5]:
#  TRANSFORMS & DATASET SPLITS 
IMG_SIZE = (224, 224)
transform_pipeline = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

# only for train+val
image_root      = '/kaggle/input/preprocessed-data/Optimized_output_final'
# only for test
test_image_root = '/kaggle/input/test-new/test_new'

random.seed(42)

# Build full dataset over image_root
full_ds = SkinDataset(
    root=image_root,
    transform=transform_pipeline,
    features_df=features_df
)
total      = len(full_ds)
# 80-20 division
n_test     = int(0.2 * total)
n_trainval = total - n_test
n_val      = int(0.1 * n_trainval)
n_train    = n_trainval - n_val

#Deterministic shuffle & take only train+val indices
all_idxs = list(range(total))
random.shuffle(all_idxs)
train_idxs = all_idxs[:n_train]
val_idxs   = all_idxs[n_train:n_train + n_val]

#  Create in‑memory Subsets for train & val
train_ds = Subset(full_ds, train_idxs)
val_ds   = Subset(full_ds, val_idxs)

#  Load fixed test split from disk
test_ds = SkinDataset(
    root=test_image_root,
    transform=transform_pipeline,
    features_df=features_df
)


#  PRINT TEST‐SET CLASS COUNTS 
from tabulate import tabulate

class_counts = []
for cls in sorted(os.listdir(test_image_root)):
    cls_dir = os.path.join(test_image_root, cls)
    if not os.path.isdir(cls_dir):
        continue
    cnt = len([
        f for f in os.listdir(cls_dir)
        if f.lower().endswith(('.png','.jpg','.jpeg'))
    ])
    class_counts.append((cls, cnt))

print(tabulate(class_counts, headers=['Class', '# Test Images']))
total_test = sum(count for _, count in class_counts)
print(f"\nTotal test images: {total_test}\n")


Class                                                        # Test Images
---------------------------------------------------------  ---------------
1. Eczema                                                              335
10. Warts Molluscum and other Viral Infections                         420
2. Melanoma                                                            628
3. Atopic Dermatitis                                                   251
4. Basal Cell Carcinoma                                                664
5. Melanocytic Nevi                                                   1594
6. Benign Keratosis-like Lesions                                       415
7. Psoriasis pictures Lichen Planus and related diseases               411
8. Seborrheic Keratoses and other Benign Tumors                        369
9. Tinea Ringworm Candidiasis and other Fungal Infections              340

Total test images: 5427



In [6]:
# Configure device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Running on device:", device)

Running on device: cuda


In [7]:
def show_predictions_on_test(model, dataset, feature_dim, device='cuda'):
    model.eval()
    # get class names
    classes = dataset.folder.classes

    # pick 30 random samples
    indices = random.sample(range(len(dataset)), 30)
    imgs, tabs, trues, preds = [], [], [], []

    # first collect images, tabs, true labels
    for idx in indices:
        img, tab, lbl = dataset[idx]
        imgs.append(img)
        tabs.append(tab)
        trues.append(classes[lbl])

    # then run predictions
    with torch.no_grad():
        for img, tab in zip(imgs, tabs):
            img_b = img.unsqueeze(0).to(device)
            tab_b = tab.unsqueeze(0).to(device)
            out   = model(img_b, tab_b)
            p     = out.argmax(dim=1).item()
            preds.append(classes[p])

    # plot 10×3 grid
    fig, axes = plt.subplots(10, 3, figsize=(12, 30))
    fig.suptitle("Test Data Predictions (Best Model)", fontsize=16)

    for ax, img, t, p in zip(axes.flat, imgs, trues, preds):
        im = img.permute(1, 2, 0).cpu().numpy()
        im = np.clip((im * 0.5) + 0.5, 0, 1)
        ax.imshow(im)
        ax.set_title(f"True: {t}\nPred: {p}")
        ax.axis("off")

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    wandb.log({"Sample Predictions": wandb.Image(fig)})
    plt.close(fig)

In [8]:
#  EXTEND ResNet TO CONCAT TABULAR FEATURES
class ResNetWithTabular(nn.Module):
    def __init__(self, base_model, tab_dim, num_classes=10):
        super().__init__()
        # everything except the final fc
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])  # output: [B,2048,1,1]
        self.tab_bn   = nn.BatchNorm1d(tab_dim)
        self.classifier = nn.Sequential(
            nn.Flatten(),                          # flatten image feats
            # image feature dim = base_model.fc.in_features
            # + tabular dim
            nn.Linear(base_model.fc.in_features + tab_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, img, tab):
        x = self.backbone(img)       # [B,2048,1,1]
        x = torch.flatten(x, 1)      # [B,2048]
        tab = self.tab_bn(tab)       # normalize tabular
        x = torch.cat([x, tab], dim=1)
        return self.classifier(x)


In [9]:
#  FINE‑TUNING CLASS 
class FineTuneCNN:
    def __init__(self, train_ds, valid_ds, base_model, batch_size=32, freeze_ratio=1.0, test_ds=None):
        self.model = base_model
        # freeze parameters
        if freeze_ratio >= 1.0:
            for p in self.model.parameters():
                p.requires_grad = False
        else:
            total_p = sum(1 for _ in self.model.parameters())
            to_freeze = int(total_p * freeze_ratio)
            cnt = 0
            for p in self.model.parameters():
                p.requires_grad = False
                cnt += 1
                if cnt >= to_freeze:
                    break
        # ensure final layers are trainable
        for p in self.model.classifier.parameters():
            p.requires_grad = True

        # override the default loaders
        self.train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
        self.test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False) if test_ds else None

    def run_training(self, num_epochs=10, learning_rate=1e-3, weight_decay_val=0):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay_val)

        for epoch in range(1, num_epochs+1):
            # --- train ---
            self.model.train()
            running_correct = 0
            running_total   = 0
            for img, tab, lbl in self.train_loader:
                img, tab, lbl = img.to(device), tab.to(device), lbl.to(device)
                optimizer.zero_grad()
                out = self.model(img, tab)
                loss = criterion(out, lbl)
                loss.backward()
                optimizer.step()
                pred = out.argmax(dim=1)
                running_correct += (pred == lbl).sum().item()
                running_total   += lbl.size(0)
            train_acc = 100 * running_correct / running_total
            print(f"Epoch {epoch} — Train Acc: {train_acc:.2f}%")
            wandb.log({"epoch":epoch, "train_acc":train_acc})

            # --- validate ---
            self.model.eval()
            val_corr = 0
            val_tot  = 0
            val_loss = 0.0
            with torch.no_grad():
                for img, tab, lbl in self.valid_loader:
                    img, tab, lbl = img.to(device), tab.to(device), lbl.to(device)
                    out = self.model(img, tab)
                    l  = criterion(out, lbl)
                    pred = out.argmax(dim=1)
                    val_corr += (pred == lbl).sum().item()
                    val_tot  += lbl.size(0)
                    val_loss  = l.item()
            val_acc = 100 * val_corr / val_tot
            print(f"Epoch {epoch} — Val   Acc: {val_acc:.2f}%")
            wandb.log({"validation_accuracy": val_acc, "validation_loss": val_loss})

    def evaluate_test(self):
        if self.test_loader is None:
            print("No test set.")
            return
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
        test_corr = 0
        test_tot  = 0
        with torch.no_grad():
            for img, tab, lbl in self.test_loader:
                img, tab, lbl = img.to(device), tab.to(device), lbl.to(device)
                out = self.model(img, tab)
                pred = out.argmax(dim=1)
                test_corr += (pred == lbl).sum().item()
                test_tot  += lbl.size(0)
        test_acc = 100 * test_corr / test_tot
        print(f"Test Accuracy: {test_acc:.2f}%")
        wandb.log({"test_accuracy": test_acc})



In [10]:
import wandb
import numpy as np
from types import SimpleNamespace
import random

In [11]:
wandb.login(key='1df7a902fa4a610500b8e79e21818419d5facdbb')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23m018[0m ([33mma23m018-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
# BEST VALIDATION CONFIGS
BEST_VAL_CONFIGS = [
    {
        'learning_rate': 1e-4,
        'freeze_ratio':  0.2,
        'l2_reg':        0,
        'batch_size':    64,
        'epochs':        10
    }
    

]

# SWEEP CONFIG TO MAXIMIZE test_accuracy
sweep_config = {
    'method':  'bayes',
    'metric':  { 'name': 'test_accuracy', 'goal': 'maximize' },
    'parameters': {
        'config_idx': { 'values': list(range(len(BEST_VAL_CONFIGS))) }
    }
}
sweep_id = wandb.sweep(sweep_config, entity= "ma23m018-indian-institute-of-technology-madras", project="mtech_project1_test2")

Create sweep with ID: jenasips
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/mtech_project1_test2/sweeps/jenasips


In [12]:
#sweep_id = wandb.sweep(sweep_config, entity= "ma23m018-indian-institute-of-technology-madras", project="mtech_project1_test1")

Create sweep with ID: 7bxbulmu
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/mtech_project1_test1/sweeps/7bxbulmu


In [13]:
#
def main():
    with wandb.init() as run:
        idx = run.config.config_idx
        cfg = BEST_VAL_CONFIGS[idx]
        run.config.update(cfg, allow_val_change=False)
        run.name = (f"bs{cfg['batch_size']}"
                    f"_ep{cfg['epochs']}"
                    f"_lr{cfg['learning_rate']}"
                    f"_fr{cfg['freeze_ratio']}")

        #rebuild model
        base = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        model = ResNetWithTabular(
            base_model=base,
            tab_dim=feature_dim,
            num_classes=10
        )

        #initialize finetuner *with* the test set
        finetuner = FineTuneCNN(
            train_ds=train_ds,     # from  existing train/val split
            valid_ds=val_ds,
            base_model=model,
            batch_size=cfg['batch_size'],
            freeze_ratio=cfg['freeze_ratio'],
            test_ds=test_ds       
        )

        #train+validate, then evaluate on test
        finetuner.run_training(
            num_epochs=cfg['epochs'],
            learning_rate=cfg['learning_rate'],
            weight_decay_val=cfg['l2_reg']
        )
        finetuner.evaluate_test()
        show_predictions_on_test(
        model=finetuner.model,
         dataset=test_ds,
         feature_dim=feature_dim,
         device=device)


#  SWEEP 
wandb.agent(sweep_id, function=main, count=len(BEST_VAL_CONFIGS))

[34m[1mwandb[0m: Agent Starting Run: 8sfldfcc with config:
[34m[1mwandb[0m: 	config_idx: 0
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 198MB/s] 


Epoch 1 — Train Acc: 59.31%
Epoch 1 — Val   Acc: 67.45%
Epoch 2 — Train Acc: 70.97%
Epoch 2 — Val   Acc: 69.11%
Epoch 3 — Train Acc: 78.50%
Epoch 3 — Val   Acc: 69.38%
Epoch 4 — Train Acc: 85.89%
Epoch 4 — Val   Acc: 73.48%
Epoch 5 — Train Acc: 91.48%
Epoch 5 — Val   Acc: 70.86%
Epoch 6 — Train Acc: 94.70%
Epoch 6 — Val   Acc: 70.49%
Epoch 7 — Train Acc: 96.08%
Epoch 7 — Val   Acc: 70.76%
Epoch 8 — Train Acc: 97.07%
Epoch 8 — Val   Acc: 70.07%
Epoch 9 — Train Acc: 96.98%
Epoch 9 — Val   Acc: 71.04%
Epoch 10 — Train Acc: 97.43%
Epoch 10 — Val   Acc: 70.72%
Test Accuracy: 90.55%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
train_acc,▁▃▅▆▇▇████
validation_accuracy,▁▃▃█▅▅▅▄▅▅
validation_loss,▂▂▃▂▁▆█▅▂▅

0,1
epoch,10.0
test_accuracy,90.54726
train_acc,97.42724
validation_accuracy,70.71823
validation_loss,1.00704
