In [None]:
# Mount Google Drive at /content/drive so later cells can read and write
# the dataset files kept under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Generate the image folder and the train, val and test CSVs
import numpy as np
import pandas as pd
import os
from PIL import Image

# Archive holding the DermaMNIST arrays on the mounted Drive.
dataset_path = '/content/drive/MyDrive/DermaMNIST/dermamnist_128.npz'
data = np.load(dataset_path)

# Pull the image and label arrays of each split out of the archive.
train_images, train_labels = data['train_images'], data['train_labels']
val_images, val_labels = data['val_images'], data['val_labels']
test_images, test_labels = data['test_images'], data['test_labels']

def save_image_array(image_array, output_path):
    """Cast `image_array` to uint8 and write it to `output_path` as an image."""
    pixels = image_array.astype('uint8')
    Image.fromarray(pixels).save(output_path)

# Folder on the mounted Drive that receives every exported PNG.
output_dir = '/content/drive/MyDrive/DermaMNIST/images'

def create_image_file_paths(images, labels, dataset_type):
    """Save every image of one split as a PNG and collect its path and label.

    Args:
        images: iterable of image arrays, one per sample.
        labels: iterable of labels, paired positionally with `images`.
        dataset_type: split name used as the filename prefix
            (files are named ``{dataset_type}_image_{i}.png``).

    Returns:
        A ``(image_paths, label_list)`` tuple of two equally long lists,
        in the same order as the inputs.
    """
    # Saving an image does not create missing parent folders, so make sure
    # the destination exists before the first write.
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    label_list = []

    for i, (image, label) in enumerate(zip(images, labels)):
        image_filepath = os.path.join(output_dir, f'{dataset_type}_image_{i}.png')
        save_image_array(image, image_filepath)

        image_paths.append(image_filepath)
        label_list.append(label)

    return image_paths, label_list

# Export every split to PNG files and collect the (path, label) pairs.
train_image_paths, train_labels_list = create_image_file_paths(train_images, train_labels, 'train')
val_image_paths, val_labels_list = create_image_file_paths(val_images, val_labels, 'val')
test_image_paths, test_labels_list = create_image_file_paths(test_images, test_labels, 'test')

# Each collected label may itself be a length-1 array; np.ravel flattens the
# list to one scalar per sample so the CSV stores plain class ids (e.g. 2)
# instead of array reprs (e.g. "[2]").
train_df = pd.DataFrame({'image_path': train_image_paths, 'label': np.ravel(train_labels_list)})
val_df = pd.DataFrame({'image_path': val_image_paths, 'label': np.ravel(val_labels_list)})
test_df = pd.DataFrame({'image_path': test_image_paths, 'label': np.ravel(test_labels_list)})

# One CSV per split, written to the current working directory.
train_df.to_csv('train_dataset.csv', index=False)
val_df.to_csv('val_dataset.csv', index=False)
test_df.to_csv('test_dataset.csv', index=False)

# Plus a single CSV with all three splits stacked in train, val, test order.
combined_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

combined_df.to_csv('combined_dataset.csv', index=False)

print("Images saved and CSV files have been created for train, validation, and test datasets.")


KeyboardInterrupt: 

In [None]:
!pip install torch torchvision segmentation-models-pytorch pandas pillow

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.4-py3-none-any.whl.metadata (30 kB)
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm==0.9.7 (from segmentation-models-pytorch)
  Downloading timm-0.9.7-py3-none-any.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segmentation-models-pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segm

In [None]:
!pip install opencv-python pandas pillow numpy matplotlib



In [None]:
# Move the generated CSV files from the local /content directory to the DermaMNIST folder on Google Drive
import os
import shutil

# Working directory where the export cell writes its CSV files.
source_dir = "/content/"

# Dataset folder on the mounted Drive; created if it does not exist yet.
destination_dir = "/content/drive/MyDrive/DermaMNIST/"
os.makedirs(destination_dir, exist_ok=True)

# combined_dataset.csv is written alongside the per-split CSVs, so it is
# moved as well instead of being left behind in the local directory.
csv_files = ["test_dataset.csv", "val_dataset.csv", "train_dataset.csv", "combined_dataset.csv"]

for csv_file in csv_files:
    source_path = os.path.join(source_dir, csv_file)
    destination_path = os.path.join(destination_dir, csv_file)

    # Report missing files instead of failing, so the remaining files still move.
    if not os.path.exists(source_path):
        print(f"File not found: {source_path}")
        continue

    shutil.move(source_path, destination_path)
    print(f"Moved {csv_file} to {destination_dir}")

File not found: /content/test_dataset.csv
File not found: /content/val_dataset.csv
File not found: /content/train_dataset.csv


In [None]:
# Add Mask Path to csvs
import os
import pandas as pd
import cv2

# Folder holding the exported PNG images, and the folder that receives the
# generated masks (created here if missing).
image_dir = "/content/drive/MyDrive/DermaMNIST/images"  
mask_dir = "/content/drive/MyDrive/DermaMNIST/masks"   
os.makedirs(mask_dir, exist_ok=True)  

# Input CSVs, one per split (columns are read further down in this cell).
train_csv_path = "/content/drive/MyDrive/DermaMNIST/train_dataset.csv"
val_csv_path = "/content/drive/MyDrive/DermaMNIST/val_dataset.csv"
test_csv_path = "/content/drive/MyDrive/DermaMNIST/test_dataset.csv"

# Output CSVs: the same rows with an extra mask_path column.
train_updated_csv_path = "/content/drive/MyDrive/DermaMNIST/train_dataset_with_masks.csv"
val_updated_csv_path = "/content/drive/MyDrive/DermaMNIST/val_dataset_with_masks.csv"
test_updated_csv_path = "/content/drive/MyDrive/DermaMNIST/test_dataset_with_masks.csv"

def generate_mask_with_opencv(image_path, threshold, mask_dir):
    """Create a binary mask for one image by global thresholding.

    The image is read as grayscale; pixels brighter than `threshold` become
    255 and all others 0 (cv2.THRESH_BINARY). The mask is written to
    `mask_dir` as ``<image stem>_mask.png``.

    Args:
        image_path: path of the image to threshold.
        threshold: grey level used as the cut-off.
        mask_dir: existing directory that receives the mask file.

    Returns:
        The path of the written mask file.

    Raises:
        FileNotFoundError: if OpenCV cannot read `image_path`.
        OSError: if OpenCV reports that the mask could not be written.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    _, mask = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY)

    # Build the name from the stem so only the real extension is replaced,
    # whatever the source extension is.
    stem, _ = os.path.splitext(os.path.basename(image_path))
    mask_path = os.path.join(mask_dir, f"{stem}_mask.png")

    # imwrite signals failure through its return value rather than raising;
    # surface it so a CSV never points at a mask that was not written.
    if not cv2.imwrite(mask_path, mask):
        raise OSError(f"Could not write mask: {mask_path}")

    return mask_path

def process_dataset_with_masks(csv_path, updated_csv_path, image_dir, mask_dir, threshold):
    """Generate a mask for every row of a split CSV and save the extended CSV.

    Each `image_path` entry is re-rooted onto `image_dir` (only its file name
    is kept) before the mask is generated. The resulting mask paths are stored
    in a new `mask_path` column and the frame is written to `updated_csv_path`.
    """
    df = pd.read_csv(csv_path)

    mask_paths = []
    for img_path in df['image_path']:
        local_image = os.path.join(image_dir, os.path.basename(img_path))
        mask_paths.append(generate_mask_with_opencv(local_image, threshold, mask_dir))
    df['mask_path'] = mask_paths

    df.to_csv(updated_csv_path, index=False)
    print(f"Updated CSV saved to {updated_csv_path}")

# Grey-level cut-off handed to every mask-generation call below.
threshold = 110

# Same processing for each split, in train, val, test order.
for source_csv, target_csv in (
    (train_csv_path, train_updated_csv_path),
    (val_csv_path, val_updated_csv_path),
    (test_csv_path, test_updated_csv_path),
):
    process_dataset_with_masks(source_csv, target_csv, image_dir, mask_dir, threshold)

print(f"Masks generated and paths added to train, val, and test datasets.")


KeyboardInterrupt: 

In [None]:
# Modified Unet
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
import segmentation_models_pytorch as smp
import pandas as pd
from PIL import Image
import os
import numpy as np

def clean_labels(df):
    """Return `df` unchanged.

    Placeholder hook: no label cleaning is performed here.
    """
    return df

# Read the three mask-augmented CSVs, passing each through clean_labels.
train_df, val_df, test_df = (
    clean_labels(pd.read_csv(csv_file))
    for csv_file in (
        '/content/drive/MyDrive/DermaMNIST/train_dataset_with_masks.csv',
        '/content/drive/MyDrive/DermaMNIST/val_dataset_with_masks.csv',
        '/content/drive/MyDrive/DermaMNIST/test_dataset_with_masks.csv',
    )
)

def validate_image_paths(df, base_dir):
    """Re-root image paths onto `base_dir` and drop rows whose file is missing.

    Only the file name of each `image_path` entry is kept; it is joined onto
    `base_dir` and checked with ``os.path.exists``.

    Args:
        df: DataFrame with an `image_path` column. It is not modified.
        base_dir: directory expected to contain the image files.

    Returns:
        A new DataFrame containing only the rows whose image exists, with
        `image_path` rewritten to the re-rooted path and a fresh 0..n-1 index.
    """
    def check_image_exists(path):
        # Missing values (None/NaN) cannot name a file; treat them as absent
        # rather than letting os.path.basename raise.
        if not isinstance(path, (str, os.PathLike)):
            return None
        full_path = os.path.join(base_dir, os.path.basename(path))
        return full_path if os.path.exists(full_path) else None

    resolved = df['image_path'].apply(check_image_exists)
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original paths and re-running the validation stays idempotent.
    return (
        df.assign(image_path=resolved)
        .dropna(subset=['image_path'])
        .reset_index(drop=True)
    )

# Directory that is expected to hold every exported image.
base_dir = "/content/drive/MyDrive/DermaMNIST/images"

# Keep only the rows whose image file is present, for each split in turn.
train_df, val_df, test_df = (
    validate_image_paths(split_df, base_dir)
    for split_df in (train_df, val_df, test_df)
)

class SegmentationDataset(Dataset):
    """Dataset of (image, mask) pairs described by a DataFrame.

    The frame must provide `image_path` and `mask_path` columns. Images are
    opened as RGB and masks as single-channel greyscale.

    Args:
        df: DataFrame with one row per sample.
        base_dir: stored on the instance; the paths in `df` are used as-is.
        transforms: optional callable applied to both the image and the mask.
    """

    def __init__(self, df, base_dir, transforms=None):
        self.df = df
        self.base_dir = base_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples (rows of the frame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for row `idx`.

        The image is whatever `transforms` produces (or the opened RGB image
        when no transforms are set). The mask is always a 2-D ``torch.long``
        tensor of class indices: 1 where the mask is brighter than mid-range,
        0 elsewhere.
        """
        row = self.df.iloc[idx]

        # convert() returns a loaded copy, so the file handles can be closed
        # right away instead of waiting for garbage collection.
        with Image.open(row['image_path']) as image_file:
            image = image_file.convert("RGB")
        with Image.open(row['mask_path']) as mask_file:
            mask = mask_file.convert("L")

        if self.transforms:
            image = self.transforms(image)
            mask = self.transforms(mask)

        # Without tensor-producing transforms the mask is not a tensor yet.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.array(mask))

        # Class-index targets for CrossEntropyLoss must not carry a channel
        # axis: drop it so batches come out as (N, H, W).
        if mask.dim() == 3:
            mask = mask[0]

        # Threshold at mid-range rather than truncating with .long(): resizing
        # can turn edge pixels into fractional values that truncation would
        # silently map to background.
        midpoint = 0.5 if mask.is_floating_point() else 127
        return image, (mask > midpoint).long()

# Preprocessing shared by all splits: resize to 224x224, then convert to a tensor.
data_transforms = T.Compose([T.Resize((224, 224)), T.ToTensor()])

# One dataset per split, all using the same transforms.
train_dataset, val_dataset, test_dataset = (
    SegmentationDataset(split_df, base_dir, transforms=data_transforms)
    for split_df in (train_df, val_df, test_df)
)

# Batches of 16; only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# U-Net with an ImageNet-pretrained ResNet-34 encoder, RGB input, 3 output channels.
# NOTE(review): the masks in this notebook come from a binary threshold — confirm
# that 3 output classes is intended.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=3,
).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Train for 5 epochs over train_loader, printing the mean per-batch loss of each epoch.
for epoch in range(5):
    model.train()
    running_loss = 0.0
    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)
        outputs = model(images)
        # NOTE(review): loss_fn receives the masks as its target; confirm the
        # dataset yields them in the shape and dtype the loss expects.
        loss = loss_fn(outputs, masks)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Inference pass over test_loader with gradients disabled.
# `preds` (argmax over dim 1 of the outputs) is recomputed for every batch and
# is neither stored nor scored here, so only the last batch's value remains afterwards.
model.eval()
with torch.no_grad():
    for images, masks in test_loader:
        images, masks = images.to(device), masks.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 218MB/s]


Epoch 1, Loss: 0.1885171606966622
Epoch 2, Loss: 0.07587418446666029
Epoch 3, Loss: 0.05629654589734257
Epoch 4, Loss: 0.04516453313381841
Epoch 5, Loss: 0.039886192125204493


In [None]:
# Baseline CNN classifier built from U-Net-style encoder blocks (no decoder)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from pathlib import Path
import numpy as np
from tqdm import tqdm

class DermaMNISTClassifier(nn.Module):
    """Image classifier: three conv stages, global average pooling, MLP head.

    Each stage is two 3x3 convolutions (each followed by batch norm and ReLU)
    and a 2x2 max-pool. Channel widths are 64, 128 and 256. The pooled
    256-dimensional feature vector goes through Linear(256, 128), ReLU,
    Dropout(0.3) and Linear(128, n_classes).

    Args:
        n_channels: number of input image channels.
        n_classes: number of output logits.
    """

    @staticmethod
    def _double_conv(in_channels, out_channels):
        """Build one encoder stage: (conv3x3 -> batch norm -> ReLU) twice."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def __init__(self, n_channels=3, n_classes=7):
        super().__init__()

        # Attribute names and creation order are kept stable so that
        # state_dict keys and seeded initialisation stay the same.
        self.enc1 = self._double_conv(n_channels, 64)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.enc2 = self._double_conv(64, 128)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.enc3 = self._double_conv(128, 256)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(128, n_classes),
        )

    def forward(self, x):
        """Map a batch of images to a batch of class logits."""
        stages = (
            (self.enc1, self.pool1),
            (self.enc2, self.pool2),
            (self.enc3, self.pool3),
        )
        for encode, downsample in stages:
            x = downsample(encode(x))

        pooled = self.avg_pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

def prepare_data(data_dir, batch_size=32):
    """Build train and validation DataLoaders from ``.npy`` files in `data_dir`.

    Expects ``train_images.npy``, ``train_labels.npy``, ``val_images.npy`` and
    ``val_labels.npy``. Image arrays are transposed with axes (0, 3, 1, 2),
    i.e. from channels-last to channels-first, and converted to float32
    tensors. Labels are squeezed and converted to int64 tensors.

    Args:
        data_dir: directory containing the four ``.npy`` files.
        batch_size: batch size used by both loaders.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    data_dir = Path(data_dir)

    def _load_split(split):
        """Load one split's arrays and wrap them in a TensorDataset."""
        images = np.load(data_dir / f'{split}_images.npy')
        labels = np.load(data_dir / f'{split}_labels.npy')

        images = np.transpose(images, (0, 3, 1, 2))
        # squeeze() turns a single-sample label array into a 0-d scalar, which
        # TensorDataset cannot pair with the images; atleast_1d keeps the
        # sample axis in that case and changes nothing otherwise.
        labels = np.atleast_1d(labels.squeeze())

        return TensorDataset(
            torch.from_numpy(images).float(),
            torch.from_numpy(labels).long(),
        )

    train_dataset = _load_split('train')
    val_dataset = _load_split('val')

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

def _run_epoch(model, batches, criterion, device, optimizer=None):
    """Run one pass over `batches` and return (mean batch loss, accuracy in %).

    With an `optimizer` the model is put in training mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    with torch.set_grad_enabled(training):
        for images, labels in batches:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            _, predicted = outputs.max(1)
            seen += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

    return loss_sum / num_batches, 100. * correct / seen


def train_model(model, train_loader, val_loader, num_epochs=10, device='cuda',
                lr=0.001, checkpoint_path='best_derma_model.pth'):
    """Train a classifier with Adam and cross-entropy, checkpointing the best epoch.

    After every epoch the training and validation loss/accuracy are printed.
    Whenever the validation loss improves on the best value so far, the
    model's state_dict is saved to `checkpoint_path`.

    Args:
        model: module mapping an image batch to class logits.
        train_loader: iterable of (images, labels) training batches.
        val_loader: iterable of (images, labels) validation batches.
        num_epochs: number of passes over `train_loader`.
        device: device the model and the batches are moved to.
        lr: Adam learning rate.
        checkpoint_path: file that receives the best state_dict.

    Returns:
        None.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Only the training pass shows a progress bar.
        train_loss, train_acc = _run_epoch(
            model,
            tqdm(train_loader, desc=f'Epoch {epoch+1}'),
            criterion,
            device,
            optimizer=optimizer,
        )
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print('Saved best model!')

        print('-' * 70)

if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # In a notebook kernel __name__ is "__main__", so this block runs with the
    # cell. Classifier-specific names are used so it does not rebind the
    # notebook-wide `model`, `train_loader` and `val_loader` that other cells
    # rely on.
    classifier_model = DermaMNISTClassifier(n_channels=3, n_classes=7)

    data_dir = './medmnist_data/dermamnist'
    classifier_train_loader, classifier_val_loader = prepare_data(data_dir, batch_size=32)

    train_model(
        classifier_model,
        classifier_train_loader,
        classifier_val_loader,
        num_epochs=100,
        device=device,
    )


In [None]:
# Accuracy and AUC
import torch
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import label_binarize
import numpy as np

# Pixel-wise evaluation of `model` on test_loader: every pixel is one sample.
model.eval()
all_true_labels = []
all_predicted_probs = []

with torch.no_grad():
    for images, masks in test_loader:
        images, masks = images.to(device), masks.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        # (N, C, H, W) -> (N*H*W, C): one row of class probabilities per
        # pixel, in the same (n, h, w) order as the flattened masks below.
        probs = probs.permute(0, 2, 3, 1).reshape(-1, probs.shape[1])
        # Collect one array per batch and concatenate once afterwards;
        # extending a Python list pixel by pixel does not scale.
        all_true_labels.append(masks.reshape(-1).cpu().numpy())
        all_predicted_probs.append(probs.cpu().numpy())

all_true_labels = np.concatenate(all_true_labels)
all_predicted_probs = np.concatenate(all_predicted_probs)

num_classes = all_predicted_probs.shape[1]
# Explicit one-hot with exactly one column per model output channel.
all_true_labels_one_hot = (all_true_labels[:, None] == np.arange(num_classes)).astype(np.int8)

predicted_classes = np.argmax(all_predicted_probs, axis=1)

accuracy = accuracy_score(all_true_labels, predicted_classes)

# ROC-AUC is undefined for a class with no positive (or no negative) pixels,
# so the macro one-vs-rest average is taken over the classes that can be scored.
positives_per_class = all_true_labels_one_hot.sum(axis=0)
scorable_classes = [
    c for c in range(num_classes)
    if 0 < positives_per_class[c] < len(all_true_labels)
]
per_class_auc = [
    roc_auc_score(all_true_labels_one_hot[:, c], all_predicted_probs[:, c])
    for c in scorable_classes
]
roc_auc = float(np.mean(per_class_auc)) if per_class_auc else float("nan")

print(f"Accuracy: {accuracy:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")

In [None]:
# Save Model
# Persist the weights (state_dict) of whatever `model` currently refers to.
# The path is relative, so the file lands in the kernel's working directory.
# NOTE(review): presumably that directory is not on the mounted Drive — confirm the file is kept.
save_path = "unet_model.pth" 
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

Model saved to unet_model.pth
