In [1]:
!pip install transformers
!pip install torch torchvision

[0m

In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from scipy.stats import entropy
from collections import defaultdict
import argparse
from matplotlib import pyplot as plt
from transformers import ViTFeatureExtractor, ViTModel
import torch
import requests

# Metadata CSVs for the train / test / validation splits, read in that order.
metadata_paths = (
    'train_dataset/metadata.csv',
    'test_dataset/metadata.csv',
    'validation_dataset/metadata.csv',
)
train_df, test_df, val_df = (pd.read_csv(csv_path) for csv_path in metadata_paths)

In [3]:
# Number of training rows per diagnosis.
diagnosis_counts = train_df['diagnosis'].value_counts()
print(diagnosis_counts)

diagnosis
nevus                         1205
melanoma                      1113
pigmented benign keratosis    1099
basal cell carcinoma           514
squamous cell carcinoma        197
vascular lesion                142
actinic keratosis              130
dermatofibroma                 115
Name: count, dtype: int64


In [4]:
# Diagnosis names in the order of their integer class ids (position == id).
diagnosis_names = [
    "actinic keratosis",
    "basal cell carcinoma",
    "dermatofibroma",
    "melanoma",
    "nevus",
    "pigmented benign keratosis",
    "squamous cell carcinoma",
    "vascular lesion",
]
class_mapping = {name: class_id for class_id, name in enumerate(diagnosis_names)}
class_mapping

{'actinic keratosis': 0,
 'basal cell carcinoma': 1,
 'dermatofibroma': 2,
 'melanoma': 3,
 'nevus': 4,
 'pigmented benign keratosis': 5,
 'squamous cell carcinoma': 6,
 'vascular lesion': 7}

In [5]:
from PIL import Image
import os

import torchvision.transforms as transforms

# Preprocessing applied to every image: resize to a fixed 224x224 and convert
# to a tensor. No normalisation step is applied here.
image_size = (224, 224)
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
])

class Dataset():
    """Map-style image dataset built from a metadata DataFrame.

    Each metadata row contributes one ``<split dir>/<isic_id>.jpg`` path and its
    ``diagnosis`` label. Paths are stored as dictionary keys, so rows that share
    an ``isic_id`` collapse into a single entry (the last label wins).

    Args:
        dataframe: table with ``isic_id`` and ``diagnosis`` columns.
        transform: callable applied to the loaded PIL image in ``__getitem__``.
        train: split selector. ``'train'`` and ``'test'`` read from
            ``train_dataset/`` and ``test_dataset/``; any other value is treated
            as the validation split.
    """

    # Image directory per split. The validation directory matches the location
    # the validation metadata CSV is loaded from elsewhere in this notebook.
    _SPLIT_DIRS = {
        'train': 'train_dataset/',
        'test': 'test_dataset/',
    }
    _VALIDATION_DIR = 'validation_dataset/'

    def __init__(self, dataframe, transform, train='train'):
        self.dataframe = dataframe
        self.train = train
        self.transform = transform
        self.path_to_image = self._create_path_to_image_dict()
        self.paths = list(self.path_to_image.keys())
        self.labels = list(self.path_to_image.values())

    def _create_path_to_image_dict(self):
        """Return an ordered ``{image path: diagnosis}`` mapping for this split."""
        image_dir = self._SPLIT_DIRS.get(self.train, self._VALIDATION_DIR)
        path_to_image = {}
        # Iterate the two needed columns directly instead of building a Series
        # per row with ``iterrows``.
        for isic_id, diagnosis in zip(self.dataframe['isic_id'],
                                      self.dataframe['diagnosis']):
            path_to_image[os.path.join(image_dir, isic_id + '.jpg')] = diagnosis
        return path_to_image

    def __len__(self):
        """Number of distinct image paths."""
        return len(self.paths)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label, index)``. ``label`` is the integer class id from
            ``class_mapping`` when the split is ``'val'`` and the raw diagnosis
            string otherwise; ``index`` is passed back unchanged so callers can
            trace a sample after shuffling.
        """
        img_path = self.paths[index]
        img_label = self.labels[index]
        # Close the file handle promptly and force three channels so every
        # sample has the same shape regardless of the file's colour mode.
        with Image.open(img_path) as handle:
            image = handle.convert('RGB')
        image = self.transform(image)
        if self.train == 'val':
            return image, class_mapping[img_label], index
        return image, img_label, index

In [6]:
# NOTE: this rebinds ``train_df`` from the metadata DataFrame to a Dataset
# wrapper, so the cell cannot be re-run without first reloading the CSV.
train_df = Dataset(dataframe=train_df, transform=transform)

In [7]:
# The preprocessor and the backbone are loaded from the same pretrained checkpoint.
# NOTE(review): ViTFeatureExtractor is deprecated in newer transformers
# releases in favour of ViTImageProcessor — confirm against the installed version.
vit_checkpoint = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(vit_checkpoint)
model = ViTModel.from_pretrained(vit_checkpoint)



In [8]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.cluster import KMeans
import numpy as np

# Shuffled batches of 32; every sample carries its dataset index, so feature
# rows can still be traced back to their image after shuffling.
train_loader = DataLoader(dataset=train_df, batch_size=32, shuffle=True)


def extract_vae_features(dataloader, model):
    """Embed every image yielded by ``dataloader`` with ``model``.

    Despite the name, nothing here is VAE-specific. Each batch of image tensors
    is converted back to PIL images, preprocessed by the module-level
    ``feature_extractor``, and passed to ``model``. The position-0 slice of
    ``last_hidden_state`` is kept as the per-image feature vector.

    Args:
        dataloader: iterable of ``(images, labels, indices)`` batches; the
            labels are ignored.
        model: callable that accepts the feature extractor's output as keyword
            arguments and returns an object exposing ``last_hidden_state``.

    Returns:
        ``(features, indices_list)`` where ``features`` is the row-wise stack of
        all per-batch feature arrays and ``indices_list`` holds one entry per
        row, in the same order, taken by iterating each batch's ``indices``.
    """
    to_pil = transforms.ToPILImage()
    batch_features = []
    sample_indices = []

    # Inference only: a single no_grad context covers the whole loop.
    with torch.no_grad():
        for batch_images, _labels, batch_indices in dataloader:
            pil_images = [to_pil(image_tensor) for image_tensor in batch_images]
            model_inputs = feature_extractor(images=pil_images, return_tensors="pt")
            hidden_states = model(**model_inputs).last_hidden_state

            # Keep only the first token's hidden state for each image.
            first_token = hidden_states[:, 0, :]
            batch_features.append(first_token.cpu().numpy())

            # Remember which dataset sample produced each feature row.
            sample_indices.extend(batch_indices)

    # One 2D array: a row per sample, a column per hidden dimension.
    return np.vstack(batch_features), sample_indices

# Embed the training images; train_indices[i] is the dataset index of row i.
train_features, train_indices = extract_vae_features(train_loader, model)
print(train_features.shape)

# Group the embeddings with K-Means (fixed random_state for the centroid seeding).
n_clusters = 30
kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=0).fit(train_features)

# Cluster id assigned to each row of train_features.
cluster_labels = kmeans.labels_

(4515, 768)


In [11]:
# Cluster assignments from the fitted K-Means model, under a train-specific name.
train_cluster_labels = kmeans.labels_

def get_representative_images(cluster_labels, indices, features=None, centers=None,
                              n_representatives=2):
    """Pick the samples closest to each cluster centroid.

    Args:
        cluster_labels: cluster id of every feature row (array-like, one entry
            per row of ``features``).
        indices: sequence mapping a feature-row position to the caller's own
            sample identifier (e.g. the dataset index recorded during feature
            extraction); entries are returned unchanged.
        features: 2D array of feature rows. Defaults to the module-level
            ``train_features``.
        centers: 2D array of cluster centroids, one row per cluster id.
            Defaults to ``kmeans.cluster_centers_`` of the module-level model.
        n_representatives: maximum number of samples returned per cluster.

    Returns:
        dict mapping each cluster id to a list of up to ``n_representatives``
        entries of ``indices``, ordered from nearest to farthest from the
        centroid (Euclidean distance). Empty clusters map to an empty list.

    Raises:
        ValueError: if ``cluster_labels``, ``indices`` and ``features`` do not
            all describe the same number of samples.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # exactly as before.
    if features is None:
        features = train_features
    if centers is None:
        centers = kmeans.cluster_centers_

    cluster_labels = np.asarray(cluster_labels)
    features = np.asarray(features)
    centers = np.asarray(centers)
    if not (len(cluster_labels) == len(features) == len(indices)):
        raise ValueError(
            "cluster_labels, indices and features must have the same length"
        )

    cluster_to_images = {}
    for cluster_id, center in enumerate(centers):
        # Row positions (not dataset indices) of this cluster's members.
        member_rows = np.flatnonzero(cluster_labels == cluster_id)
        distances = np.linalg.norm(features[member_rows] - center, axis=1)
        nearest_rows = member_rows[np.argsort(distances)[:n_representatives]]
        # Translate row positions back to the caller's sample identifiers.
        cluster_to_images[cluster_id] = [indices[row] for row in nearest_rows]
    return cluster_to_images

# Samples nearest to each centroid, keyed by cluster id.
representative_images = get_representative_images(
    cluster_labels=train_cluster_labels,
    indices=train_indices,
)
print(representative_images)

{0: [tensor(1372), tensor(3158)], 1: [tensor(1277), tensor(1940)], 2: [tensor(1255), tensor(1121)], 3: [tensor(1423), tensor(2966)], 4: [tensor(2925), tensor(3946)], 5: [tensor(1963), tensor(397)], 6: [tensor(2335), tensor(4399)], 7: [tensor(1923), tensor(1874)], 8: [tensor(3791), tensor(3672)], 9: [tensor(1239), tensor(121)], 10: [tensor(909), tensor(1329)], 11: [tensor(134), tensor(1201)], 12: [tensor(1547), tensor(562)], 13: [tensor(3931), tensor(3602)], 14: [tensor(2467), tensor(1928)], 15: [tensor(2832), tensor(4365)], 16: [tensor(1789), tensor(1918)], 17: [tensor(3022), tensor(2914)], 18: [tensor(2424), tensor(2243)], 19: [tensor(780), tensor(2798)], 20: [tensor(2412), tensor(4219)], 21: [tensor(3038), tensor(2026)], 22: [tensor(2158), tensor(630)], 23: [tensor(3335), tensor(114)], 24: [tensor(1868), tensor(2806)], 25: [tensor(1771), tensor(812)], 26: [tensor(2015), tensor(810)], 27: [tensor(1535), tensor(1206)], 28: [tensor(710), tensor(3034)], 29: [tensor(3007), tensor(901)]}


In [13]:
# Second entry of cluster 0's representative list, fetched with normal
# subscripting rather than a direct __getitem__ call.
sample_index = representative_images[0][1]
train_df[sample_index]

(tensor([[[0.8235, 0.8235, 0.8235,  ..., 0.8118, 0.8039, 0.7922],
          [0.8275, 0.8196, 0.8157,  ..., 0.8078, 0.7961, 0.7922],
          [0.8275, 0.8235, 0.8196,  ..., 0.8118, 0.8000, 0.8000],
          ...,
          [0.7961, 0.8039, 0.8078,  ..., 0.8118, 0.8118, 0.8118],
          [0.7961, 0.7961, 0.7961,  ..., 0.8078, 0.8078, 0.8078],
          [0.8000, 0.7922, 0.7843,  ..., 0.8078, 0.8039, 0.8039]],
 
         [[0.7255, 0.7255, 0.7255,  ..., 0.7059, 0.6980, 0.6863],
          [0.7333, 0.7294, 0.7216,  ..., 0.7098, 0.6941, 0.6902],
          [0.7373, 0.7333, 0.7294,  ..., 0.7137, 0.7020, 0.7020],
          ...,
          [0.6824, 0.6902, 0.6941,  ..., 0.7333, 0.7333, 0.7333],
          [0.6784, 0.6824, 0.6824,  ..., 0.7294, 0.7294, 0.7294],
          [0.6784, 0.6706, 0.6667,  ..., 0.7294, 0.7255, 0.7333]],
 
         [[0.7412, 0.7412, 0.7412,  ..., 0.7294, 0.7216, 0.7059],
          [0.7569, 0.7490, 0.7451,  ..., 0.7373, 0.7216, 0.7137],
          [0.7608, 0.7569, 0.7529,  ...,

In [10]:
# Flatten the per-cluster representative lists into plain Python ints.
[sample_idx.item() for cluster_reps in representative_images.values() for sample_idx in cluster_reps]

[1372,
 1277,
 1255,
 1423,
 2925,
 1963,
 2335,
 1923,
 3791,
 1239,
 909,
 134,
 1547,
 3931,
 2467,
 2832,
 1789,
 3022,
 2424,
 780,
 2412,
 3038,
 2158,
 3335,
 1868,
 1771,
 2015,
 1535,
 710,
 3007]