<a href="https://colab.research.google.com/github/kinan-02/SkinCancer-AL/blob/main/SkinCancer-AL%20/Initials/kmeans_auto_encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from scipy.stats import entropy
from collections import defaultdict
import argparse
from matplotlib import pyplot as plt
from transformers import ViTFeatureExtractor, ViTModel
import torch
import requests

# Load the metadata table of each split (train, test, validation), in that order.
train_df, test_df, val_df = map(
    pd.read_csv,
    (
        'train_dataset/metadata.csv',
        'test_dataset/metadata.csv',
        'validation_dataset/metadata.csv',
    ),
)

In [None]:
# Class balance of the training split: number of rows per diagnosis.
diagnosis_counts = train_df['diagnosis'].value_counts()
print(diagnosis_counts)

diagnosis
nevus                         1205
melanoma                      1113
pigmented benign keratosis    1099
basal cell carcinoma           514
squamous cell carcinoma        197
vascular lesion                142
actinic keratosis              130
dermatofibroma                 115
Name: count, dtype: int64


In [None]:
# Integer id of every diagnosis; the position in this tuple is the id.
class_mapping = {
    diagnosis: class_id
    for class_id, diagnosis in enumerate((
        "actinic keratosis",
        "basal cell carcinoma",
        "dermatofibroma",
        "melanoma",
        "nevus",
        "pigmented benign keratosis",
        "squamous cell carcinoma",
        "vascular lesion",
    ))
}
class_mapping

{'actinic keratosis': 0,
 'basal cell carcinoma': 1,
 'dermatofibroma': 2,
 'melanoma': 3,
 'nevus': 4,
 'pigmented benign keratosis': 5,
 'squamous cell carcinoma': 6,
 'vascular lesion': 7}

In [None]:
from PIL import Image
import os

import torchvision.transforms as transforms

# Preprocessing applied to every image: resize to 224x224 (the resolution the
# autoencoder's fully connected layer is sized for) and convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

# Map-style dataset that reads the images listed in a metadata dataframe; it is
# consumed later by the DataLoader.
class Dataset():
    """Image dataset backed by a metadata dataframe.

    Every row's ``isic_id`` is turned into an image path inside the directory
    of the selected split and paired with the row's ``diagnosis`` string.

    Args:
        dataframe: table with at least the columns ``isic_id`` and ``diagnosis``.
        transform: callable applied to the loaded PIL image in ``__getitem__``.
        train: split selector. ``'train'`` and ``'test'`` select their own
            directories; any other value selects the validation directory.
    """

    def __init__(self, dataframe, transform, train='train'):
        self.dataframe = dataframe
        self.train = train
        self.transform = transform
        self.path_to_image = self._create_path_to_image_dict()
        self.paths = list(self.path_to_image.keys())
        self.labels = list(self.path_to_image.values())

    def _split_directory(self):
        """Return the image directory of the split selected by ``self.train``."""
        if self.train == 'train':
            return 'train_dataset/'
        if self.train == 'test':
            return 'test_dataset/'
        # The validation metadata is read from 'validation_dataset/', and the
        # other splits keep their images next to their metadata, so the images
        # are looked up in the same directory here.
        return 'validation_dataset/'

    def _create_path_to_image_dict(self):
        """
        Return the dictionary where the keys are the image paths and the values are the labels.

        Rows that share an ``isic_id`` collapse into one entry (the last label wins).
        """
        directory = self._split_directory()
        path_to_image = {}
        for isic_id, label in zip(self.dataframe['isic_id'], self.dataframe['diagnosis']):
            path_to_image[os.path.join(directory, isic_id + '.jpg')] = label
        return path_to_image

    def __len__(self):
        """Number of distinct image paths in the dataset."""
        return len(self.paths)

    def __getitem__(self, index):
        """
        Return the transformed image, its label and the index.

        The label is mapped through ``class_mapping`` only when the split
        selector is ``'val'``; otherwise the raw diagnosis string is returned.
        NOTE(review): confirm that returning unmapped labels for the other
        splits is intentional.
        """
        img_path = self.paths[index]
        img_label = self.labels[index]
        # Close the file handle promptly and force three channels so that a
        # grayscale / RGBA / CMYK file cannot break the 3-channel model input.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        image = self.transform(image)
        if self.train == 'val':
            return image, class_mapping[img_label], index
        return image, img_label, index

In [None]:
# Wrap the training metadata in the image Dataset (default split: 'train').
# NOTE: this rebinds `train_df` from a DataFrame to a Dataset, so the CSV has
# to be re-loaded before this cell can be run a second time.
train_df = Dataset(dataframe=train_df, transform=transform)

In [None]:
import torch
import torch.nn as nn

# Convolutional autoencoder whose bottleneck gives the latent vector of each image.
class Autoencoder(nn.Module):
    """Conv encoder -> linear bottleneck -> linear expansion -> conv decoder.

    The linear layers are sized for 32x56x56 feature maps, i.e. for
    (N, 3, 224, 224) inputs after the two 2x2 max-pools.

    Args:
        encoded_dim: size of the latent vector produced by ``fc1``.
    """

    def __init__(self, encoded_dim=256):
        super().__init__()

        # Feature-map geometry on both sides of the bottleneck.
        bottleneck_shape = (32, 56, 56)
        flat_features = 32 * 56 * 56

        # Encoder: two conv + pool stages, 224x224 -> 112x112 -> 56x56.
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Bottleneck: flatten the feature maps and project to the latent size.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(flat_features, encoded_dim)

        # Decoder: expand the latent vector, reshape it to feature maps and
        # upsample 56x56 -> 112x112 -> 224x224.
        self.fc2 = nn.Linear(encoded_dim, flat_features)
        self.decoder = nn.Sequential(
            nn.ReLU(),
            nn.Unflatten(1, bottleneck_shape),
            nn.ConvTranspose2d(32, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=3, kernel_size=3, padding=1),
            nn.Sigmoid(),  # keeps the reconstruction in (0, 1)
        )

    def forward(self, x):
        """Reconstruct ``x``: encode to the latent vector, then decode it."""
        latent = self.fc1(self.flatten(self.encoder(x)))
        return self.decoder(self.fc2(latent))


# Resolve the device first so the checkpoint can be mapped straight onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Autoencoder()
# map_location lets a checkpoint that was saved from a GPU load on a CPU-only
# machine; without it torch.load tries to restore the tensors on CUDA.
model.load_state_dict(torch.load("ae_model.pth", map_location=device))
# The notebook only runs the model for feature extraction, never for training.
model.eval()
model.to(device)

Autoencoder(
  (encoder): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=100352, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=100352, bias=True)
  (decoder): Sequential(
    (0): ReLU()
    (1): Unflatten(dim=1, unflattened_size=(32, 56, 56))
    (2): ConvTranspose2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (3): ReLU()
    (4): ConvTranspose2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (5): ReLU()
    (6): Conv2d(32, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): Sigmoi

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.cluster import KMeans
import numpy as np


# Feature extraction gains nothing from shuffling, and a shuffled row order
# would make the seeded K-Means below start from different points on every run.
# Dataset indices are still returned with each batch.
train_loader = DataLoader(train_df, batch_size=32, shuffle=False)

def extract_vae_features(dataloader, model):
    """
    Return the latent vector for each image and the corresponding indices.

    Only the encoding half of the model is run: ``model.encoder``,
    ``model.flatten`` and ``model.fc1``.

    Args:
        dataloader: iterable of ``(images, labels, indices)`` batches; the
            labels are ignored.
        model: module exposing ``encoder``, ``flatten`` and ``fc1``.

    Returns:
        features: array with one row per image, stacked in iteration order.
        indices_list: the per-sample entries of every batch's ``indices``, in
            the same order as the rows of ``features``. With tensor batches
            these are 0-d tensors (call ``.item()`` for a plain int).

    Raises:
        ValueError: from ``np.vstack`` when the dataloader yields no batch.
    """
    # Run on the device the model lives on instead of relying on a notebook
    # global; a model without parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    features_list = []
    indices_list = []

    with torch.no_grad():
        for images, _, indices in dataloader:
            x = images if target_device is None else images.to(target_device)
            x = model.encoder(x)
            x = model.flatten(x)
            x = model.fc1(x)

            features_list.append(x.cpu().numpy())
            indices_list.extend(indices)

    features = np.vstack(features_list)
    return features, indices_list

# Encode the training images; row i of `train_features` belongs to the
# dataset index stored in `train_indices[i]`.
train_features, train_indices = extract_vae_features(dataloader=train_loader, model=model)
print(train_features.shape)

# Cluster the latent vectors with K-Means (k-means++ seeding, fixed seed).
n_clusters = 30
kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=0).fit(train_features)

# Cluster id assigned to each row of `train_features`.
cluster_labels = kmeans.labels_

(4515, 256)


In [None]:
# Per-sample cluster assignment of the fitted K-Means model (one entry per row of `train_features`).
train_cluster_labels = kmeans.labels_

def get_representative_images(cluster_labels, indices, features=None, centers=None, n_per_cluster=1):
    """
    Return, for every cluster, the dataset indices of the samples nearest to its centroid.

    Args:
        cluster_labels: cluster id of each feature row.
        indices: dataset index of each feature row (same order as the rows).
        features: feature matrix, one row per sample. Defaults to the
            notebook-level ``train_features``.
        centers: one centroid per cluster. Defaults to the notebook-level
            ``kmeans.cluster_centers_``.
        n_per_cluster: how many nearest samples to keep per cluster (default 1,
            so the total number of selected samples equals the number of clusters).

    Returns:
        dict mapping each cluster id to a list with at most ``n_per_cluster``
        entries of ``indices``, nearest first. An empty cluster maps to ``[]``.

    Raises:
        ValueError: if ``n_per_cluster`` is smaller than 1.
    """
    if n_per_cluster < 1:
        raise ValueError("n_per_cluster must be at least 1")

    # Fall back to the notebook globals so existing two-argument calls keep working.
    if features is None:
        features = train_features
    if centers is None:
        centers = kmeans.cluster_centers_

    cluster_labels = np.asarray(cluster_labels)
    features = np.asarray(features)

    cluster_to_images = {}
    for cluster_id, center in enumerate(np.asarray(centers)):
        # Rows of `features` assigned to this cluster.
        member_rows = np.where(cluster_labels == cluster_id)[0]
        distances = np.linalg.norm(features[member_rows] - center, axis=1)
        nearest_rows = member_rows[np.argsort(distances)[:n_per_cluster]]
        # Translate feature rows back to dataset indices.
        cluster_to_images[cluster_id] = [indices[row] for row in nearest_rows]
    return cluster_to_images

# One representative (nearest-to-centroid) sample per cluster.
representative_images = get_representative_images(
    cluster_labels=train_cluster_labels,
    indices=train_indices,
)
print(representative_images)

{0: [tensor(1380)], 1: [tensor(1482)], 2: [tensor(1787)], 3: [tensor(2612)], 4: [tensor(235)], 5: [tensor(3743)], 6: [tensor(2044)], 7: [tensor(2939)], 8: [tensor(3667)], 9: [tensor(1570)], 10: [tensor(779)], 11: [tensor(2491)], 12: [tensor(4318)], 13: [tensor(346)], 14: [tensor(2031)], 15: [tensor(455)], 16: [tensor(4340)], 17: [tensor(3895)], 18: [tensor(2774)], 19: [tensor(4097)], 20: [tensor(2119)], 21: [tensor(1383)], 22: [tensor(822)], 23: [tensor(4215)], 24: [tensor(2803)], 25: [tensor(166)], 26: [tensor(137)], 27: [tensor(4128)], 28: [tensor(1054)], 29: [tensor(1118)]}


In [None]:
# Flatten the per-cluster selections into one list of plain-int dataset indices.
[sample_idx.item() for members in representative_images.values() for sample_idx in members]

[1380,
 1482,
 1787,
 2612,
 235,
 3743,
 2044,
 2939,
 3667,
 1570,
 779,
 2491,
 4318,
 346,
 2031,
 455,
 4340,
 3895,
 2774,
 4097,
 2119,
 1383,
 822,
 4215,
 2803,
 166,
 137,
 4128,
 1054,
 1118]