In [None]:
# Torch Imports
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Mount Drive
# Attach Google Drive to the Colab runtime at /content/drive.
# NOTE(review): later cells address the data through the relative prefix
# "./drive/MyDrive/..."; that only resolves to this mount when the notebook's
# working directory is /content -- confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# HAM10000 Dataloader
import os
import torch
import pandas as pd
import torchvision
import torchvision.io
from torchvision.io.image import read_image
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import cv2 as io

class Ham10000(Dataset):
    """Map-style dataset over a HAM10000 metadata CSV and its JPEG images.

    Each item is an ``(image, label)`` pair. The image is read from
    ``<directory>/<datasetname>/<image id>.jpg`` and the label is returned as
    an integer tensor taken from the same metadata row.

    Args:
        csv_file: Path to the metadata CSV. Columns are addressed by position:
            column ``IMAGE_ID_COL`` holds the image file stem and column
            ``LABEL_COL`` holds the integer class label.
        directory: Root folder that contains the image folder.
        transform: Optional callable applied to the decoded image. When falsy,
            the decoded RGB array is returned unchanged.
        datasetname: Name of the image folder inside ``directory``.
    """

    # Positional columns of the metadata CSV that __getitem__ reads.
    IMAGE_ID_COL = 1
    LABEL_COL = 7

    def __init__(self, csv_file, directory, transform, datasetname):
        self.annotations = pd.read_csv(f"{csv_file}")
        # Not read anywhere in this class; kept so existing attribute access
        # keeps working.
        self.img_root_dir = 'dataverse_files_off'
        self.transform = transform
        self.datasetname = datasetname
        self.csv_file = csv_file
        self.directory = directory

    def __len__(self):
        """Return the number of rows in the metadata CSV."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the image and label for row ``idx``.

        Args:
            idx: Row position; an ``int`` or a tensor holding one.

        Returns:
            Tuple ``(x_img, y_label)`` where ``x_img`` is the (optionally
            transformed) RGB image and ``y_label`` is an integer tensor.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded.
        """
        # Samplers may pass a tensor index; pandas positional indexing
        # needs a plain Python int.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = os.path.join(
            self.directory,
            self.datasetname,
            self.annotations.iloc[idx, self.IMAGE_ID_COL],
        )
        img_path += ".jpg"

        # `io` is cv2 in this notebook. cv2.imread signals failure by returning
        # None instead of raising, so surface the problem here with the path.
        x_img = io.imread(img_path)
        if x_img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # cv2 decodes to BGR; the torchvision/PIL code in the rest of the
        # notebook works in RGB, so convert before applying any transform.
        x_img = io.cvtColor(x_img, io.COLOR_BGR2RGB)

        if self.transform:
            x_img = self.transform(x_img)

        y_label = torch.tensor(int(self.annotations.iloc[idx, self.LABEL_COL]))

        return (x_img, y_label)

In [None]:
# Build the HAM10000 dataset from the files on the mounted Drive.

# Image root folder, image sub-folder and metadata CSV.
directory = "./drive/MyDrive/Colab Notebooks/dataverse_files_off"
datasetname = "HAM10000_images_off"
csv_file = "./drive/MyDrive/Colab Notebooks/dataverse_files_off/HAM10000_metadata.csv"

# Only tensor conversion at this stage; no augmentation or normalisation.
transform = transforms.Compose([transforms.ToTensor()])

dataset = Ham10000(
    csv_file=csv_file,
    directory=directory,
    transform=transform,
    datasetname=datasetname,
)

In [None]:
# Generate Sample Images from each class
import pandas as pd
from PIL import Image, ImageFont, ImageDraw

df = pd.read_csv(csv_file)

# One representative row per class: the first row seen for each label,
# indexed by that label and sorted by it (built in one pass instead of
# growing a DataFrame row by row).
sample_imgs = (
    df.drop_duplicates(subset='label')
      .set_index('label', drop=False)
      .rename_axis(None)
      .sort_index()
)

# Size of one tile in the strip; the canvas width follows the number of
# classes actually present rather than a fixed count.
image_width = 600
image_height = 450
stitched_image = Image.new('RGB', (image_width * len(sample_imgs), image_height))

font = ImageFont.truetype("arial.ttf", 60) # must add font file to current working directory

# Place tiles by their position in the sorted frame, so the layout does not
# depend on labels being contiguous integers starting at 0.
for position, (_, row) in enumerate(sample_imgs.iterrows()):
  image_path = os.path.join(directory, datasetname, row['image_id'])
  image_path += ".jpg"

  # Close each source image once it has been copied onto the canvas.
  with Image.open(image_path) as im:
    image_editable = ImageDraw.Draw(im)
    # Draw the diagnosis code in red near the bottom centre of the tile.
    image_editable.text((image_width/2-30, image_height-70), row['dx'], (255, 0, 0), font=font, anchor='mm')

    stitched_image.paste(im=im, box=(image_width * position, 0))

display(stitched_image)

In [None]:
# Visualize Transformation

csv_file = "./drive/MyDrive/Colab Notebooks/dataverse_files_off/HAM10000_metadata.csv"
directory = "./drive/MyDrive/Colab Notebooks/dataverse_files_off"

import torchvision.transforms as T

# Per-channel statistics passed to Normalize; reused below to undo it for display.
norm_mean = (0.1411, 0.0923, 0.5270)
norm_std = (0.3407, 0.3058, 0.2824)

transform = transforms.Compose(
      [
        transforms.ToTensor(),
        transforms.RandomCrop(224),
        transforms.RandomRotation(degrees=(13)),
        transforms.ColorJitter(brightness=.5, hue=.3),
        transforms.Normalize(norm_mean, norm_std)
      ]
    )

img_path = os.path.join(directory,datasetname,"ISIC_0027419.jpg")
im_og = Image.open(img_path)

im_new = transform(im_og)

# Normalize pushes values outside [0, 1]. ToPILImage scales float tensors by
# 255 and casts to uint8, so out-of-range values would wrap and corrupt the
# preview. Undo the normalisation (x * std + mean) and clamp before converting,
# so the image shows the crop / rotation / colour jitter as intended.
mean_t = torch.tensor(norm_mean).view(-1, 1, 1)
std_t = torch.tensor(norm_std).view(-1, 1, 1)
im_vis = (im_new * std_t + mean_t).clamp(0.0, 1.0)

conv_back = T.ToPILImage()
display(im_og)
display(conv_back(im_vis))

In [None]:
# Get Distribution of Classes
import seaborn as sns
df = pd.read_csv(csv_file)
print(df['dx'].value_counts())
# Rebind instead of mutating in place; the frame ends up with the same columns.
df = df.rename(columns={'dx': "class"})

sns.set_theme(style="darkgrid")
# Keep `ax` bound to the Axes itself: chaining .set(...) onto the call would
# bind it to the return value of .set() instead.
ax = sns.countplot(x="class", data=df)
ax.set(title="Distribution of Lesions", xlabel='Class', ylabel='Count');

In [None]:
# Get Gender Distribution
import seaborn as sns
df = pd.read_csv(csv_file)

sns.set_theme(style="darkgrid")
# Keep `ax` bound to the Axes itself: chaining .set(...) onto the call would
# bind it to the return value of .set() instead.
ax = sns.countplot(x="sex", data=df)
ax.set(title="Gender Distribution", xlabel='Gender', ylabel='Count');

In [None]:
# Get Age Distribution
import seaborn as sns
df = pd.read_csv(csv_file)
# Drop only rows with a missing age; a bare dropna() would also discard rows
# whose gap is in some unrelated column and skew the counts.
ages = df.dropna(subset=['age'])

print(ages['age'].unique())
sns.set(font_scale=0.6)
# Keep `ax` bound to the Axes itself: chaining .set(...) onto the call would
# bind it to the return value of .set() instead.
ax = sns.countplot(x="age", data=ages)
ax.set(title="Age Distribution", xlabel='Age', ylabel='Count');