In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image

import tensorflow as tf
from tensorflow import keras

import os
from glob import glob
import time 
import copy

In [9]:
# One seed shared by NumPy and TensorFlow so that stochastic steps repeat
# across "Restart & Run All". Change it here rather than in each call.
SEED = 128
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [10]:
# Record what TensorFlow reports about CUDA for this installation.
# NOTE(review): is_built_with_cuda() presumably describes how TF was built,
# not whether a GPU is actually usable at runtime — confirm before branching on it.
CUDA = tf.test.is_built_with_cuda()
# Wrap the GPU device name reported by TensorFlow in a tf.device(...) object.
# NOTE(review): gpu_device_name() presumably returns an empty string when no GPU
# is visible — confirm that tf.device('') behaves as intended in that case.
DEVICE = tf.device(tf.test.gpu_device_name())

In [None]:
# Load the HAM10000 metadata table (path is relative to the notebook's
# working directory) and preview the first rows.
metadata_csv = '../data/HAM10000_metadata.csv'
df = pd.read_csv(metadata_csv)
df.head()

In [None]:
# Maps the short diagnosis codes found in the metadata `dx` column to
# human-readable lesion names (used for labels and plot titles).
lesion_type = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesion',
    'bcc': 'Basal cell carcinoma',  # fixed spelling: was "Basel"
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesion',
    'df': 'Dermatofibroma'
}

# Glob pattern for every JPEG one directory level below ../data.
# Built from separate path components so it works on POSIX as well as
# Windows (a literal '..\\data' only resolves on Windows).
IMAGE_GLOB = os.path.join('..', 'data', '*', '*.jpg')

# Maps image id (file name without extension) -> path of the image file.
imageid_path = {os.path.splitext(os.path.basename(x))[0]: x
                for x in glob(IMAGE_GLOB)}

In [None]:
# Derive three columns from the metadata:
#   path      - image file path looked up by image_id (None when no file matched)
#   cell_type - readable lesion name looked up from the dx code
#   target    - integer category code of cell_type, as produced by pd.Categorical
df = df.assign(
    path=df['image_id'].map(imageid_path.get),
    cell_type=df['dx'].map(lesion_type.get),
    target=lambda frame: pd.Categorical(frame['cell_type']).codes,
)

In [None]:
# Show how many rows carry each lesion name.
df['cell_type'].value_counts()

In [None]:
# Show how many rows carry each integer target code.
df['target'].value_counts()

In [None]:
# Preview the first rows of the frame, including the derived
# 'path', 'cell_type' and 'target' columns.
df.head()

In [None]:
# Sample grid: one row per lesion class, `n_samples` randomly drawn images per row.
n_samples = 5
# Take the row count from the data instead of hard-coding 7, so the grid
# always matches the number of classes actually present.
n_classes = df['cell_type'].nunique()
# squeeze=False keeps m_axs two-dimensional even for a single row or column.
fig, m_axs = plt.subplots(n_classes, n_samples,
                          figsize=(4 * n_samples, 3 * n_classes),
                          squeeze=False)
for n_axs, (type_name, type_rows) in zip(m_axs, df.sort_values(['cell_type']).groupby('cell_type')):
    # Only the first panel of each row carries the class name as its title.
    n_axs[0].set_title(type_name)
    # Fixed random_state so the same images are drawn on every run.
    sampled_paths = type_rows.sample(n_samples, random_state=128)['path']
    for c_ax, img_path in zip(n_axs, sampled_paths):
        c_ax.imshow(mpimg.imread(img_path))
        c_ax.axis('off')

# Create the output directory first; savefig does not create missing folders.
output_path = './output/category_samples.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, dpi=300)
# plt.show() works with inline/non-GUI backends, where fig.show() only warns.
plt.show()