In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [2]:
# Load the per-image metadata table (lesion_id, image_id, dx, ...) and show it.
METADATA_CSV = "./data/HAM10000_metadata.csv"
metadata = pd.read_csv(METADATA_CSV)
metadata

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear
...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face


In [3]:
# Number of images per diagnosis code, most frequent first.
dx_counts = metadata["dx"].value_counts()
dx_counts

nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [5]:
# Diagnosis codes (values of the "dx" column) that this notebook counts as cancer;
# every other code is mapped to "not_cancer" below.
cancer_labels = [
    "akiec",
    "bcc",
    "mel",
]

In [6]:
# Collapse the diagnosis codes into a two-valued label. The label strings double
# as the class folder names used when the images are copied further down.
is_cancer_mask = metadata["dx"].isin(cancer_labels)
metadata["is_cancer"] = np.where(is_cancer_mask, "cancer", "not_cancer")
metadata

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,is_cancer
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,not_cancer
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,not_cancer
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,not_cancer
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,not_cancer
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,not_cancer
...,...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,cancer
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,cancer
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,cancer
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,cancer


In [7]:
# Class balance of the binary label, most frequent first.
is_cancer_counts = metadata["is_cancer"].value_counts()
is_cancer_counts

not_cancer    8061
cancer        1954
Name: is_cancer, dtype: int64

In [8]:
# Sort the part-1 images into ./data/cancer/<is_cancer>/ so that
# flow_from_directory can derive each image's class from its folder name.
source_dir = "./data/HAM10000_images_part_1/"
target_root = "./data/cancer"

# Build the image_id -> label lookup once; filtering the whole DataFrame for
# every file made the loop quadratic. drop_duplicates keeps the first row per
# image_id, which is the row the previous `.values[0]` lookup selected.
label_by_image_id = (
    metadata.drop_duplicates("image_id")
    .set_index("image_id")["is_cancer"]
    .to_dict()
)

# shutil.copy only copies *into* the destination when it is an existing
# directory. Without the class folders it either fails or writes a single
# file named after the label that every later copy overwrites.
for label in set(label_by_image_id.values()):
    os.makedirs(os.path.join(target_root, label), exist_ok=True)

image_list = os.listdir(source_dir)
for image in tqdm(image_list):
    # splitext rather than image[:-4]: the id must not depend on the
    # extension being exactly three characters long.
    image_id = os.path.splitext(image)[0]
    # An unknown image_id raises KeyError here (previously IndexError), so a
    # stray file in the source folder still stops the run loudly.
    shutil.copy(
        os.path.join(source_dir, image),
        os.path.join(target_root, label_by_image_id[image_id]),
    )

100%|██████████| 5000/5000 [00:09<00:00, 519.50it/s]


In [9]:
# Sort the part-2 images into ./data/cancer/<is_cancer>/ so that
# flow_from_directory can derive each image's class from its folder name.
source_dir = "./data/HAM10000_images_part_2/"
target_root = "./data/cancer"

# Build the image_id -> label lookup once; filtering the whole DataFrame for
# every file made the loop quadratic. drop_duplicates keeps the first row per
# image_id, which is the row the previous `.values[0]` lookup selected.
label_by_image_id = (
    metadata.drop_duplicates("image_id")
    .set_index("image_id")["is_cancer"]
    .to_dict()
)

# shutil.copy only copies *into* the destination when it is an existing
# directory. Without the class folders it either fails or writes a single
# file named after the label that every later copy overwrites.
for label in set(label_by_image_id.values()):
    os.makedirs(os.path.join(target_root, label), exist_ok=True)

image_list = os.listdir(source_dir)
for image in tqdm(image_list):
    # splitext rather than image[:-4]: the id must not depend on the
    # extension being exactly three characters long.
    image_id = os.path.splitext(image)[0]
    # An unknown image_id raises KeyError here (previously IndexError), so a
    # stray file in the source folder still stops the run loudly.
    shutil.copy(
        os.path.join(source_dir, image),
        os.path.join(target_root, label_by_image_id[image_id]),
    )

100%|██████████| 5015/5015 [00:09<00:00, 517.76it/s]


In [10]:
# Augmenting generator shared by the training and validation subsets below.
# Pixel values are scaled by 1/255 and 20% of the files are reserved for the
# "validation" subset.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    # geometric augmentation
    rotation_range=180,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    # photometric augmentation
    brightness_range=[0.5, 1.2],
    # split and normalisation
    validation_split=0.2,
    rescale=1. / 255,
)

In [41]:
# Sanity check: draw a single augmented batch from the training subset and show it.
training_peek = image_generator.flow_from_directory("./data/cancer/", batch_size=32, subset="training")
next(training_peek)

Found 8013 images belonging to 2 classes.


(array([[[[0.454902  , 0.4039216 , 0.4156863 ],
          [0.454902  , 0.4039216 , 0.41960788],
          [0.45098042, 0.4039216 , 0.41960788],
          ...,
          [0.45882356, 0.4431373 , 0.48235297],
          [0.43921572, 0.4156863 , 0.454902  ],
          [0.43529415, 0.40784317, 0.43921572]],
 
         [[0.45098042, 0.4039216 , 0.41176474],
          [0.454902  , 0.4039216 , 0.41960788],
          [0.45098042, 0.4039216 , 0.41960788],
          ...,
          [0.43921572, 0.40784317, 0.43921572],
          [0.44705886, 0.4039216 , 0.43137258],
          [0.44705886, 0.4039216 , 0.43137258]],
 
         [[0.45098042, 0.4039216 , 0.40784317],
          [0.454902  , 0.4039216 , 0.4156863 ],
          [0.454902  , 0.4039216 , 0.41960788],
          ...,
          [0.44705886, 0.4039216 , 0.43137258],
          [0.44705886, 0.4039216 , 0.43137258],
          [0.44705886, 0.4039216 , 0.43529415]],
 
         ...,
 
         [[0.4431373 , 0.3921569 , 0.41960788],
          [0.44313

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

In [12]:
# Small CNN for the binary cancer / not_cancer task: three Conv2D + MaxPooling2D
# stages, then a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(16, (3, 3), 1, activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D(),
    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),
    Conv2D(16, (3, 3), 1, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [13]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(
    optimizer='adam',
    loss=tf.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

In [15]:
# Validation images must not be augmented: scoring on randomly rotated, shifted
# and re-brightened copies makes val_loss / val_accuracy noisy and not
# comparable between epochs. This generator only rescales. Its validation_split
# must stay equal to the one on image_generator (0.2) so that the "training"
# and "validation" subsets remain disjoint slices of the same file listing.
validation_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    rescale=1. / 255,
)

train_batches = image_generator.flow_from_directory(
    "./data/cancer/",
    batch_size=32,
    subset="training",
    shuffle=True,
    class_mode="binary",
)
# Shuffling has no effect on the aggregated validation metrics, so keep the
# file order fixed for reproducible evaluation.
validation_batches = validation_generator.flow_from_directory(
    "./data/cancer/",
    batch_size=32,
    subset="validation",
    shuffle=False,
    class_mode="binary",
)

# NOTE(review): the metadata shows several images per lesion_id, while the split
# above is made per file. Images of one lesion can therefore end up in both
# subsets -- confirm whether a lesion-level split is needed.
with tf.device('/GPU:0'):
    history = model.fit(
        train_batches,
        validation_data=validation_batches,
        epochs=25,
        workers=14,
    )
                              

Found 8013 images belonging to 2 classes.
Found 2002 images belonging to 2 classes.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
