# Moledetection



In [None]:
# import all libraries needed for this notebook
###############################################
import os
import numpy as np
import pandas as pd 
import shutil

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
import autokeras as ak

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from PIL import Image

# Classes to predict
seven={
                'nv':'Melanocytic nevi',
                'mel':'Melanoma',
                'bkl':'Benign keratosis-like lesions',
                'bcc':'Basal cell carcinoma',
                'akiec':'Actinic keratoses',
                'vas':'Vascular lesions',
                'df':'Dermatofibroma'
                }

In [None]:
# This part should only be run once !
#
# Prepare directory structure for augmented images to balance the dataset
#########################################################################################
#   
# dataset is unbalanced
#            6705 images for   'nv':'Melanocytic nevi',
#            1113 images for   'mel':'Melanoma',
#            1099 images for   'bkl':'Benign keratosis-like lesions',
#            514  images for  'bcc':'Basal cell carcinoma',
#            327  images for  'akiec':'Actinic keratoses',
#            142  images for  'vas':'Vascular lesions',
#            115  images for  'df':'Dermatofibroma'
#      
# create directory structure :
##############################
# ./backup/data/HAM10000/HAM10000_images_part_1  <-- contains the downloaded first imageset
# ./backup/data/HAM10000/HAM10000_images_part_2  <-- contains the downloaded second imageset
# ./backup/data/HAM10000/HAM10000_metadata.csv    <-- contains the downloaded csv with picnames with prediction
#
# ./backup/data/HAM10000/ALL          <-- create directory and put all available images here 
#
# create directories for every class as follows
###############################################
#
# ./backup/data/HAM10000/myaugmenting/nv
# ./backup/data/HAM10000/myaugmenting/mel
# ./backup/data/HAM10000/myaugmenting/bkl
# ./backup/data/HAM10000/myaugmenting/bcc
# ./backup/data/HAM10000/myaugmenting/akiec
# ./backup/data/HAM10000/myaugmenting/vas
# ./backup/data/HAM10000/myaugmenting/df
#
# ./backup/data/HAM10000/myaugmenting/nv_out
# ./backup/data/HAM10000/myaugmenting/mel_out
# ./backup/data/HAM10000/myaugmenting/bkl_out
# ./backup/data/HAM10000/myaugmenting/bcc_out
# ./backup/data/HAM10000/myaugmenting/akiec_out
# ./backup/data/HAM10000/myaugmenting/vas_out
# ./backup/data/HAM10000/myaugmenting/df_out
#
###############################################

# copy the images from 'ALL' directory to the corresponding class subdirectories in directory 'myaugmenting'
############################################################################################################
df=pd.read_csv("./backup/data/HAM10000/HAM10000_metadata.csv")
df['frm']='./backup/data/HAM10000/ALL/'+df['image_id']+'.jpg'
df['to']='./backup/data/HAM10000/myaugmenting/'+df['dx']+'/'+df['image_id']+'.jpg'
df=df[['frm','to']]
fromtolist=df.values.tolist()
i=0
j=0
for frm,to in fromtolist:
    try:
        shutil.copyfile(frm, to)
    except:
        j+=1
    i+=1
    print (f"\r>> copied: {i}    could not copy: {j}", end='', flush=True) 



In [1]:

# Create augmented images and save them in the _out subdirectories for every class
##################################################################################

imagegenerator = ImageDataGenerator(
        rescale=1 / 255.0,
        rotation_range=51,
        zoom_range=0.07,
        width_shift_range=0.07,
        height_shift_range=0.05,
        shear_range=0.05,
        horizontal_flip=True,
        fill_mode="nearest",
        validation_split=0.20)


# the number of available images per class in original dataset
sevennmbr={
            'nv':6705,
            'mel':1113,
            'bkl':1099,
            'bcc':514,
            'akiec':327,
            'vas':142,
            'df':115
            }

for classname in seven.keys():
    save_directory='../backup/data/HAM10000/picaugment/' + classname + '_out'
    directory='../backup/data/HAM10000/picaugment/' + classname + '/'

    img_gen=imagegenerator.flow_from_directory(
        directory=directory,
        target_size=(256, 256),
        color_mode='rgb',
        classes=[classname],
        class_mode='categorical',
        batch_size=sevennmbr[classname],
        shuffle=True,
        seed=None,
        save_to_dir=save_directory,
        save_prefix=classname,
        save_format='png',
        follow_links=False,
        subset=None,
        interpolation='nearest',
        keep_aspect_ratio=False
    )

# Depending on how many images we have create necessary augmented images
# when iterating trough they will be created in the corresponding subdirectory
    i=0
    for img in img_gen:
        i+=1
        print (f"\r>> img created: {i}", end='', flush=True)
        print(type(img))
        if i == 1 + int(6710/(sevennmbr[classname])):
            break

nv
Found 6401 images belonging to 1 classes.
>> img created: 1<class 'tuple'>
>> img created: 2<class 'tuple'>
mel
Found 1014 images belonging to 1 classes.
>> img created: 1<class 'tuple'>
>> img created: 2<class 'tuple'>
>> img created: 3<class 'tuple'>
>> img created: 4<class 'tuple'>
>> img created: 5<class 'tuple'>
>> img created: 6<class 'tuple'>
>> img created: 7<class 'tuple'>
bkl
Found 1000 images belonging to 1 classes.
>> img created: 1<class 'tuple'>
>> img created: 2<class 'tuple'>
>> img created: 3<class 'tuple'>
>> img created: 4<class 'tuple'>
>> img created: 5<class 'tuple'>
>> img created: 6<class 'tuple'>
>> img created: 7<class 'tuple'>
bcc
Found 459 images belonging to 1 classes.
>> img created: 1<class 'tuple'>
>> img created: 2<class 'tuple'>
>> img created: 3<class 'tuple'>
>> img created: 4<class 'tuple'>
>> img created: 5<class 'tuple'>
>> img created: 6<class 'tuple'>
>> img created: 7<class 'tuple'>
>> img created: 8<class 'tuple'>
>> img created: 9<class 't

In [2]:

# create an X and Y_cat for modelling
######################################
#

# Get the list of all imagefiles with their subdirectory
mergeinfo=[]
for i in ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vas']:
    path = "../backup/data/HAM10000/picaugment/" + i + "_out"
    dir_list = os.listdir(path)
    stopcnt=20000
    for j in dir_list:
        full=path + "/" + j
        mergeinfo.append([i,full])
        stopcnt-=1
        if stopcnt == 0:
            break 
    
dfinfo=pd.DataFrame(mergeinfo, columns=['dx','path'])

# encode categorical
le = LabelEncoder()
le.fit(dfinfo['dx'])
dfinfo['label'] = le.transform(dfinfo["dx"]) 

#Use the path to read images.
SIZE=64
dfinfo['image'] = dfinfo['path'].map(lambda x: np.asarray(Image.open(x).resize((SIZE,SIZE))))
#Convert dataframe column of images into numpy array
X = np.asarray(dfinfo['image'].tolist())
X = X/255. # Scale values
Y=dfinfo['label'] # Assign labelencoded values to Y (0 to 6)
Y_cat = to_categorical(Y, num_classes=7) #Convert to categorical ex. 2 -becomes-> [0,0,1,0,0,0,0]

#print(X.shape, Y_cat.shape)
# output : X, Y_cat



['akiec' 'bcc' 'bkl' 'df' 'mel' 'nv' 'vas']
(51791, 64, 64, 3) (51791, 7)


In [3]:
# let autokeras select a model using the augmented dataset 
#
# for testing/validating the best model we will use small portion 5% test 5% train
##################################################################################
#
# start model selection
#
X_train, X_test, Y_train, Y_test = train_test_split(X, Y_cat, test_size=0.10, random_state=42, shuffle=True)
x_train_auto, x_test_auto, y_train_auto, y_test_auto = train_test_split(X_test, Y_test, test_size=0.5, random_state=42, shuffle=True)
#
# start model selection
# Define classifier for autokeras. Check 25 different models, each model 25 epochs
# 
clf = ak.ImageClassifier(max_trials=25) #MaxTrials - max. number of keras models to try
clf.fit(x_train_auto, y_train_auto, epochs=25)

#Evaluate the classifier on test data

_, acc = clf.evaluate(x_test_auto, y_test_auto)
print("Accuracy = ", (acc * 100.0), "%")

# get the final best performing model
model = clf.export_model()

#Save the best model 
model.save('../pybin/models/activemodel_best_to_train')

# start model selection
INFO:tensorflow:Reloading Oracle from existing project .\image_classifier\oracle.json
INFO:tensorflow:Reloading Tuner from .\image_classifier\tuner0.json
INFO:tensorflow:Oracle triggered exit
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: .\image_classifier\best_model\assets


INFO:tensorflow:Assets written to: .\image_classifier\best_model\assets


#Evaluate the classifier on test data 
Accuracy =  58.4169864654541 %
# get the final best performing model 
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 64, 3)]       0         
                                                                 
 cast_to_float32 (CastToFloa  (None, 64, 64, 3)        0         
 t32)                                                            
                                                                 
 normalization (Normalizatio  (None, 64, 64, 3)        7         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 62, 62, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 60, 60, 64)        18496     
                  



INFO:tensorflow:Assets written to: ../pybin/models/activemodel_best_to_train\assets


INFO:tensorflow:Assets written to: ../pybin/models/activemodel_best_to_train\assets


In [6]:
# train the best model
######################
#

# Split the data in train/test set 
X_train_augm, X_test_augm, Y_train_augm, Y_test_augm = train_test_split(X_train, Y_train, test_size=0.3, random_state=42, shuffle=True)

# Load the model to train
model = keras.models.load_model('../pybin/models/activemodel_best_to_train')
# Train
model.fit(X_train_augm,Y_train_augm,epochs=10)
# Evaluate
res=model.evaluate(X_test_augm,Y_test_augm)

# Save the model
model.save('../pybin/models/activemodel_best_trained')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.7492948770523071, 0.7170337438583374]
#Save the model




INFO:tensorflow:Assets written to: ../pybin/models/activemodel_best_trained\assets


INFO:tensorflow:Assets written to: ../pybin/models/activemodel_best_trained\assets


INFO:tensorflow:Assets written to: ../pybin/models/activemodel\assets


INFO:tensorflow:Assets written to: ../pybin/models/activemodel\assets


In [5]:
# Do a testprediction
###################
#
model = keras.models.load_model('../pybin/models/activemodel_best_trained')

# If importing an external image you'll have to rescale it (so : divide by 225.)
#The augmented data in X_test_augm is are already rescaled!
img=np.reshape(X_test_augm[0], (-1, 64, 64, 3)) 
#
y_pred=model.predict(img)
# print result
print(y_pred)


[[9.4145754e-08 1.5575818e-03 1.3532737e-06 6.3111584e-05 8.4166946e-05
  6.0101952e-03 9.9228352e-01]]


In [7]:
Y_test_augm[0]

array([0., 0., 0., 0., 0., 0., 1.], dtype=float32)