In [None]:
import numpy as np
import pandas as pd 
import keras
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os

In [None]:
# Quick-experiment switch: set to True to train for only three epochs.
FAST_RUN = False

# Width and height (in pixels) of the images fed to the network.
IMAGE_WIDTH, IMAGE_HEIGHT = 128, 128
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# Colour channels per image: red, green and blue.
IMAGE_CHANNELS = 3


In [None]:
# Read the training metadata table and show which columns it provides.
dataframe = pd.read_csv(
    filepath_or_buffer="../input/siim-isic-melanoma-classification/train.csv"
)
dataframe.columns

In [None]:
# Drop the metadata columns listed below.
# errors="ignore" keeps the cell re-runnable: once the columns are gone a second
# execution is a no-op instead of raising KeyError.
dataframe = dataframe.drop(
    columns=[
        "patient_id",
        "sex",
        "age_approx",
        "anatom_site_general_challenge",
        "diagnosis",
        "benign_malignant",
    ],
    errors="ignore",
)

# Turn each image id into a file name. The suffix is added only when it is missing,
# so re-running the cell does not produce names such as "x.jpg.jpg".
dataframe["image_name"] = dataframe["image_name"].apply(
    lambda name: name if name.endswith(".jpg") else name + ".jpg"
)
dataframe.shape

In [None]:
# Separate the rows by label value: target == 0 goes to benign_df, target == 1 to
# malignant_df. Copies are taken so the subsets are independent of `dataframe`.
benign_df = dataframe.loc[dataframe["target"] == 0].copy()
malignant_df = dataframe.loc[dataframe["target"] == 1].copy()

In [None]:
# Peek at the first three benign rows.
benign_df.head(n=3)

In [None]:
# Peek at the first three malignant rows.
malignant_df.head(n=3)

In [None]:
# Hold out 20% of each class for validation. Splitting the two classes separately
# keeps the class ratio the same in both parts; the fixed seed makes it repeatable.
train_benign, val_benign = train_test_split(
    benign_df, test_size=0.2, random_state=0
)
train_malignant, val_malignant = train_test_split(
    malignant_df, test_size=0.2, random_state=0
)

# Row counts, one per line: benign train / benign val / malignant train / malignant val.
print(
    len(train_benign),
    len(val_benign),
    len(train_malignant),
    len(val_malignant),
    sep="\n",
)

In [None]:
# Oversample the malignant training rows by stacking whole copies of
# `train_malignant` until there are strictly more malignant than benign rows.
#
# The number of copies is computed up front (smallest k with k * n_malignant > n_benign)
# and concatenated once, instead of re-concatenating a growing frame in a loop.
n_benign = len(train_benign)
n_malignant = len(train_malignant)

# With no malignant rows no amount of copying can reach the benign count; fail loudly
# rather than spin forever.
if n_malignant == 0:
    raise ValueError("train_malignant is empty; cannot oversample the malignant class")

n_copies = n_benign // n_malignant + 1
train_malignant_oversampled = pd.concat([train_malignant] * n_copies)

# Row counts, one per line: benign train / benign val / oversampled malignant train /
# malignant val.
print(
    len(train_benign),
    len(val_benign),
    len(train_malignant_oversampled),
    len(val_malignant),
    sep="\n",
)

In [None]:
# Stack the per-class pieces into the final training and validation tables.
# Only the training side uses the oversampled malignant rows.
train_df = pd.concat(objs=[train_benign, train_malignant_oversampled], axis=0)
val_df = pd.concat(objs=[val_benign, val_malignant], axis=0)
print(train_df.shape, val_df.shape, sep="\n")

In [None]:
# Bar chart of how many training rows carry each target value.
train_df["target"].value_counts().plot(kind="bar")

In [None]:
# random.choice(train_df["image_name"].values)


In [None]:
# Load one randomly chosen training image from disk and display it.
sample = random.choice(train_df["image_name"].values)
image = load_img(f"../input/siim-isic-melanoma-classification/jpeg/train/{sample}")
plt.imshow(image)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# Convolutional classifier: three convolutional blocks followed by a dense head.
# Every convolutional block is
#   Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2) -> Dropout(0.25)
# with the number of filters doubling from block to block (32, 64, 128).
model = Sequential([
    # block 1 -- also declares the input shape (width, height, channels)
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # block 2
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # block 3
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # dense head: flatten the feature maps, one hidden layer with heavier dropout
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # two softmax outputs, one per class
    Dense(2, activation='softmax'),
])

In [None]:
# Configure training: RMSprop optimiser, categorical cross-entropy loss (matches the
# two-unit softmax output), and accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the network.
model.summary()

In [None]:
# Create the output directory for checkpoints and the training history.
# exist_ok=True makes the cell safe to re-run, and using `os` instead of a shell
# command keeps it independent of the platform's shell.
os.makedirs("saved_model", exist_ok=True)

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training when the monitored quantity (EarlyStopping's default) has not
# improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate whenever validation accuracy has not improved for
# 2 epochs, but never go below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)


class CustomSaver(keras.callbacks.Callback):
    """Checkpoint callback that saves the whole model on every even epoch index."""

    def on_epoch_end(self, epoch, logs=None):
        """Save the model to ``saved_model/model_<epoch>.h5`` when ``epoch`` is even.

        Args:
            epoch: Index of the epoch that just finished, as passed by Keras.
            logs: Metrics dict supplied by Keras; unused here. Defaults to None
                rather than a shared mutable ``{}``.
        """
        if epoch % 2 == 0:
            # ".h5" is the extension Keras recognises for HDF5 files; the previous
            # ".hd5" is not a recognised extension.
            self.model.save("saved_model/model_{}.h5".format(epoch))


saver = CustomSaver()

# Callbacks handed to training, in the order they are listed.
callbacks = [earlystop, learning_rate_reduction, saver]

In [None]:
# Replace the numeric labels with class-name strings in both tables
# (0 -> 'benign', 1 -> 'malignant').
train_df["target"] = train_df["target"].replace(
    to_replace={0: 'benign', 1: 'malignant'}
)
val_df["target"] = val_df["target"].replace(
    to_replace={0: 'benign', 1: 'malignant'}
)

In [None]:
# Show the first five training rows after relabelling.
train_df.head(n=5)

In [None]:
# Give both tables a fresh 0..n-1 index, discarding the old one. The validation
# table is bound to the new name `validate_df` from here on.
train_df, validate_df = (
    train_df.reset_index(drop=True),
    val_df.reset_index(drop=True),
)

In [None]:
# Bar chart of the training rows per class label.
train_df["target"].value_counts().plot(kind="bar")

In [None]:
# Bar chart of the validation rows per class label.
validate_df["target"].value_counts().plot(kind="bar")

In [None]:
# Number of rows in the training and validation tables.
total_train = len(train_df)
total_validate = len(validate_df)

# Mini-batch size used by the data generators.
batch_size = 15

In [None]:
# Augmenting generator for the training images.
train_datagen = ImageDataGenerator(
    # scale pixel values by 1/255
    rescale=1.0 / 255,
    # geometric augmentation: rotation, zoom, shifts
    rotation_range=360,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # random mirroring along both axes
    horizontal_flip=True,
    vertical_flip=True,
)

In [None]:
# Stream augmented training batches: file names come from `image_name`, labels from
# `target`, and images are resized to IMAGE_SIZE.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../input/siim-isic-melanoma-classification/jpeg/train/",
    x_col='image_name',
    y_col='target',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

In [None]:
# Validation images are only rescaled by 1/255 -- no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Stream validation batches from the same image directory, using the rows of
# `validate_df`.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="../input/siim-isic-melanoma-classification/jpeg/train/",
    x_col='image_name',
    y_col='target',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

In [None]:
# Demonstration of the augmentation: take a single random row of the training table
# and wrap it in its own generator built from the augmenting `train_datagen`.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="../input/siim-isic-melanoma-classification/jpeg/train/",
    x_col='image_name',
    y_col='target',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
)

In [None]:
# Draw 15 batches from the example generator and show the first image of each in a
# 5 x 3 grid.
plt.figure(figsize=(12, 12))
for i in range(15):
    plt.subplot(5, 3, i + 1)
    # pull exactly one batch from the generator for this grid cell
    X_batch, Y_batch = next(example_generator)
    # only the first image of the batch is displayed
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

In [None]:
# Number of training epochs: 3 in fast mode, 5 otherwise.
if FAST_RUN:
    epochs = 3
else:
    epochs = 5

In [None]:
# Train the network on the augmented training stream and evaluate on the validation
# stream after every epoch.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and is no
# longer available in recent Keras releases.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    # whole batches only: any final partial batch is not counted
    validation_steps=total_validate//batch_size,
    steps_per_epoch=total_train//batch_size,
    callbacks=callbacks
)

In [None]:
# model.save_weights("saved_model/model.h5")

import pickle

# Persist the per-epoch metrics recorded during training.
# Only the plain `history.history` dict is pickled: the History callback object
# itself keeps a reference to the model, so pickling it drags the model along and
# may fail outright. NOTE(review): anything that loads this file now receives a
# dict rather than a History object -- confirm that is acceptable for consumers.
with open('saved_model/history.pickle', 'wb') as f:
    pickle.dump(history.history, f)