In [None]:
import os, shutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')

import warnings
warnings.filterwarnings('ignore')

In [None]:
from PIL import Image
from keras.utils import plot_model
from IPython.display import Image 

# preprocessing
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# modelling
import keras
import tensorflow as tf
from keras import layers
from keras import models
from keras import utils
from keras import optimizers
from keras.models import Sequential
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras.optimizers import SGD, Adam
from keras.applications.vgg19 import VGG19
from keras import losses
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
import math 

# callbacks
from keras.callbacks import History

# Mount Google Drive inside the Colab VM; IMAGE_PATH below points into
# this mount, so this cell must run before any data is read.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Print the compute devices TensorFlow reports for this runtime
# (a quick check of whether an accelerator is attached).
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

In [None]:
# Root of the image dataset on the mounted Drive; it contains one sub-folder per split.
IMAGE_PATH = "/content/gdrive/My Drive/Colab Notebooks/melanoma_classification/siim-isic-melanoma-classification/torch_jpeg/"

# Build the per-split directories from the shared root in one pass.
train_folder, test_folder = (
    os.path.join(IMAGE_PATH, split_name) for split_name in ('train', 'test')
)


In [None]:
# Seed NumPy's global random number generator.
# NOTE(review): this does not by itself seed TensorFlow/Keras weight
# initialisation — confirm whether tf.random.set_seed is also wanted.
np.random.seed(0)

In [None]:
#train_benign = os.path.join(IMAGE_PATH + 'train' + '/', 'benign')
#test_benign = os.path.join(IMAGE_PATH + 'test' + '/', 'benign')
#files = os.listdir(train_benign)
#for f in files:
#  if np.random.rand(1) < .2:
#    shutil.move(train_benign + '/' + f, test_benign + '/' + f )

#train_malignant = os.path.join(IMAGE_PATH + 'train' + '/', 'malignant')
#test_malignant = os.path.join(IMAGE_PATH + 'test' + '/', 'malignant')
#files = os.listdir(train_malignant)
#for f in files:
#  if np.random.rand(1) < .2:
#    shutil.move(train_malignant + '/' + f, test_malignant + '/' + f )

In [None]:
# Training images: rescale to [0, 1] and apply random augmentation.
# validation_split reserves 20% of the files under train_folder for validation.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.3,
        rotation_range=30,
        horizontal_flip=True,
      #  vertical_flip=True,
        validation_split=0.2)

# Validation images: rescale only (no augmentation, so metrics are measured on
# unmodified images). The same validation_split value is used so that the
# 'validation' subset here is the complement of the 'training' subset above.
val_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2)

# Test images: rescale only.
test_datagen = ImageDataGenerator(rescale=1./255)

# Training subset (80%) of /train, resized to 512x512.
# subset='training' keeps the held-out validation files out of the training batch.
train_generator = train_datagen.flow_from_directory(
        train_folder,
        target_size=(512, 512),
        batch_size=1000,
        subset='training',
        classes=['benign', 'malignant'])

# All images in /test, resized to 512x512.
test_generator = test_datagen.flow_from_directory(
        test_folder,
        target_size=(512, 512),
        batch_size=200,
        classes=['benign', 'malignant'])

# Validation subset (20%) of /train, resized to 512x512.
val_generator = val_datagen.flow_from_directory(
        train_folder,
        target_size=(512, 512),
        batch_size=160,
        # class_mode='binary',
        subset='validation',
        classes=['benign', 'malignant'])




In [None]:
# Draw a single batch from each generator. These in-memory arrays are what
# every later cell trains and evaluates on (the generators are not passed
# to fit/evaluate directly).
train_images, train_labels = next(train_generator)
test_images, test_labels = next(test_generator)
val_images, val_labels = next(val_generator)

In [None]:
#@title Default title text

#preview training image
array_to_img(train_images[233])


In [None]:
#preview training image
array_to_img(train_images[5])

In [None]:
#preview training image
array_to_img(train_images[84])

In [None]:
# Show the array shapes of the image and label batches for train, test and
# validation, one shape per line (images first, then labels, for each split).
print(
    *(np.shape(batch) for batch in (
        train_images, train_labels,
        test_images, test_labels,
        val_images, val_labels,
    )),
    sep='\n',
)

In [None]:

# Flatten every image into one feature vector: one row per image,
# all remaining dimensions collapsed into a single axis.
train_img = train_images.reshape(len(train_images), -1)
test_img = test_images.reshape(len(test_images), -1)
val_img = val_images.reshape(len(val_images), -1)

# Report the flattened shapes, one per line.
print(train_img.shape, test_img.shape, val_img.shape, sep='\n')

In [None]:
# Build binary target column vectors from the one-hot label batches.
# The positive class (1) is 'malignant', looked up through class_indices so the
# encoding always agrees with the generator's class -> column mapping
# (previously column 0, i.e. 'benign', was used as the positive class).
malignant_col = train_generator.class_indices['malignant']

# reshape(-1, 1) infers the row count, so a batch smaller than the requested
# batch size (fewer files on disk) no longer breaks the reshape.
train_y = train_labels[:, malignant_col].reshape(-1, 1)
test_y = test_labels[:, malignant_col].reshape(-1, 1)
val_y = val_labels[:, malignant_col].reshape(-1, 1)

In [None]:
# preview the label array to identify the class values
train_labels

In [None]:

#identify the categorical values of 0 and 1
train_generator.class_indices

In [None]:
# Transpose the label batch and keep row 1; indexing with the list [1]
# preserves the leading axis, so the result is 2-D.
# NOTE(review): row 1 is presumably the 'malignant' indicator given the
# classes order passed to the generator — confirm against class_indices.
train_labels_final = train_labels.T[[1]]
np.shape(train_labels_final)


In [None]:
# Same selection as for the training labels: row 1 of the transposed
# test label batch, kept 2-D by the list index.
test_labels_final = test_labels.T[[1]]
np.shape(test_labels_final)

In [None]:
array_to_img(train_images[210])

In [None]:
train_labels_final[:,210]

### Base CNN Model

In [None]:
# Baseline CNN: three Conv2D -> MaxPooling2D stages with dropout between
# them, followed by a single sigmoid unit for binary classification.
base_model = models.Sequential()
base_model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(512, 512,  3)))
base_model.add(layers.MaxPooling2D((2, 2)))

# layers.Dropout is the Keras layer class (the previous `layer.dropout`
# referenced an undefined name and raised NameError).
base_model.add(layers.Dropout(0.3))
base_model.add(layers.Conv2D(32, (4, 4), activation='relu'))
base_model.add(layers.MaxPooling2D((2, 2)))

base_model.add(layers.Dropout(0.3))
base_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
base_model.add(layers.MaxPooling2D((2, 2)))

base_model.add(layers.Dropout(0.3))
base_model.add(layers.Flatten())
# One sigmoid output -> probability of the positive class.
base_model.add(layers.Dense(1, activation='sigmoid'))

base_model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

base_model.summary()

In [None]:
plot_model(base_model, show_shapes=True, show_layer_names=True)

In [None]:
# Train the baseline model on the in-memory training batch and track
# loss/accuracy on the validation batch after every epoch.
history = base_model.fit(
    x=train_images,
    y=train_y,
    validation_data=(val_images, val_y),
    batch_size=16,
    epochs=10,
)

In [None]:
# Loss and accuracy of base_model on the same batch it was fitted on.
results_train = base_model.evaluate(train_images, train_y)

In [None]:
# Loss and accuracy of base_model on the test batch.
results_test = base_model.evaluate(test_images, test_y)

In [None]:
results_train

In [None]:
results_test

In [None]:
def visualize_training_results(results):
    """Plot validation-vs-training curves from a fit result.

    Draws two separate figures, first loss and then accuracy, each with the
    validation curve plotted before the training curve.

    Parameters
    ----------
    results : object with a ``history`` mapping that contains the keys
        'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    curves = results.history
    for metric, heading in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
        val_metric = 'val_' + metric
        plt.figure()
        plt.plot(curves[val_metric])
        plt.plot(curves[metric])
        plt.legend([val_metric, metric])
        plt.title(heading)
        plt.xlabel('Epochs')
        plt.ylabel(heading)
        plt.show()

visualize_training_results(history)

### Model 2

In [None]:
# Second model: same Conv2D -> MaxPooling2D -> Dropout stack as the baseline,
# built as an independent model so it can be trained for more epochs.
base2_model = models.Sequential()
base2_model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(512, 512,  3)))
base2_model.add(layers.MaxPooling2D((2, 2)))

# layers.Dropout is the Keras layer class (`layer.dropout` was an undefined name).
base2_model.add(layers.Dropout(0.3))
base2_model.add(layers.Conv2D(32, (4, 4), activation='relu'))
base2_model.add(layers.MaxPooling2D((2, 2)))

# Every layer is added to base2_model; two of the dropout layers were
# previously appended to base_model by mistake, mutating the other model.
base2_model.add(layers.Dropout(0.3))
base2_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
base2_model.add(layers.MaxPooling2D((2, 2)))

base2_model.add(layers.Dropout(0.3))
base2_model.add(layers.Flatten())
# One sigmoid output -> probability of the positive class.
base2_model.add(layers.Dense(1, activation='sigmoid'))

base2_model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

base2_model.summary()

In [None]:
plot_model(base2_model, show_shapes=True, show_layer_names=True)

In [None]:
# Train the second model on the in-memory training batch for 40 epochs,
# tracking loss/accuracy on the validation batch after every epoch.
history = base2_model.fit(
    x=train_images,
    y=train_y,
    validation_data=(val_images, val_y),
    batch_size=16,
    epochs=40,
)

In [None]:
# Loss and accuracy of base2_model on the same batch it was fitted on.
results_train = base2_model.evaluate(train_images, train_y)

In [None]:
# Loss and accuracy of base2_model on the test batch.
results_test = base2_model.evaluate(test_images, test_y)

In [None]:
results_train

In [None]:
results_test

In [None]:
visualize_training_results(history)



In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# base_model ends in a single sigmoid unit, so predict() yields one
# probability per image in an (n, 1) array.
y_pred = base_model.predict(test_images, batch_size=32)

# Threshold the probabilities at 0.5 to get hard 0/1 predictions.
# (np.argmax over axis=1 of a one-column array is always 0, which made
# every prediction class 0 regardless of the model output.)
y_pred_bool = (y_pred > 0.5).astype(int).ravel()

print(classification_report(np.ravel(test_y), y_pred_bool))

In [None]:
confusion_matrix(test_y, y_pred_bool)

In [None]:
# Confusion matrix for the most recent predictions: rows are the true class
# codes, columns the predicted class codes (codes as encoded in test_y).
# The former rename(columns=class_indices, ...) was dropped: its keys are class
# names while the frame is labelled with integers, so it never renamed anything.
confusion_matrix_df = pd.DataFrame(confusion_matrix(test_y, y_pred_bool))

fig, ax = plt.subplots(figsize=(7,5))
# fmt='d' prints the counts as integers instead of scientific notation.
sns.heatmap(confusion_matrix_df, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion matrix (base model)')
plt.show()

In [None]:


# base2_model ends in a single sigmoid unit, so predict() yields one
# probability per image in an (n, 1) array.
y_pred = base2_model.predict(test_images, batch_size=128)

# Threshold the probabilities at 0.5 to get hard 0/1 predictions.
# (np.argmax over axis=1 of a one-column array is always 0, which made
# every prediction class 0 regardless of the model output.)
y_pred_bool = (y_pred > 0.5).astype(int).ravel()

print(classification_report(np.ravel(test_y), y_pred_bool))

In [None]:
confusion_matrix(test_y, y_pred_bool)

In [None]:
# Confusion matrix for the most recent predictions: rows are the true class
# codes, columns the predicted class codes (codes as encoded in test_y).
# The former rename(columns=class_indices, ...) was dropped: its keys are class
# names while the frame is labelled with integers, so it never renamed anything.
confusion_matrix_df = pd.DataFrame(confusion_matrix(test_y, y_pred_bool))

fig, ax = plt.subplots(figsize=(7,5))
# fmt='d' prints the counts as integers instead of scientific notation.
sns.heatmap(confusion_matrix_df, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion matrix (model 2)')
plt.show()