# Modelling and Evaluation

In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from PIL import Image

import matplotlib.pyplot as plt
import os
import PIL
import shutil
import random
import glob
import itertools

In [14]:
train_dir = 'inputs/cancer-dataset/train'
test_dir = 'inputs/cancer-dataset/test'
val_dir = 'inputs/cancer-dataset/validate'

In [15]:
meta_data = pd.read_csv("inputs/cancer-dataset/HAM10000_metadata.csv")
meta_data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


## Data Preprocessing

In [16]:
image_size = (224, 224)
batch_size = 32

# Create an ImageDataGenerator for preprocessing
datagen = ImageDataGenerator(preprocessing_function = tf.keras.applications.vgg16.preprocess_input)

# Create the train_batches using flow_from_directory
train_batches = datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    classes=['akiec', 'mel', 'nv', 'bcc', 'df', 'vasc', 'bkl']
)

# Create the test_batches using flow_from_directory
test_batches = datagen.flow_from_directory(
    test_dir,
    target_size=image_size,
    batch_size=batch_size,
    classes=['akiec', 'mel', 'nv', 'bcc', 'df', 'vasc', 'bkl']
)

# Create the valid_batches using flow_from_directory
valid_batches = datagen.flow_from_directory(
    val_dir,
    target_size=image_size,
    batch_size=batch_size,
    classes=['akiec', 'mel', 'nv', 'bcc', 'df', 'vasc', 'bkl']
)

Found 7010 images belonging to 7 classes.
Found 2003 images belonging to 7 classes.
Found 1002 images belonging to 7 classes.


## Build CNN

model = Sequential([
    Input(shape=(224,224,3)),
    Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2), strides=2),
    
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    
    Flatten(),
    
    Dense(units=7, activation='softmax'),
])

model.summary()

  model.compile(optimizer= Adam(learning_rate = 0.0001), 
              loss= 'categorical_crossentropy', 
              metrics=['accuracy'])

## Training 
* This step can be skipped if a model has been trained previously. 

In [None]:
model.fit(
    x=train_batches, 
    validation_data=valid_batches,
    epochs=10, 
    verbose=2)


In [None]:
model.save('outputs/model.keras')

## Model Evaluation

In [9]:
metrics = model.history.history

train_loss = metrics["loss"]
train_accuracy = metrics["accuracy"]

test_loss = metrics["val_loss"]
test_accuracy = metrics["val_accuracy"]

# Visualizing metrics
plt.figure(figsize = (13,4))
plt.subplot(1,2,1)
plt.title("Loss.")
plt.plot(train_loss, label = "Train");
plt.plot(test_loss, label = "Test");
plt.grid(True)
plt.legend(loc = "best");

plt.subplot(1,2,2)
plt.title("Accuracy.")
plt.plot(train_accuracy, label = "Train");
plt.plot(test_accuracy, label = "Test");
plt.grid(True)
plt.legend(loc = "best");

AttributeError: 'Sequential' object has no attribute 'history'

## Model Prediction

In [17]:
model = load_model('outputs/model.keras')

In [20]:
predictions = model.predict(x=test_batches, verbose=0)
np.round(predictions)

array([[4.0247127e-08, 6.3152486e-01, 3.6314777e-01, ..., 1.2382140e-12,
        1.9007976e-13, 5.3272713e-03],
       [4.7402181e-02, 2.6852276e-02, 5.5342070e-03, ..., 1.9528640e-02,
        1.9396460e-04, 6.1274055e-02],
       [4.6593173e-06, 1.5192643e-01, 8.1109089e-01, ..., 9.7608781e-06,
        5.9753773e-08, 3.6825072e-02],
       ...,
       [1.1675778e-05, 5.6037732e-04, 2.2862205e-01, ..., 1.1493804e-04,
        5.7349284e-06, 7.4552059e-01],
       [9.9470932e-04, 2.3058687e-05, 8.5163963e-01, ..., 5.6512054e-04,
        7.8032389e-03, 1.2727156e-01],
       [1.8433525e-03, 1.1141908e-01, 5.1849139e-01, ..., 1.8158810e-04,
        1.2414660e-08, 3.6416486e-01]], dtype=float32)