# Baseline Model 

In [1]:
import os
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
import warnings
warnings.filterwarnings("ignore")


# Locate the dataset on disk and read the label table
base_image_dir = '/Users/abhisheknajbile/Documents/TermProject_3253/input/'
train_dir = os.path.join(base_image_dir, 'train_images/')
data = pd.read_csv(os.path.join(base_image_dir, 'train.csv'))

# Labels are stored as strings (the class names passed to the generators are strings too)
data["diagnosis"] = data["diagnosis"].astype(str)

# Build each image's file name once and reuse it for both columns:
# `path` gets the full location, `id_code` is overwritten with the bare file name.
png_names = data["id_code"].map(lambda code: code + ".png")
data["path"] = png_names.map(lambda name: os.path.join(train_dir, name))
data["id_code"] = png_names


#Initializing the variables needed for the model
img_size = 224      # images are resized to img_size x img_size
batch_size = 100    # images per generator batch
nb_epochs = 100     # number of training epochs
nb_classes = 5      # number of diagnosis classes
lbls = [str(class_id) for class_id in range(nb_classes)]  # class names: '0' .. '4'


#Preprocessing the data
from keras_preprocessing.image import ImageDataGenerator

# One ImageDataGenerator feeds both subsets: it applies rescale=1./255 to the
# images and holds out 25% of the rows for validation. No augmentation
# options are configured here.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.25
    )

# Arguments shared by the training and validation generators, kept in one
# place so the two cannot drift apart. The image directory reuses `train_dir`
# instead of repeating the path literal.
common_flow_args = dict(
    dataframe=data,
    directory=train_dir,
    x_col="id_code",
    y_col="diagnosis",
    batch_size=batch_size,
    shuffle=True,
    class_mode="categorical",
    classes=lbls,
    target_size=(img_size, img_size),
)

#Creating the training and validation generators (they differ only in `subset`)
train_generator = train_datagen.flow_from_dataframe(subset='training', **common_flow_args)

valid_generator = train_datagen.flow_from_dataframe(subset='validation', **common_flow_args)


Found 2747 images belonging to 5 classes.
Found 915 images belonging to 5 classes.


# Creating a Baseline model

Please note that this model is very large and takes upwards of 13 hours to run on a GeForce RTX 2080 Ti graphics card (11 GB of memory) in a machine with 64 GB of RAM.

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation, GlobalAveragePooling2D, Conv2D, MaxPooling2D

In [3]:
#Function to create a model
def create_model(img_size,activation_layers="relu",padding="same",
                activation_output="softmax",optimizer='adam',
                loss='categorical_crossentropy',nb_classes=5):
    """Build and compile the baseline convolutional classifier.

    Architecture: three Conv2D + MaxPooling2D stages (64, 32 and 32 filters,
    3x3 kernels, 2x2 pooling), a Flatten layer, two hidden Dense layers
    (512 and 128 units) and a Dense output layer with `nb_classes` units.

    Args:
        img_size: Side length of the square RGB input; the input shape is
            (img_size, img_size, 3).
        activation_layers: Activation used by every convolutional and hidden
            dense layer.
        padding: Padding mode of the FIRST convolution only; the second and
            third convolutions use the Conv2D default.
        activation_output: Activation of the output layer.
        optimizer: Optimizer passed to `model.compile`.
        loss: Loss passed to `model.compile`.
        nb_classes: Number of units in the output layer. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        The compiled Sequential model, tracking the 'accuracy' metric.
    """
    model = Sequential()

    # Convolutional feature extractor
    model.add(Conv2D(64, kernel_size=3, activation=activation_layers, padding=padding,input_shape=(img_size,img_size,3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, kernel_size=3, activation=activation_layers))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, kernel_size=3, activation=activation_layers))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Dense classification head
    model.add(Flatten())
    model.add(Dense(512,activation=activation_layers))
    model.add(Dense(128,activation=activation_layers))
    # Output width follows the number of classes rather than a literal 5
    model.add(Dense(nb_classes, activation=activation_output))

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    return model

model = create_model(img_size, nb_classes=nb_classes)

In [4]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 110, 110, 32)      18464     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 55, 55, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 53, 53, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 21632)             0

In [None]:
# Train the baseline model.
# Step counts are taken from the generators themselves: len(generator) is the
# whole number of batches needed to cover that subset once. The previous
# version divided `train.shape[0]` by the batch size, but `train` is never
# defined in this notebook (NameError on a fresh kernel), the result was a
# float, and the validation step count was derived from the training size.
history = model.fit_generator(generator=train_generator,
                              validation_data=valid_generator,
                              steps_per_epoch=len(train_generator),
                              epochs=nb_epochs,
                              max_queue_size=16,
                              workers=4,
                              use_multiprocessing=True,
                              validation_steps=len(valid_generator),
                              verbose=1)

In [None]:
#Accuracy Plot
# The second curve is the validation-split accuracy recorded during training
# ('val_accuracy'), so it is labelled "Validation" rather than "Test".
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train')
ax.plot(history.history['val_accuracy'], label='Validation')
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()


In [None]:
#Loss Plot
# The second curve is the validation-split loss recorded during training
# ('val_loss'), so it is labelled "Validation" rather than "Test".
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Train')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()