#Mounting Google drive

In [1]:
# Attach Google Drive to the Colab filesystem so the dataset folders can be
# read like local files. The mount point is passed by keyword for clarity.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


#Import keras and other python libraries

In [2]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image
import cv2
from time import time
import matplotlib.pyplot as plt
import matplotlib
from keras.callbacks import TensorBoard
import os
from keras.optimizers import SGD
from os import listdir
from os.path import join, basename
from keras.preprocessing.image import ImageDataGenerator,load_img,array_to_img, img_to_array
from keras.regularizers import l2
from keras import regularizers


Using TensorFlow backend.


# Path to the dataset directory on Drive, and the target image size

In [0]:
# Dataset root; the loading cell appends this to the current working directory.
dirc = '/gdrive/My Drive/Final Project/DermCNN data/'

# Accumulators filled by the loading cell: one resized image and one class
# index per file read.
data, labels = [], []

# Every image is resized to this width/height (in pixels) before training.
im_width = im_height = 150

# Loading the data and resizing each image to 150×150

In [0]:
# Read every image from the 7 class folders (named "1" .. "7"), resize it to
# im_width x im_height and append it to `data`, with its zero-based class
# index appended to `labels` at the same position.
for i in range(7):
    # The dataset root is resolved relative to the current working directory.
    path=os.getcwd()+dirc+str(i+1)+'/'
    print(path)
    progress = 0
    # Only regular files are considered; sub-directories are ignored.
    image_files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path,f))]
    for file_name in image_files:
        image_file = os.path.join(path, file_name)
        img = cv2.imread(image_file)
        # cv2.imread signals failure by returning None rather than raising;
        # skip such files instead of crashing in cv2.resize mid-load.
        if img is None:
            print('Skipping unreadable image: ' + image_file)
            continue
        new_img = cv2.resize(img,(im_width,im_height))
        data.append(new_img)
        progress = progress+1
        # Use the loop index directly as the label instead of parsing it back
        # out of the path string.
        labels.append(i)
        if progress%100==0:
            print('Progress '+str(progress)+' images :'  + str(i+1))

# Converting data and labels to NumPy arrays and one-hot encoding the labels into 7 classes

In [5]:
# Stack the per-image arrays into one array and show its shape
# (printed twice, matching the recorded cell output).
data = np.array(data)
print(data.shape)
print(data.shape)

# Turn the label list into an array and report its shape, length and contents.
labels = np.array(labels)
print(labels.shape)
print(labels.shape[0])
print(labels)

# One-hot encode the integer class indices into 7 columns.
labels = keras.utils.to_categorical(labels, num_classes=7)

(13127, 150, 150, 3)
(13127, 150, 150, 3)
(13127,)
13127
[0 0 0 ... 6 6 6]


# Normalizing (gave a jump in performance of at least 2%)

In [0]:
# Scale pixel values from [0, 255] to [0, 1].
# Dividing a uint8 array by 255 promotes it to float64, which is 8 bytes per
# value. Convert once to float32 and divide in place to halve memory use.
data = np.asarray(data)
# Only raw uint8 pixels are scaled. This keeps the cell safe to re-run:
# already-normalized float data is left untouched instead of being scaled
# (or truncated to 0) a second time.
if data.dtype == np.uint8:
    data = data.astype(np.float32)
    data /= 255

#Shuffling Data and Labels on the same indices

In [0]:
def shuffle(a, b):
    """Shuffle ``a`` and ``b`` in place along their first axis, in unison.

    The global NumPy RNG state is captured before shuffling ``a`` and restored
    before shuffling ``b``, so both calls consume the same random sequence and
    row ``i`` of ``a`` stays paired with row ``i`` of ``b``.

    Args:
        a: Array shuffled in place (e.g. the images).
        b: Array shuffled in place with the same ordering (e.g. the labels).
            Must have the same length as ``a``.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths. The two
            shuffles would then use different permutations and silently
            break the pairing.
    """
    if len(a) != len(b):
        raise ValueError(
            'shuffle() needs equally long inputs, got %d and %d' % (len(a), len(b))
        )
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    # Rewind the generator so the second shuffle repeats the same permutation.
    np.random.set_state(rng_state)
    np.random.shuffle(b)

In [0]:
# The images were loaded class by class, and Keras takes the validation split
# from the end of the arrays, so they must be shuffled before fitting.
# One shuffle already gives a uniformly random order; repeating it ten times
# only repeated an expensive in-place pass over the whole image array.
shuffle(data,labels)

# The model begins here, with 6 convolutional layers.
### The first conv layer uses 128 filters to extract as many low-level features as possible (in this case, the tumors).
### The later layers use progressively fewer filters, emphasizing the impact of the low-level features.
### Dropout is used to tackle overfitting.
### Batch Normalization is used.

# Colab disconnected unexpectedly after the 182nd epoch, and training could not be resumed from that epoch

## Splitting the training and validation data inside the fit function.

In [0]:
# CNN classifier for 150x150 colour images with 7 output classes:
# three stages of two 3x3 convolutions followed by 2x2 max pooling, then a
# small dense head.
model = Sequential()
# Stage 1: the only "valid" convolution, so 150x150 shrinks to 148x148.
model.add(Conv2D(kernel_size=(3,3),filters=128,input_shape=(150, 150, 3),activation="relu",padding="valid"))
model.add(Conv2D(kernel_size=(3,3),filters=64,activation="relu",padding="same"))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
# Stage 2
model.add(Conv2D(kernel_size=(3,3),filters=32,activation="relu",padding="same"))
model.add(Conv2D(kernel_size=(3,3),filters=32,activation="relu",padding="same"))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
# Stage 3
model.add(Conv2D(kernel_size=(3,3),filters=16,activation="relu",padding="same"))
model.add(Conv2D(kernel_size=(3,3),filters=16,activation="relu",padding="same"))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
# Classifier head with dropout before and after the hidden dense layer.
model.add(Flatten())
model.add(Dropout(0.75))
model.add(Dense(100,activation="relu"))
model.add(Dropout(0.5))
# Fix: the BatchNormalization layer used to be constructed and thrown away,
# so it was never part of the network. It is now added to the model.
# The arguments spelled out below are the values the original call passed.
model.add(keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001))
model.add(Dense(7,activation='softmax'))
model.summary()
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# validation_split holds out the last 30% of the samples as validation data,
# so `data`/`labels` must already be shuffled when this cell runs.
history = model.fit(data,labels,
          batch_size=100,
          epochs=200,
          verbose=1,
          shuffle=True,
          validation_split=0.3)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 128)     3584      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 148, 148, 64)      73792     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 74, 74, 32)        18464     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 74, 74, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 37, 37, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 37, 37, 16)        4624      
__________

# The trend would likely have stayed the same through the 200th epoch.
## Estimated (not measured) final accuracy:    Train: 96%, Test: 94%

# Wasn't able to plot the graph due to the disconnection of Colab's kernel

In [0]:
# Plot the training and validation curves recorded by model.fit, one figure
# for accuracy and one for loss.
# Older Keras releases log accuracy as 'acc'/'val_acc', newer ones as
# 'accuracy'/'val_accuracy'; use whichever key is present so the cell does
# not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
for metric_key, metric_name in ((acc_key, 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    # The 'test' curve is the validation split held out by model.fit.
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()