In [0]:
#Imports
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint 

In [0]:
# Specify the input shape to the first convolutional layer
input_shape = (120, 160, 3)
nClasses = 2

def createModel():
    model = Sequential()
    # a convolution layer of 32 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    # a convolution layer of 32 features of size 3x3 with relu activation
    model.add(Conv2D(32, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # a convolution layer of 64 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    # a convolution layer of 64 features of size 3x3 with relu activation
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # a convolution layer of 64 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    # a convolution layer of 64 features of size 3x3 with relu activation
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # a dropout layer of 50%
    model.add(Dropout(0.5))
    
    # flatten the output of the previous layer
    model.add(Flatten())
    # add a dense layer that outputs 512 units and apply relu activation
    model.add(Dense(512, activation='relu'))
    # a dropout layer of 50%
    model.add(Dropout(0.5))
    # add a dense layer with a softmax activation to classify the images
    model.add(Dense(nClasses, activation='softmax'))
     
    return model

In [0]:
#Create a model and compile it
model = createModel()
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [47]:
#mounting my drive to use the dataset and entering the authorization code
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### **Point 1: Reading modified dataset using ImageDataGenerator and splitting it into training, validation & testing sets**

### **Point 2: Uaing ImageDataGenerator parameters to split the training and validation set into 80:20 ration (validation split = 0.2) and to standardize the data (samplewise_center = true and samplewise_std_normalization=true)** 

In [0]:
# Using Keras' Image Data Generator to read the modified dataset and continue training
# Split the dataset into training and validation sets
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
        rescale=1./255,
        samplewise_std_normalization=True,
        samplewise_center=True,
        validation_split=0.2)

#Do the normalizaton for the testing set
test_datagen = ImageDataGenerator(rescale=1./255, samplewise_std_normalization=True, samplewise_center=True)


In [49]:
# Reading training dataset from the google drive directory
train_generator = train_datagen.flow_from_directory(
        '/content/gdrive/My Drive/Project3ML_dataset/Data/train',
        target_size=(120, 160),
        batch_size=32,
        class_mode='binary',
        subset="training",
        color_mode="rgb")



Found 3098 images belonging to 2 classes.


In [50]:
# Reading validation dataset from the google drive directory
validation_generator = train_datagen.flow_from_directory(
        '/content/gdrive/My Drive/Project3ML_dataset/Data/train',
        target_size=(120, 160),
        batch_size=32,
        class_mode='binary',
        subset="validation",
        color_mode="rgb")




Found 774 images belonging to 2 classes.


In [51]:
# Reading testing set from the google drive directory
test_generator = test_datagen.flow_from_directory(
        '/content/gdrive/My Drive/Project3ML_dataset/Data/test',
        target_size=(120, 160),
        batch_size=32,
         class_mode='binary',
        color_mode="rgb")



Found 860 images belonging to 2 classes.


### **Point 4: Load the weights file from the google drive directory**

In [0]:
# load the weights file from the google drive directory

model.load_weights("/content/gdrive/My Drive/Project3ML_dataset/Weights/weights.hdf5")

### **Point 5: Do Keras Checkpointing and save the best weights in a file in the google drive**

In [0]:
# https://keras.io/callbacks/#modelcheckpoint
# mention the path to save the best weights to continue training from it in case training was interrupted
best_weights_file_path = "/content/gdrive/My Drive/Project3ML_dataset/Weights/best_weights.hdf5"

#DO the checkpointing to save the weights in case it was better than the saved best weights
check_point = ModelCheckpoint( best_weights_file_path, 
    save_best_only=True, 
    save_weights_only=True,
    monitor='acc',
    period=1)

### **Point 3: Use the fit_generator to train the mdoel and specify the steps per epoch and the validation steps**

**The steps per epoch is calculated using the the total number of samples (train_genertor.n) divided by the batch size (train_generator.batch_size which in our case is 32) so that we specify to the training model to do these steps in each epoch to go through the whole training/validation dataset in each epoch**

**For example: if the dataset had 10,000 images and a batch size of 100 then the epoch should contain 10,000 / 100 = 100 steps. In our case, the batch size is 32.**

**We do the same for the validation dataset (validation_generator.n divided by validation_generator.batch_size), we also train the model on 30 epochs of the whole set and use the checkpoints that we specified earlier**

In [54]:
Train_steps = train_generator.n // train_generator.batch_size
Validation_steps = validation_generator.n // validation_generator.batch_size

trained_model = model.fit_generator(
    train_generator,
    validation_data=validation_generator,
    steps_per_epoch=Train_steps, 
    verbose=1,
    epochs= 30,
    validation_steps=Validation_steps, 
    callbacks=[check_point])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [56]:
best_training_accuracy = max(trained_model.history['acc'])
best_training_loss = min(trained_model.history['loss'])
print("Best Training Loss: ", best_training_loss)
print("Best Training Accuracy: ", best_training_accuracy)

best_validation_accuracy = max(trained_model.history['val_acc'])
best_validation_loss = min(trained_model.history['val_loss'])
print("Best Validation Loss: ", best_validation_loss)
print("Best Validation Accuracy: ", best_validation_accuracy)

Best Training Loss:  0.04350427738688522
Best Training Accuracy:  0.9905414218149851
Best Validation Loss:  0.5813197929341838
Best Validation Accuracy:  0.9164420483568608


### **Point 6: We evaluate our model using the testing dataset and specify the testing steps size as we did in the training and validation earlier**

In [57]:
Testing_steps = test_generator.n // test_generator.batch_size

loss, accuracy = model.evaluate_generator(test_generator, verbose=1, steps=Testing_steps)
print("Testing Loss: ", loss)
print("Testing Accruacy: ", accuracy)

Testing Loss:  0.7701649989640487
Testing Accruacy:  0.90625
