# Convolutional Neural Networks

## Import libraries

In [1]:
import os
import tensorflow
os.environ['KERAS_BACKEND'] = 'tensorflow'

import wget
import numpy as np
import pandas as pd
import shutil
import zipfile
import warnings
from distutils.dir_util import copy_tree
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense

Using TensorFlow backend.


## Download and extract dataset

In [2]:
# Make sure the local 'data' directory exists before anything is downloaded into it.
data_dir_missing = not os.path.exists('data')
if data_dir_missing:
    print("Creating the data directory")
    os.mkdir('data')
    print("Directory created")
else:
    print("Path already exists")

Path already exists


In [3]:
# Download and unpack the dataset once. The presence of data/training_set is
# used as the marker that an earlier run already fetched and extracted it.
data_url = 'https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P16-Convolutional-Neural-Networks.zip'
file_name = data_url.split("/")[-1]
if not os.path.exists('data/training_set'):
    print('Need to download dataset')
    wget.download(data_url, out = 'data')
    print('Data downloaded')
    with zipfile.ZipFile('data/' + file_name, 'r') as file:
        print("Extracting dataset")
        # Extracts into the current working directory; the steps below expect
        # the archive to contain a Convolutional_Neural_Networks/dataset/ folder.
        file.extractall()
    # The archive is closed at this point; move the images under data/.
    copy_tree('Convolutional_Neural_Networks/dataset/', 'data/')
    print("Dataset extracted and created")
    try:
        shutil.rmtree('Convolutional_Neural_Networks')
        os.remove('data/' + file_name)
        print("Extra files removed")
    except OSError as err:
        # Cleanup is best-effort: only filesystem errors are tolerated, so
        # interrupts and programming errors still surface instead of being hidden.
        print("Could not delete extra files:", err)
else:
    print('Dataset available')

Dataset available


## Building the model

In [4]:
# Baseline CNN for binary classification: one convolution + max-pooling stage,
# flattened into a 128-unit dense layer and a single sigmoid output.
# The pooling layer pins data_format to 'channels_first' like the convolution
# does, so the stack agrees with the (3, 64, 64) input_shape instead of
# depending on the image_data_format default in the local Keras config.
classifier = Sequential()
classifier.add(Convolution2D(filters = 32, kernel_size = (3, 3), input_shape = (3, 64, 64), activation = 'relu', data_format = 'channels_first'))
classifier.add(MaxPooling2D(pool_size = (2, 2), data_format = 'channels_first'))
classifier.add(Flatten())
classifier.add(Dense(units = 128, activation = 'relu'))
classifier.add(Dense(units = 1, activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
classifier.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 62, 62)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 31, 31)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 30752)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               3936384   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 3,937,409
Trainable params: 3,937,409
Non-trainable params: 0
_________________________________________________________________


## Data Augmentation

In [5]:
from keras.preprocessing.image import ImageDataGenerator

In [6]:
# Training images are rescaled to [0, 1] and randomly sheared, zoomed and
# flipped; test images are only rescaled.
# data_format is pinned to 'channels_first' so the generated batches are
# (batch, 3, 64, 64), matching a model built with input_shape = (3, 64, 64),
# rather than following the image_data_format default of the local Keras config.
train_datagen = ImageDataGenerator(rescale = 1.0/255, 
                                   shear_range = 0.2, 
                                   zoom_range = 0.2, 
                                   horizontal_flip = True,
                                   data_format = 'channels_first')

test_datagen = ImageDataGenerator(rescale = 1.0/255,
                                  data_format = 'channels_first')

# Each sub-directory of the given folder is treated as one class; with
# class_mode = 'binary' the labels are a single 0/1 value per image.
training_set = train_datagen.flow_from_directory('data/training_set', 
                                                 target_size = (64, 64), 
                                                 batch_size = 32, 
                                                 class_mode = 'binary')

test_set = test_datagen.flow_from_directory('data/test_set', 
                                            target_size = (64, 64), 
                                            batch_size = 32, 
                                            class_mode = 'binary')

Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [7]:
# Train for 25 epochs, validating on the test set after each epoch.
# len() of a directory iterator is its number of batches per pass
# (ceil(samples / batch_size)), so the step counts are whole numbers and
# follow the dataset and batch size instead of hard-coded float divisions
# (2000/32 evaluates to 62.5, which is not a valid number of steps).
classifier.fit_generator(training_set,
                         steps_per_epoch = len(training_set),
                         epochs = 25, 
                         validation_data = test_set,
                         validation_steps = len(test_set),
                         verbose = 1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x10d977198>

## Improving the model to get test accuracy around 80%

In [8]:
# Deeper variant of the baseline: a second 3x3 convolution is stacked before
# the pooling layer; the dense head is unchanged.
# Every spatial layer pins data_format to 'channels_first' so the stack agrees
# with the (3, 64, 64) input_shape instead of depending on the
# image_data_format default in the local Keras config.
classifier = Sequential()
classifier.add(Convolution2D(filters = 32, kernel_size = (3, 3), input_shape = (3, 64, 64), activation = 'relu', data_format = 'channels_first'))
classifier.add(Convolution2D(filters = 32, kernel_size = (3, 3), activation = 'relu', data_format = 'channels_first'))
classifier.add(MaxPooling2D(pool_size = (2, 2), data_format = 'channels_first'))
classifier.add(Flatten())
classifier.add(Dense(units = 128, activation = 'relu'))
classifier.add(Dense(units = 1, activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
classifier.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 32, 62, 62)        896       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 60, 60)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 32, 30, 30)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 28800)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               3686528   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 129       
Total params: 3,696,801
Trainable params: 3,696,801
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train the deeper model with the same schedule: 25 epochs, validating on the
# test set after each epoch.
# len() of a directory iterator is its number of batches per pass
# (ceil(samples / batch_size)), so the step counts are whole numbers and
# follow the dataset and batch size instead of hard-coded float divisions
# (2000/32 evaluates to 62.5, which is not a valid number of steps).
classifier.fit_generator(training_set,
                         steps_per_epoch = len(training_set),
                         epochs = 25, 
                         validation_data = test_set,
                         validation_steps = len(test_set),
                         verbose = 1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x10c988748>

By adding a second convolutional layer, I was able to reach a test accuracy of 79.40%.