[View in Colaboratory](https://colab.research.google.com/github/pgurazada/ml-projects/blob/master/2018_06_08_cats_vs_dogs.ipynb)

##  Data IO

The first part of this notebook authenticates with Google and mounts Google Drive so that the image data stored there can be read from this environment.

In [0]:
# Run Colab's interactive user-authentication step before any Drive access below.
# NOTE(review): presumably this is what makes the application-default credentials
# used by the Drive setup cell available — confirm.
from google.colab import auth
auth.authenticate_user()

In [7]:
# Install the apt tooling needed to add a PPA, register the alessandro-strada PPA,
# then install google-drive-ocamlfuse and fuse from it.
# NOTE: the shell applies redirections left to right, so `2>&1 > /dev/null`
# discards stdout only; stderr is still sent to the cell output.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

# The application-default credentials supply the OAuth client id and secret that
# are passed to google-drive-ocamlfuse below.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()

import getpass

# First headless run: stdin is /dev/null and only output lines containing "URL"
# are shown (presumably the authorization URL to open in a browser — confirm).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it.
vcode = getpass.getpass()
# Second headless run: the code is piped to the tool on stdin.
# NOTE(review): {vcode} is interpolated into the shell command unquoted.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Preconfiguring packages ...
Selecting previously unselected package cron.
(Reading database ... 18298 files and directories currently installed.)
Preparing to unpack .../00-cron_3.0pl1-128ubuntu5_amd64.deb ...
Unpacking cron (3.0pl1-128ubuntu5) ...
Selecting previously unselected package libapparmor1:amd64.
Preparing to unpack .../01-libapparmor1_2.11.0-2ubuntu17.1_amd64.deb ...
Unpacking libapparmor1:amd64 (2.11.0-2ubuntu17.1) ...
Selecting previously unselected package libdbus-1-3:amd64.
Preparing to unpack .../02-libdbus-1-3_1.10.22-1ubuntu1_amd64.deb ...
Unpacking libdbus-1-3:amd64 (1.10.22-1ubuntu1) ...
Selecting previously unselected package dbus.
Preparing to unpack .../03-dbus_1.10.22-1ubuntu1_amd64.deb ...
Unpacking dbus (1.10.22-1ubuntu1) ...
Selecting previously unselected package dirmngr.
Preparing to unpack .../04-dirmngr_2.1.15-1ubuntu8_amd64.deb ...
Unpacking dirmngr (2.1.15-1ubuntu8) ...
Selecting previously unselected package distro-info-data.
Preparing to unpack .../0


Creating config file /etc/apt/apt.conf.d/50unattended-upgrades with new version
invoke-rc.d: could not determine current runlevel
invoke-rc.d: policy-rc.d denied execution of start.
Setting up dirmngr (2.1.15-1ubuntu8) ...
Setting up cron (3.0pl1-128ubuntu5) ...
Adding group `crontab' (GID 102) ...
Done.
invoke-rc.d: could not determine current runlevel
invoke-rc.d: policy-rc.d denied execution of start.
Setting up libdbus-1-3:amd64 (1.10.22-1ubuntu1) ...
Setting up kmod (24-1ubuntu2) ...
Setting up libdbus-glib-1-2:amd64 (0.108-2) ...
Setting up python3-gi (3.24.1-2build1) ...
Setting up module-init-tools (24-1ubuntu2) ...
Setting up python3-software-properties (0.96.24.17) ...
Setting up dbus (1.10.22-1ubuntu1) ...
Setting up python-apt (1.4.0~beta3build2) ...
Setting up python3-dbus (1.2.4-1build3) ...
Setting up python-software-properties (0.96.24.17) ...
Setting up software-properties-common (0.96.24.17) ...
Processing triggers for libc-bin (2.26-0ubuntu2.1) ...
Processing trigg

In [0]:
# Create the ./drive directory used as the mount point; -p makes this a no-op if it already exists.
!mkdir -p drive

In [0]:
# Mount Google Drive at ./drive with google-drive-ocamlfuse.
!google-drive-ocamlfuse drive

In [53]:
# Sanity check: list the sub-folders of the training directory (one per class).
!ls drive/data/train

cats  dogs


In [0]:
import os 

In [64]:
# Report the number of files per class, one line per split, as "<dogs> <cats>".
for dogs_path, cats_path in (
    ('drive/data/train/dogs', 'drive/data/train/cats'),
    ('drive/data/validation/dogs', 'drive/data/validation/cats'),
):
    dog_count = len(os.listdir(dogs_path))
    cat_count = len(os.listdir(cats_path))
    print(dog_count, cat_count)

1000 1000
500 500


## Building the Model

Note that the data set is balanced (the same number of cat and dog images in each split), so accuracy is a reasonable metric.

In [0]:
from keras.preprocessing.image import ImageDataGenerator

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Activation, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping

from keras import backend as K

In [0]:
# Reset the Keras backend session before building a model, so state left over
# from earlier runs in this kernel does not carry over.
K.clear_session()

In [0]:
# Baseline convnet: four Conv2D -> ReLU -> MaxPooling2D stages on 150x150 RGB
# input, followed by a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [57]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 72, 72, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 128)       73856     
__________

In [0]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as 'acc'.
rmsprop = RMSprop(lr=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['acc'],
)

In [0]:
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [0]:
# Root folder of each split on the mounted drive.
train_dir = 'drive/data/train'
validation_dir = 'drive/data/validation'

# Per-class sub-folders, derived from the split roots.
train_cats_dir = train_dir + '/cats'
train_dogs_dir = train_dir + '/dogs'

validation_cats_dir = validation_dir + '/cats'
validation_dogs_dir = validation_dir + '/dogs'

In [68]:
# Stream training images from the class sub-folders of train_dir.
# Images are resized to 150x150 and served in batches of 20 with binary (0/1)
# labels, which is what the binary_crossentropy loss expects.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

Found 2000 images belonging to 2 classes.


In [69]:
# Stream validation images with the same size, batch size and label mode as the
# training generator.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

Found 1000 images belonging to 2 classes.


In [70]:
# Draw a single batch from the training generator and show the array shapes it yields.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)

data batch shape: (20, 150, 150, 3)
labels batch shape: (20,)


In [71]:
# Train for up to 30 epochs. With batches of 20, 100 training steps and
# 50 validation steps each cover the 2000 / 1000 images exactly once per epoch.
# EarlyStopping ends training once its monitored metric has not improved for
# 2 consecutive epochs.
early_stopping = EarlyStopping(patience=2)

model_output = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
    callbacks=[early_stopping],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30

Epoch 5/30
Epoch 6/30
Epoch 7/30

Epoch 8/30
Epoch 9/30
Epoch 10/30

Epoch 11/30


In [72]:
# Per-epoch training record: a dict of lists keyed by 'acc', 'loss', 'val_acc' and 'val_loss'.
model_output.history

{'acc': [0.525,
  0.587500002682209,
  0.6469999977946281,
  0.708000001013279,
  0.7524999988079071,
  0.765,
  0.7705000013113021,
  0.7975000029802323,
  0.8115000021457672,
  0.8269999980926513,
  0.8379999965429306],
 'loss': [0.6952438390254975,
  0.668605745434761,
  0.6235778433084488,
  0.5836593180894851,
  0.5317618447542191,
  0.4996700033545494,
  0.4767307749390602,
  0.4481227374076843,
  0.4166129994392395,
  0.3987840074300766,
  0.37584931939840316],
 'val_acc': [0.5210000014305115,
  0.5860000014305115,
  0.6560000014305115,
  0.6549999982118606,
  0.6849999988079071,
  0.6879999995231628,
  0.7040000009536743,
  0.7059999996423721,
  0.7189999991655349,
  0.6950000035762787,
  0.6869999969005585],
 'val_loss': [0.6828444266319275,
  0.6581067955493927,
  0.6186458122730255,
  0.6082178544998169,
  0.5870812809467316,
  0.5862436300516128,
  0.5681061518192291,
  0.5694220405817032,
  0.5576211702823639,
  0.5688789123296738,
  0.6033456617593765]}

In [0]:
# Reset the Keras backend session before building the second model, so the
# first model's state does not carry over.
K.clear_session()

In [0]:
# Second convnet: same four conv/pool stages as the baseline (ReLU folded into
# each Conv2D), plus a Dropout(0.5) layer between Flatten and the dense head.
model = Sequential()

# First stage carries the input shape: 150x150 RGB images.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in the number of filters.
for n_filters in (64, 128, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))



In [90]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [0]:
# Same training setup as the baseline: binary cross-entropy, RMSprop at 1e-4, accuracy as 'acc'.
rmsprop = RMSprop(lr=1e-4)
model.compile(loss='binary_crossentropy', optimizer=rmsprop, metrics=['acc'])

In [0]:
# Training generator with augmentation: besides the 1/255 rescale, images are
# randomly rotated, shifted, sheared, zoomed and horizontally flipped.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

In [0]:
# Validation images are only rescaled by 1/255; none of the augmentation options are applied to them.
test_datagen = ImageDataGenerator(rescale=1./255)

In [94]:
# Augmented training stream: 150x150 images, batches of 32, binary labels.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

Found 2000 images belonging to 2 classes.


In [95]:
# Validation stream (rescale only): 150x150 images, batches of 32, binary labels.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

Found 1000 images belonging to 2 classes.


In [0]:
%%time

model_output = model.fit_generator(train_generator,
                                   steps_per_epoch=100,
                                   epochs=100,
                                   validation_data=validation_generator,
                                   validation_steps=50,
                                   callbacks=[EarlyStopping(patience=5)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

Epoch 5/100
Epoch 6/100
Epoch 7/100

Epoch 8/100
Epoch 9/100
Epoch 10/100

Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 14/100
Epoch 15/100
Epoch 16/100

Epoch 17/100
Epoch 18/100
Epoch 19/100

Epoch 20/100

In [0]:
# Save the trained model to a .h5 file under drive/data so it can be reloaded later.
model.save('drive/data/cats_and_dogs_small_2.h5')