In [0]:
## Objectives
# Read the data files from Google Drive.
# Train several kinds of CNNs on the Microsoft cats-and-dogs dataset:
# 1. a convnet trained from scratch
# 2. a convnet using transfer learning from ImageNet
# 3. a convnet with fine-tuning only

In [2]:
import os, shutil
import keras
import numpy as np

Using TensorFlow backend.


In [3]:
# Paths to the full dataset and to the reduced working copy.
# The folders themselves are not created here. If you create them with
# os.mkdir and hit a "file already exists" error, delete the corresponding
# folder first or guard the call with `if not os.path.exists(path)`.
data_dir = '/content/gdrive/My Drive/workspace/colab_notebooks/data/kagglecatsanddogs_3367a'
small_dir = '/mnt/data/ppaudyal/cats_and_dogs_small'

# One sub-folder per split, all rooted at small_dir.
train_dir, test_dir, validation_dir = (
    os.path.join(small_dir, split) for split in ('train', 'test', 'validation')
)

In [0]:
# Per-class folders inside the training split.
# First run only -- create them by uncommenting:
#   os.mkdir(train_cats_dir)
#   os.mkdir(train_dogs_dir)
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)

In [0]:
# Per-class folders inside the test split.
# First run only -- create them by uncommenting:
#   os.mkdir(test_cats_dir)
#   os.mkdir(test_dogs_dir)
test_cats_dir, test_dogs_dir = (
    os.path.join(test_dir, label) for label in ('cats', 'dogs')
)

In [0]:
# Per-class folders inside the validation split.
# First run only -- create them by uncommenting:
#   os.mkdir(validation_cats_dir)
#   os.mkdir(validation_dogs_dir)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)

In [0]:
#NOTE:to run only once!
#copy first 1000 as train, next 500 as test and the final 500 as validation
#first with dog train: images 0.jpg .. 999.jpg
src_dir = os.path.join(data_dir, 'Dog')
dst_dir = os.path.join(small_dir, 'train', 'dogs')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1000)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [0]:
#NOTE:to run only once!
#repeat for Cat: images 0.jpg .. 999.jpg go to the training split
src_dir = os.path.join(data_dir, 'Cat')
dst_dir = os.path.join(small_dir, 'train', 'cats')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1000)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [0]:
#NOTE:to run only once!
#Dog images 1000.jpg .. 1499.jpg go to the test split
src_dir = os.path.join(data_dir, 'Dog')
dst_dir = os.path.join(small_dir, 'test', 'dogs')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1000, 1500)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [0]:
#NOTE:to run only once!
#repeat for Cat: images 1000.jpg .. 1499.jpg go to the test split
src_dir = os.path.join(data_dir, 'Cat')
dst_dir = os.path.join(small_dir, 'test', 'cats')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1000, 1500)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [0]:
#NOTE:to run only once!
#Dog images 1500.jpg .. 1999.jpg go to the validation split
src_dir = os.path.join(data_dir, 'Dog')
dst_dir = os.path.join(small_dir, 'validation', 'dogs')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1500, 2000)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [0]:
#NOTE:to run only once!
#repeat for Cat: images 1500.jpg .. 1999.jpg go to the validation split
src_dir = os.path.join(data_dir, 'Cat')
dst_dir = os.path.join(small_dir, 'validation', 'cats')
# shutil.copyfile raises FileNotFoundError when the destination folder is
# missing, so create it (and any missing parents) first. exist_ok=True keeps
# this cell from failing when the folder is already there.
os.makedirs(dst_dir, exist_ok=True)
fnames = [str(i)+'.jpg' for i in range(1500, 2000)]
for fname in fnames:
  src = os.path.join(src_dir, fname)
  dst = os.path.join(dst_dir, fname)
  shutil.copyfile(src, dst)

In [4]:
# NOTE: run this cell only once!
# A small convnet for dogs vs. cats classification.
import keras.layers as layers
import keras.models as models

# Four Conv2D(3x3, relu) + max-pooling stages (32, 64, 128, 128 filters) on
# 150x150 RGB input, then a 512-unit dense layer and a single sigmoid unit
# for the binary cat/dog decision.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [6]:
import keras.optimizers as optimizers

# Configure training: RMSprop with a 1e-4 learning rate, binary cross-entropy
# loss, and accuracy reported under the metric name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=1e-4),
    metrics=['acc'],
)


In [7]:
# Print a per-layer table of output shapes and parameter counts for the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [0]:
#Preprocessing the data 
#1. Read the picture files
#2. Decode JPEG content to RGB grids of pixels
#3. Convert these to floating-point tensors 
#4. Rescale the pixel values from [0, 255] to [0, 1]

In [9]:
# Bring the image generator class into the notebook namespace under its short name.
from keras.preprocessing.image import ImageDataGenerator

In [10]:
# Two generators that only rescale pixel values by 1/255 (no augmentation);
# one for the training images and one for held-out images.
train_datagen = ImageDataGenerator(
    rescale=1./255,
)
test_datagen = ImageDataGenerator(
    rescale=1./255,
)

In [11]:
# Stream the training images from train_dir, resized to 150x150, in batches
# of 20 with binary (0/1) labels.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

Found 2000 images belonging to 2 classes.


In [12]:
# Stream the validation images from validation_dir with the same image size,
# batch size and label mode as the training generator.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

Found 1000 images belonging to 2 classes.


In [13]:
# Pull a single batch from the training generator and show its shapes.
data_batch, labels_batch = next(iter(train_generator))
print('data shape:', data_batch.shape)
print('labels shape:', labels_batch.shape)

data shape: (20, 150, 150, 3)
labels shape: (20,)


In [None]:
# Fit the model for 30 epochs.
# With batches of 20 images, 100 training steps and 50 validation steps make
# one pass over the 2000 training and 1000 validation images per epoch.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
)

Epoch 1/30
