In [1]:
import os
import sys
import cv2

import random

import numpy as np

from tqdm import tqdm
import pickle

from keras.models import Sequential 
from keras.layers import Conv2D, MaxPooling2D 
from keras.layers import Activation, Dropout, Flatten, Dense

from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

import matplotlib.pyplot as plt

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Show the top-level entries of the dataset directory.
os.listdir('data')

['train', 'test', 'sample_submission.csv']

In [3]:
# Number of files in the training directory.
len(os.listdir('data/train'))

25000

In [4]:
# Collect the file names found in the training and test directories.
train_images = os.listdir(os.path.join('data', 'train'))
test_images = os.listdir(os.path.join('data', 'test'))

# Print how many files each split contains (train first, then test).
for image_names in (train_images, test_images):
    print(len(image_names))

25000
12500


In [5]:
# Preview the first ten training file names.
train_images[:10]

['cat.4213.jpg',
 'cat.7203.jpg',
 'dog.8250.jpg',
 'dog.7907.jpg',
 'dog.2318.jpg',
 'cat.6480.jpg',
 'dog.7973.jpg',
 'dog.2225.jpg',
 'dog.611.jpg',
 'dog.5955.jpg']

In [6]:
# Preview the first ten test file names.
test_images[:10]

['1818.jpg',
 '374.jpg',
 '1681.jpg',
 '2880.jpg',
 '2001.jpg',
 '11288.jpg',
 '4282.jpg',
 '7510.jpg',
 '11638.jpg',
 '8288.jpg']

In [7]:
# Load every test image as a 64x64 RGB array, in the same order as `test_images`.
test_images_data = []

for image in tqdm(test_images):
    image_path = os.path.join('data', 'test', image)
    image_data = cv2.imread(image_path)

    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, so fail here with the file name instead of letting
    # cvtColor die with an opaque error.
    if image_data is None:
        raise IOError('Could not read image: ' + image_path)

    # convert color from BGR to RGB
    image_data = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
    image_data = cv2.resize(image_data, (64, 64))

    test_images_data.append(image_data)

100%|██████████| 12500/12500 [05:19<00:00, 39.11it/s]


In [8]:
# Convert the list of per-image arrays into a single NumPy array.
test_images_data = np.array(test_images_data)

In [9]:
# Build the training set. Every image contributes two samples: itself and a
# horizontally mirrored copy, both with the same label (cat -> 0, otherwise 1).
train_images_data = []
train_images_labels = []

# os.listdir returns names in arbitrary order, so sort first and shuffle with a
# fixed seed: the sample order (and therefore any split taken from it later)
# is then the same on every run. A private Random instance is used so the
# global `random` state is left untouched.
train_images.sort()
random.Random(42).shuffle(train_images)

for image in tqdm(train_images):
    image_path = os.path.join('data', 'train', image)
    image_data = cv2.imread(image_path)

    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, so fail here with the file name instead of letting
    # cvtColor die with an opaque error.
    if image_data is None:
        raise IOError('Could not read image: ' + image_path)

    # convert color from BGR to RGB
    image_data = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)

    image_data = cv2.resize(image_data, (64, 64))

    # The label is derived from the file name prefix.
    label = 0 if image.startswith('cat') else 1

    train_images_data.append(image_data)
    train_images_data.append(image_data[:, ::-1])  # horizontally flipped copy
    train_images_labels.extend([label, label])

100%|██████████| 25000/25000 [07:40<00:00, 54.34it/s]


In [10]:
# Turn the accumulated Python lists into NumPy arrays; the image array is
# explicitly shaped as (samples, 64, 64, 3).
train_images_data = np.array(train_images_data).reshape([-1, 64, 64, 3])
train_images_labels = np.array(train_images_labels)

In [11]:
# Cache the training image array on disk so it can be reloaded without
# repeating the image loading loop.
os.makedirs('model_data', exist_ok=True)  # the dump fails if the directory is missing
with open('model_data/colored_train_images_data.pck', 'wb') as cache_file:
    # `with` guarantees the file is flushed and closed even if dump raises.
    pickle.dump(train_images_data, cache_file)

In [12]:
# Cache the test image array on disk so it can be reloaded without
# repeating the image loading loop.
os.makedirs('model_data', exist_ok=True)  # the dump fails if the directory is missing
with open('model_data/colored_test_images_data.pck', 'wb') as cache_file:
    # `with` guarantees the file is flushed and closed even if dump raises.
    pickle.dump(test_images_data, cache_file)

In [13]:
# Cache the training labels on disk alongside the image arrays.
os.makedirs('model_data', exist_ok=True)  # the dump fails if the directory is missing
with open('model_data/colored_train_images_labels.pck', 'wb') as cache_file:
    # `with` guarantees the file is flushed and closed even if dump raises.
    pickle.dump(train_images_labels, cache_file)

In [2]:
# Restore the cached training image array.
# NOTE: pickle.load can execute arbitrary code; only load cache files that
# were written by this notebook.
with open('model_data/colored_train_images_data.pck', 'rb') as file:
    # `with` closes the handle as soon as the array has been read.
    train_images_data = pickle.load(file)

In [3]:
# Restore the cached test image array.
# NOTE: pickle.load can execute arbitrary code; only load cache files that
# were written by this notebook.
with open('model_data/colored_test_images_data.pck', 'rb') as file:
    # `with` closes the handle as soon as the array has been read.
    test_images_data = pickle.load(file)

In [4]:
# Restore the cached training labels.
# NOTE: pickle.load can execute arbitrary code; only load cache files that
# were written by this notebook.
with open('model_data/colored_train_images_labels.pck', 'rb') as file:
    # `with` closes the handle as soon as the array has been read.
    train_images_labels = pickle.load(file)

In [5]:
# Identifier for this experiment; the training cell uses it to name the
# TensorBoard log directory and the checkpoint file.
model_name = 'colored_model_epochs_100_conv_5_by_5_dropout_batch_size_32'

In [6]:
# Small CNN for binary classification of 64x64 RGB images: three
# conv -> ReLU -> max-pool stages followed by a dense head with a single
# sigmoid output.
# NOTE(review): none of the visible cells rescale the pixel values before
# training; consider scaling inputs (train AND test) consistently -- confirm
# against the prediction code before changing.
model = Sequential()

# Stage 1: 64 filters, 5x5 kernel.
model.add(Conv2D(64, (5, 5), input_shape=(64, 64, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: 32 filters, 3x3 kernel.
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3: 16 filters, 3x3 kernel.
model.add(Conv2D(16, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(50))  # TODO: increase to 100 and even 200, see how the accuracy performs
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Make sure the checkpoint directory exists before training starts; otherwise
# the first checkpoint save can fail after a full epoch has already been spent.
os.makedirs('models', exist_ok=True)

tensorboard = TensorBoard(log_dir="logs/{}".format(model_name))
# Stop once val_loss has not improved for 8 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=8)
# Only overwrite the saved model when val_loss improves.
checkpoint = ModelCheckpoint(filepath='models/' + model_name + '.h5', monitor='val_loss',
                             save_best_only=True)

# Keep the returned History object so the per-epoch metrics stay available
# after the cell finishes (previously it was only displayed and then lost).
history = model.fit(train_images_data, train_images_labels, epochs=100, validation_split=0.2,
                    callbacks=[tensorboard, early_stop, checkpoint], batch_size=32)

# TODO (experiments still to run):
#   - batch sizes 32, 64, 128 and finally 256; see how accuracy performs
#   - dropout after each conv stage, e.g. model.add(Dropout(0.25)) after every pooling layer
#   - a filter of 8 by 8
#   - an image size of 80 by 80 with a smaller filter of 5 by 5, then 8 by 8

# Display the History object as the cell output, as before.
history

Train on 40000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


<keras.callbacks.History at 0x7f4553150e48>