In [1]:
import os
import sys
import cv2

import random

import numpy as np

from tqdm import tqdm
import pickle

from keras.models import Sequential 
from keras.layers import Conv2D, MaxPooling2D 
from keras.layers import Activation, Dropout, Flatten, Dense

from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

import matplotlib.pyplot as plt

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Show the top-level contents of the dataset directory.
os.listdir('data')

['train', 'test', 'sample_submission.csv']

In [3]:
# Number of entries in the training directory.
len(os.listdir('data/train'))

25000

In [4]:
# File names (not full paths) of every entry in the train and test folders.
train_images, test_images = os.listdir('data/train'), os.listdir('data/test')

# One count per line: train first, then test.
print(len(train_images), len(test_images), sep='\n')

25000
12500


In [5]:
# Peek at the first few training file names; the 'cat.' / 'dog.' prefix is
# what the labelling loop further down keys on.
train_images[:10]

['cat.4213.jpg',
 'cat.7203.jpg',
 'dog.8250.jpg',
 'dog.7907.jpg',
 'dog.2318.jpg',
 'cat.6480.jpg',
 'dog.7973.jpg',
 'dog.2225.jpg',
 'dog.611.jpg',
 'dog.5955.jpg']

In [6]:
# Peek at the first few test file names.
test_images[:10]

['1818.jpg',
 '374.jpg',
 '1681.jpg',
 '2880.jpg',
 '2001.jpg',
 '11288.jpg',
 '4282.jpg',
 '7510.jpg',
 '11638.jpg',
 '8288.jpg']

In [7]:
# Turn every test image into a 64x64 Canny edge map, in the same order as
# `test_images`, so that test_images_data[i] corresponds to test_images[i].
test_images_data = []

for image in tqdm(test_images):
    image_path = 'data/test/' + image
    image_data = cv2.imread(image_path)

    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising. Fail here with the file name instead of letting
    # cvtColor raise an error that does not say which image was bad.
    # Skipping the file is not an option: it would shift every later entry
    # out of line with `test_images`.
    if image_data is None:
        raise IOError('Could not read test image: ' + image_path)

    # Convert to grayscale (OpenCV loads images as BGR).
    gray = cv2.cvtColor(image_data, cv2.COLOR_BGR2GRAY)

    image_data = cv2.resize(gray, (64, 64))

    # Keep only the edges; the lower and upper Canny thresholds are both 150.
    image_data = cv2.Canny(image_data, 150, 150)

    test_images_data.append(image_data)

100%|██████████| 12500/12500 [00:51<00:00, 243.99it/s]


In [8]:
# Stack the per-image edge maps into one array and add the trailing channel
# axis, so the test data has the same (N, 64, 64, 1) layout as the training
# data and matches the input_shape=(64, 64, 1) the network is built with.
# The reshape leaves an array that already has this shape unchanged, so
# re-running the cell is harmless.
test_images_data = np.array(test_images_data).reshape([-1, 64, 64, 1])

In [13]:
# Build the augmented training set: every image becomes a 64x64 Canny edge
# map and is stored twice (as-is and mirrored horizontally), with one label
# per stored sample (0 = cat, 1 = dog).
train_images_data = []
train_images_labels = []

# Shuffle a sorted copy with a fixed seed. The resulting order is the same on
# every run and machine (os.listdir order is arbitrary), and re-running this
# cell no longer keeps re-shuffling the shared `train_images` list in place.
shuffled_train_images = sorted(train_images)
random.Random(42).shuffle(shuffled_train_images)

for image in tqdm(shuffled_train_images):
    image_path = 'data/train/' + image
    image_data = cv2.imread(image_path)

    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising. Fail here with the file name instead of letting
    # cvtColor raise an error that does not say which image was bad.
    if image_data is None:
        raise IOError('Could not read training image: ' + image_path)

    # Convert to grayscale (OpenCV loads images as BGR).
    gray = cv2.cvtColor(image_data, cv2.COLOR_BGR2GRAY)

    image_data = cv2.resize(gray, (64, 64))

    # Keep only the edges; the lower and upper Canny thresholds are both 150.
    image_data = cv2.Canny(image_data, 150, 150)

    # File names starting with 'cat' are class 0; everything else is class 1.
    label = 0 if image.startswith('cat') else 1

    # Store the edge map and its left-right mirror back to back, each with
    # the same label, so data and labels stay index-aligned.
    train_images_data.extend([image_data, image_data[:, ::-1]])
    train_images_labels.extend([label, label])

100%|██████████| 25000/25000 [01:40<00:00, 248.21it/s]


In [14]:
# Labels become a flat array with one entry per stored sample.
train_images_labels = np.array(train_images_labels)

# Stack the edge maps and append the single channel axis expected by the
# network's input_shape=(64, 64, 1).
train_images_data = np.array(train_images_data).reshape((-1, 64, 64, 1))

In [15]:
# Cache the preprocessed training images on disk.
# Create the target directory first so the cell also works on a fresh
# checkout, and use a context manager so the file is flushed and closed even
# if pickling fails part-way.
os.makedirs('model_data', exist_ok=True)
with open('model_data/edges_train_images_data.pck', 'wb') as data_file:
    pickle.dump(train_images_data, data_file)

In [16]:
# Cache the preprocessed test images on disk.
# Create the target directory first so the cell also works on a fresh
# checkout, and use a context manager so the file is flushed and closed even
# if pickling fails part-way.
os.makedirs('model_data', exist_ok=True)
with open('model_data/edges_test_images_data.pck', 'wb') as data_file:
    pickle.dump(test_images_data, data_file)

In [17]:
# Cache the training labels on disk.
# Create the target directory first so the cell also works on a fresh
# checkout, and use a context manager so the file is flushed and closed even
# if pickling fails part-way.
os.makedirs('model_data', exist_ok=True)
with open('model_data/edges_train_images_labels.pck', 'wb') as labels_file:
    pickle.dump(train_images_labels, labels_file)

In [22]:
# Identifier for this training run. It is reused below for the TensorBoard
# log directory, the checkpoint file name and the final saved-model file name.
model_name = 'edges_model_epochs_100_conv_3_by_3_dropout'  # edge-map input, 3x3 convs, dropout

In [23]:
# Binary cat/dog classifier on 64x64 single-channel edge maps:
# three conv -> ReLU -> 2x2 max-pool stages (64, 32, 16 filters, 3x3 kernels),
# then a 50-unit dense layer with 50% dropout and a single sigmoid output
# (labels are 0 = cat, 1 = dog).
model = Sequential([
    Conv2D(64, (3, 3), input_shape=(64, 64, 1)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(16, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(50),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Callbacks: TensorBoard logging per run, early stopping with a patience of
# 5 epochs on val_loss, and a checkpoint that only keeps the best val_loss.
tensorboard = TensorBoard(log_dir="logs/{}".format(model_name))
early_stop = EarlyStopping(monitor='val_loss', patience=5)
checkpoint = ModelCheckpoint(
    filepath='model_data/' + model_name + '_checkpoint.h5',
    monitor='val_loss',
    save_best_only=True,
)

# 20% of the samples are held out for validation; training runs for at most
# 100 epochs. The call is left as the cell's last expression so the History
# object is still displayed.
model.fit(
    train_images_data,
    train_images_labels,
    epochs=100,
    validation_split=0.2,
    callbacks=[tensorboard, early_stop, checkpoint],
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


<keras.callbacks.History at 0x7fdf54428b00>

In [20]:
# TODO list for this notebook:
# - save the preprocessed data as pickle files
# - save the trained model
# - run the model on about 10 example test images to inspect the classifications

In [24]:
# Persist the model as it stands after training to models/<model_name>.h5.
# The 'models' directory is separate from 'model_data' used elsewhere in
# this notebook and may not exist yet, so create it before saving.
os.makedirs('models', exist_ok=True)
model.save('models/' + model_name + '.h5')