In [1]:
import os, cv2, random
import numpy as np
import pandas as pd


from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils


Using TensorFlow backend.


In [118]:
TRAIN_DIR = './train/'
TEST_DIR = './test/'

ROWS = 128
COLS = 128
CHANNELS = 3

train_images = [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR)] # use this for full dataset
train_dogs =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'dog' in i]
train_cats =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'cat' in i]

test_images =  [TEST_DIR+i for i in os.listdir(TEST_DIR)]

train_images = train_dogs[:1000] + train_cats[:1000]
random.shuffle(train_images)
test_images =  test_images[:25]

In [119]:
def read_image(file_path):
    img = cv2.imread(file_path, cv2.IMREAD_COLOR) #cv2.IMREAD_GRAYSCALE
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)


def prep_data(images):
    count = len(images)
    data = np.ndarray((count, CHANNELS, ROWS, COLS), dtype=np.uint8)

    for i, image_file in enumerate(images):
        image = read_image(image_file)
        data[i] = image.T
        if i%250 == 0: print('Processed {} of {}'.format(i, count))
    
    return data

train = prep_data(train_images)
test = prep_data(test_images)

print("Train shape: {}".format(train.shape))
print("Test shape: {}".format(test.shape))

Processed 0 of 2000
Processed 250 of 2000
Processed 500 of 2000
Processed 750 of 2000
Processed 1000 of 2000
Processed 1250 of 2000
Processed 1500 of 2000
Processed 1750 of 2000
Processed 0 of 25
Train shape: (2000, 3, 128, 128)
Test shape: (25, 3, 128, 128)


In [121]:
labels = []
for i in train_images:
    if 'dog' in i:
        labels.append(1)
    else:
        labels.append(0)


In [44]:
optimizer = RMSprop(lr=1e-4)
objective = 'binary_crossentropy'


def catdog():
    
    model = Sequential()

    model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(3, ROWS, COLS), activation='relu'))
    model.add(Convolution2D(32, 3, 3, border_mode='same', activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(MaxPooling2D(pool_size=(2, 2),dim_ordering="th"))


    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))


    model.add(MaxPooling2D(pool_size=(2, 2),dim_ordering="th"))
    
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))


    model.add(MaxPooling2D(pool_size=(2, 2),dim_ordering="th"))

    
    model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
    model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
#     model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))


    model.add(MaxPooling2D(pool_size=(2, 2),dim_ordering="th"))


#     model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
#     model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
#     model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
#     model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
    return model


model = catdog()

In [131]:
nb_epoch = 10
batch_size = 16

## Callback for loss logging per epoch
class LossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []
        
    def on_epoch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')        
        
def run_catdog():
    
    history = LossHistory()
    model1.fit(train, labels, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=0.25, verbose=0, shuffle=True, callbacks=[history, early_stopping])
    

    predictions = model1.predict(test, verbose=0)
    return predictions, history

#predictions, history = run_catdog()

In [48]:
predict = model.predict(test)

In [51]:
len(predict)

12500

In [52]:
for i in range(0,100):
    if predict[i, 0] >= 0.5: 
        print('I am {:.2%} sure this is a Dog'.format(predict[i][0]))
    else: 
        print('I am {:.2%} sure this is a Cat'.format(1-predict[i][0]))

I am 63.69% sure this is a Cat
I am 74.30% sure this is a Cat
I am 67.78% sure this is a Cat
I am 60.15% sure this is a Cat
I am 71.37% sure this is a Dog
I am 88.11% sure this is a Cat
I am 63.69% sure this is a Cat
I am 91.66% sure this is a Dog
I am 70.48% sure this is a Dog
I am 72.88% sure this is a Cat
I am 68.06% sure this is a Dog
I am 81.58% sure this is a Cat
I am 66.21% sure this is a Cat
I am 73.76% sure this is a Dog
I am 77.57% sure this is a Dog
I am 97.04% sure this is a Dog
I am 89.83% sure this is a Dog
I am 72.43% sure this is a Cat
I am 64.69% sure this is a Dog
I am 71.11% sure this is a Cat
I am 58.52% sure this is a Dog
I am 62.99% sure this is a Dog
I am 74.97% sure this is a Cat
I am 73.56% sure this is a Cat
I am 58.62% sure this is a Dog
I am 69.71% sure this is a Dog
I am 78.35% sure this is a Cat
I am 82.81% sure this is a Cat
I am 83.32% sure this is a Dog
I am 89.48% sure this is a Dog
I am 60.62% sure this is a Cat
I am 55.47% sure this is a Cat
I am 52.

In [132]:
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
def VGG_16(weights_path=None):
    model = Sequential()
    model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(3, ROWS, COLS), activation='relu'))
    model.add(Convolution2D(64, 3, 3, border_mode='same',  activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2), dim_ordering="th"))

    model.add(Convolution2D(128, 3, 3,  border_mode='same', activation='relu'))
    model.add(Convolution2D(128, 3, 3,  border_mode='same', activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2), dim_ordering="th"))


    model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
    model.add(Convolution2D(256, 3, 3,  border_mode='same', activation='relu'))
    model.add(Convolution2D(256, 3, 3,  border_mode='same', activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2), dim_ordering="th"))


    model.add(Convolution2D(512, 3, 3,  border_mode='same',activation='relu'))
    model.add(Convolution2D(512, 3, 3,  border_mode='same', activation='relu'))
    model.add(Convolution2D(512, 3, 3,  border_mode='same', activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2), dim_ordering="th"))


    model.add(Convolution2D(512, 3, 3,  border_mode='same', activation='relu'))
    model.add(Convolution2D(512, 3, 3,  border_mode='same', activation='relu'))
    model.add(Convolution2D(512, 3, 3,  border_mode='same', activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2), dim_ordering="th"))


    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='softmax'))
    #sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    sgd = RMSprop(lr=1e-4)
    model.compile(optimizer=sgd, loss='binary_crossentropy')
    if weights_path:
        model.load_weights(weights_path)

    return model

In [133]:
model1 = VGG_16()
predictions, history = run_catdog()


Epoch 00004: early stopping


In [130]:
len(predictions[predictions>0.5])

25

In [117]:
for i in range(0,1000):
    if predictions[i, 0] >= 0.5: 
        print('I am {:.2%} sure this is a Dog'.format(predictions[i][0]))
    else: 
        print('I am {:.2%} sure this is a Cat'.format(1-predictions[i][0]))


I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog
I am 100.00% sure this is a Dog


IndexError: index 25 is out of bounds for axis 0 with size 25

In [110]:
predictions[predictions<0.5]

array([], dtype=float32)

In [75]:
test_images[1]

'./test/10.jpg'

In [76]:
test_id = [x.split("/")[2].split(".")[0] for x in test_images]

In [83]:
predictions  = [if [x>0.5]: x=0 else: x=1 for x in predictions]

SyntaxError: invalid syntax (<ipython-input-83-e31534ddb5a2>, line 1)

In [92]:
predictions[predictions>0.5] = 1
predictions[predictions<=0.5] = 0

12500