In [28]:
# building a model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# dealing with images
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical

# splitting into train and test
from sklearn.model_selection import train_test_split

# utilities
import random
import numpy as np
import pandas as pd
import os

In [9]:
data_dir = '../catdog_input'
training_dir = data_dir+'/train'
print(os.listdir(data_dir))

nb_train_imgs = len(os.listdir(training_dir))
print(nb_train_imgs, 'training images')

['train', 'sampleSubmission.csv', 'test1.zip']
25000  training images


In [51]:
imgs = os.listdir(training_dir)
labels = np.zeros((nb_train_imgs,), dtype='object')
for i, img in enumerate(imgs):
    label = img.split('.')[0]
    if label == 'dog':
        labels[i] = '1'
    else:
        labels[i] = '0'

df = pd.DataFrame({
    'filename': imgs,
    'category': labels
})

# look at a few samples
for i in range(10):
    print(labels[i])

<class 'str'> 

1
1
1
1
1
1
0
1
0
1


In [36]:
df.tail()

Unnamed: 0,category,filename
24995,0,cat.4228.jpg
24996,1,dog.2454.jpg
24997,1,dog.8236.jpg
24998,1,dog.2249.jpg
24999,1,dog.11217.jpg


In [27]:
img_wh = 128
img_d = 3
input_shape = (img_wh, img_wh, img_d)

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 126, 126, 32)      896       
_________________________________________________________________
batch_normalization_1 (Batch (None, 126, 126, 32)      128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 63, 63, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 61, 61, 64)        18496     
_________________________________________________________________
batch_normalization_2

In [52]:
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

In [55]:
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

print(total_train)
print(total_validate)

20000
5000


In [63]:
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# params
flow_training_dir = training_dir+'/'
img_dim = (img_wh, img_wh)
batch_size = 15

train_generator = train_datagen.flow_from_dataframe(
    train_df, 
    flow_training_dir, 
    x_col='filename',
    y_col='category',
    target_size=img_dim,
    class_mode='binary',
    batch_size=batch_size
)

Found 20000 images belonging to 2 classes.


In [64]:
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df, 
    flow_training_dir, 
    x_col='filename',
    y_col='category',
    target_size=img_dim,
    class_mode='binary',
    batch_size=batch_size
)

Found 5000 images belonging to 2 classes.


In [65]:
nb_epochs = 15
perc_valid = 0.2
model.fit_generator(
    train_generator, 
    epochs=nb_epochs,
    validation_data=validation_generator,
    validation_steps=total_validate//batch_size,
    steps_per_epoch=total_train//batch_size
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f4676534588>