In [1]:
# following along with https://elitedatascience.com/keras-tutorial-deep-learning-in-python (MNIST dataset) -mt
# attempting to fit model for cats and dogs kaggle dataset: https://www.kaggle.com/c/dogs-vs-cats -mt

### Step 3: Import libraries and modules

In [2]:
# importing numpy and setting a seed for reproducibility
import numpy as np
# NOTE(review): the original note here called the seed irrelevant, but later cells do rely on
# randomness (Dropout, random augmentation). Presumably this only seeds NumPy's global RNG and
# not the TensorFlow backend -- TODO confirm.
np.random.seed(123)

import os

In [3]:
# importing standard keras modules/layers/utilities
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

Using TensorFlow backend.


In [4]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [5]:
from matplotlib import pyplot as plt
%matplotlib inline

#### Second attempt using: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html -mt

Directory structure looks like:
```
small_training_dir/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```            

### Step 4: Point to images for dataset creation (later)

In [6]:
# Dimensions every image is resized to before it reaches the network.
img_width, img_height = 150, 150

# Root folder of the Kaggle cats-vs-dogs data. Defaults to the original author's location;
# set the CATSDOGS_DATA_DIR environment variable to run the notebook on another machine.
data_root = os.environ.get(
    'CATSDOGS_DATA_DIR',
    '/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs')

train_data_dir = os.path.join(data_root, 'small_training_dir', 'train')
validation_data_dir = os.path.join(data_root, 'small_training_dir', 'validation')
nb_train_samples = 1000       # training image count, used to derive steps per epoch
nb_validation_samples = 200   # validation image count, used to derive validation steps
epochs = 30
batch_size = 16

In [7]:
# Choose the tensor layout the active backend reports: channel axis first or last.
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)

### Step 5: Define model

In [8]:
# Small convnet: three conv -> relu -> max-pool stages, then a dense head
# ending in a single sigmoid unit.
model = Sequential([
    # stage 1 (also declares the input shape)
    Convolution2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # stage 2
    Convolution2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # stage 3
    Convolution2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # classifier head
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

In [9]:
# print a per-layer table of output shapes and parameter counts
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 72, 72, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 64)        18496     
__________

In [10]:
# Single sigmoid output -> binary cross-entropy loss; report accuracy during training.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Step 6: Data augmentation

In [11]:
# Augmentation configuration for the training images: pixel rescaling plus
# rotation / shear / zoom / horizontal-flip settings.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

In [12]:
# this is the augmentation configuration we will use for testing:
# only rescaling (the same 1/255 factor as the training generator, no other transforms configured)
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [13]:
# Stream training batches from the class sub-folders of train_data_dir.
# class_mode='binary' goes with the model's single sigmoid output.
# NOTE(review): Keras documents target_size as (height, width); passing (width, height)
# is harmless only while both are 150 -- confirm before using non-square sizes.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)

Found 1000 images belonging to 2 classes.


In [14]:
# Stream validation batches (rescaled only) from the class sub-folders of validation_data_dir.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)

Found 200 images belonging to 2 classes.


#### Test data augmentation

In [15]:
# img = load_img('/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_training_dir/train/cats/cat.0.jpg')  # this is a PIL image
# x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
# x = x.reshape((1,) + x.shape)  # this is a Numpy array with shape (1, 3, 150, 150)

In [16]:
# i = 0
# for batch in train_datagen.flow(x, batch_size=1,
#                           save_to_dir='/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/preview', save_prefix='cat', save_format='jpeg'):
#     i += 1
#     if i > 20:
#         break  # otherwise the generator would loop indefinitely

### Step 7: Fit model

In [17]:
# Train from the directory generators. Step counts are whole batches per pass:
# integer division ignores any final partial batch.
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    steps_per_epoch=nb_train_samples // batch_size,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Step 7.5: Save model

In [None]:
# write the trained model to 'catsndogs.h5' (relative path, so it lands in the current working directory)
model.save('catsndogs.h5')

### Step 7.9: Reload model (if needed)

In [None]:
# model = load_model('catsndogs.h5')

### Step 8: Evaluate model

In [None]:
# list which series were recorded during training (these names are used as keys in the plots below)
print(history.history.keys())

In [None]:
# Training vs. validation loss per epoch, drawn in legend order.
for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name])

plt.legend(['train', 'validation'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
# Older Keras releases record accuracy under 'acc'/'val_acc', newer ones under
# 'accuracy'/'val_accuracy'; use whichever this History object contains so the
# cell does not die with a KeyError after a Keras upgrade.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model acc')
plt.ylabel('acc')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

### Step 10: Test things

In [None]:
def cropResizeImages(img, size=(150, 150)):
    """Center-crop an image to a square and resize it to ``size``.

    Parameters
    ----------
    img : PIL image (any object exposing ``size``, ``crop(box)`` and ``resize(size)``)
        The picture to prepare. ``img.size`` is read as ``(width, height)``.
    size : sequence of two ints, optional
        Target ``(width, height)``; defaults to the 150x150 the network was built for.

    Returns
    -------
    The cropped and resized image as returned by ``img.resize``.

    Notes
    -----
    Images that are already square are resized as well. Previously they were
    returned untouched, so a square picture that was not 150x150 reached the
    model with the wrong shape.
    """
    width, height = img.size

    # The largest centered square has the shorter edge as its side length.
    side = min(width, height)

    # Integer division keeps the crop box in whole pixels on Python 2 and 3 alike.
    left = (width - side) // 2
    top = (height - side) // 2

    img = img.crop((left, top, left + side, top + side))
    return img.resize(tuple(size))

In [None]:
# Path of the single image to run through the model; uncomment exactly one line to choose it.
# test_img = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_testing_dir/11.jpg" # cat
# test_img = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_testing_dir/27.jpg" # dog
# test_img = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_testing_dir/82.jpg" # cat
test_img = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_training_dir/train/cats/cat.100.jpg" #cat, training
# test_img = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_training_dir/train/dogs/dog.100.jpg" #dog

In [None]:
# Load the image only while `test_img` still holds a path. This cell rebinds the name to the
# loaded image, so without the guard a second run would hand an image object to load_img and fail.
if isinstance(test_img, str):
    test_img = load_img(test_img)

In [None]:
# square-crop and resize the loaded image, then convert it to a NumPy array for display and prediction
test_img_cropped = cropResizeImages(test_img)
test_img_in = np.array(test_img_cropped)
plt.imshow(test_img_in);

In [None]:
# Add the batch axis and apply the same 1/255 rescaling that the training and validation
# generators use; the network never saw raw 0-255 pixel values during training.
test_batch = test_img_in[np.newaxis, :, :, :].astype('float32') / 255.

# model.predict returns the sigmoid output; thresholding at 0.5 reproduces what
# predict_classes does for a single-unit output, without relying on the
# predict_classes / predict_proba helpers that newer Keras releases no longer provide.
# NOTE(review): presumably 0 = cats and 1 = dogs -- confirm with train_generator.class_indices.
probs = model.predict(test_batch)
preds = (probs > 0.5).astype('int32')
print(preds, probs)

In [None]:
# Deliberate stop for "Run All": the cells below are unfinished scratch work.
# A bare `break` outside a loop is a SyntaxError, so halt with an explicit message instead.
raise SystemExit("Stopping here: the cells below are unfinished scratch work.")

### Step 8: Run images through model and sort according to predictions

In [None]:
# folder whose .jpg files the next cell iterates over (presumably the unlabeled images to sort -- confirm)
cleaning_dir = "/Users/minhgeneralassembly/Downloads/kaggle_catsanddogs/small_testing_dir"

In [None]:
# Walk the folder and read every .jpg it contains.
for filename in os.listdir(cleaning_dir):
    if filename.endswith('.jpg'):
        # JPEGs are binary data: open in 'rb' so no text decoding or newline translation is applied.
        with open(os.path.join(cleaning_dir, filename), 'rb') as f:
            content = f.read()
            # was `print con`: an undefined name written with Python-2-only print syntax
            print(content)

### Step 6: Preprocess class labels for Keras

In [None]:
# taking a look at the shape of our class label data
# NOTE(review): y_train is not defined in any of the cells shown above -- it must be loaded first.
# print(...) with one argument behaves the same on Python 2 and 3; the bare print statement is Python-2-only.
print(y_train.shape)

In [None]:
# expecting 10 classes (one for each digit)

In [None]:
# looking at labels for first 10 training samples
# (function-call form of print so the cell also parses on Python 3)
print(y_train[:10])

In [None]:
# preprocess class labels
# One-hot encode: each 1-dimensional class array becomes a 10-dimensional class matrix.
Y_train, Y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [None]:
# resulting shape
# (function-call form of print so the cell also parses on Python 3)
print(Y_train.shape)

### Step 7: Define model architecture

In [None]:
# using a pre-defined / proven architecture for this
# declare sequential model
# NOTE(review): this rebinds `model`, so the network built and trained earlier in the notebook
# is no longer reachable under that name once this cell runs.
model = Sequential()

In [None]:
# declare CNN input layer
# The input is declared as (1, 28, 28), i.e. with the channel axis first, so the layer is told
# data_format="channels_first" explicitly. The commented-out line below is the earlier spelling
# of the same call, which used dim_ordering='th' instead.
# model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1,28,28), dim_ordering='th'))
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1,28,28), data_format="channels_first"))

In [None]:
# show the output shape after the layers added so far
# (function-call form of print so the cell also parses on Python 3)
print(model.output_shape)

In [None]:
# adding more layers to model
# The input layer is declared channels_first, so these layers are told the same layout.
# Without it they fall back to the backend default and, when that default is channels_last,
# treat the wrong axis as the channel axis.
model.add(Convolution2D(32, (3, 3), activation='relu', data_format="channels_first"))
model.add(MaxPooling2D(pool_size=(2,2), data_format="channels_first"))
model.add(Dropout(0.25))

In [None]:
# fully connected dense layers
# Head: flatten the feature maps, one hidden dense layer with dropout, then a 10-way softmax.
for head_layer in (Flatten(),
                   Dense(128, activation='relu'),
                   Dropout(0.5),
                   Dense(10, activation='softmax')):
    model.add(head_layer)

### Step 8. Compile model

In [None]:
# Softmax over 10 classes -> categorical cross-entropy loss; report accuracy during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Step 8.5. Validation set

In [None]:
# Create 20% validation set: the first fifth of the training data is held out.
# Integer division keeps the split point usable as a slice index; with `/` it becomes a
# float on Python 3 and slicing raises TypeError.
val_size = len(X_train) // 5

X_val = X_train[:val_size]
partial_X_train = X_train[val_size:]

Y_val = Y_train[:val_size]
partial_Y_train = Y_train[val_size:]

### Step 9: Fit model on training data

In [None]:
# normal
# history = model.fit(X_train, Y_train, batch_size=32, epochs=2, verbose=1)

In [None]:
# Fit on the reduced training set and score each epoch against the held-out validation slice.
history = model.fit(
    partial_X_train,
    partial_Y_train,
    validation_data=(X_val, Y_val),
    batch_size=32,
    epochs=5,
    verbose=1,
)

In [None]:
# list which series were recorded during training (these names are used as keys in the plot below)
print(history.history.keys())

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# 'val_loss' is measured on the validation split passed to fit(), not on the test set,
# so the second curve is labelled 'validation' (it used to read 'test').
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

### Step 10: Evaluate model on test data

In [None]:
# score the model on the test arrays; verbose=0 keeps the cell quiet
# (presumably returns the loss followed by the compiled metrics -- confirm)
score = model.evaluate(X_test, Y_test, verbose=0)

In [None]:
# show the evaluation result (function-call form of print so the cell also parses on Python 3)
print(score)

### Goal: Unshape image to look at, then predict on

In [None]:
# pick one test sample (index 999) to look at and predict on
test_image = X_test[999]

In [None]:
# view test_image (undoing the transformation on a copy)
# `test_image * 255` allocates a new array. The earlier `test_image_in = test_image` followed by
# `*= 255` only aliased the sample and scaled it in place (assuming NumPy arrays), so the
# predict cells below were fed pixel values 255x too large.
test_image_in = (test_image * 255).reshape(28, 28)
plt.imshow(test_image_in, cmap=plt.cm.binary)

In [None]:
# predict the class of the selected test image

In [None]:
# predicted class for the single sample; np.newaxis adds the leading batch axis
model.predict_classes(test_image[np.newaxis, :, :, :])

In [None]:
# raw model output for the same sample (batch axis added the same way)
model.predict(test_image[np.newaxis, :, :, :])