## Image Recognition with CNNs using Tensorflow/Keras

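This notebook trains a convolutional neural network (CNN) with Keras on the TensorFlow backend to classify images as cats or dogs.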

```sh
python -m pip install tensorflow
python -m pip install pandas
python -m pip install numpy
```

Read in the file names from the train and test directories, convert the images to NumPy arrays of RGB values, save the training and test arrays, then label the training data and save the labels.

In [2]:
import os, cv2, random, h5py
import numpy as np
import pandas as pd
import pickle

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns

# Get file names for all train and test images.
# NOTE: these paths are hard-coded to one machine; adjust them before running
# the notebook anywhere else.
os.chdir('/Users/stevenhurwitt/Documents/Python/convnet/')
TRAIN_DIR = '/Users/stevenhurwitt/Documents/Python/convnet/train/'
TEST_DIR = '/Users/stevenhurwitt/Documents/Python/convnet/test/'

# Size and channel count of every image array produced by prep_data.
ROWS = 256
COLS = 256
CHANNELS = 3

# List the training directory once so both class lists come from the same
# snapshot of its contents.
train_files = os.listdir(TRAIN_DIR)
#train_images = [TRAIN_DIR+i for i in train_files] # use this for full dataset
train_dogs = [TRAIN_DIR + name for name in train_files if 'dog' in name]
train_cats = [TRAIN_DIR + name for name in train_files if 'cat' in name]

test_images = [TEST_DIR + name for name in os.listdir(TEST_DIR)]

# Mix the two classes; the labels are derived from this shuffled list later,
# so images and labels stay aligned.
train_images = train_dogs + train_cats
random.shuffle(train_images)

#read the image into a matrix of rgb values
def read_image(file_path):
    """Load an image file as a resized RGB array.

    Args:
        file_path: Path of the image file to read.

    Returns:
        The image with its channels in R, G, B order, resized with bicubic
        interpolation. ``cv2.resize`` takes its size as (width, height), so
        ``(ROWS, COLS)`` yields an array of shape ``(COLS, ROWS, 3)``;
        ``prep_data`` relies on that when it transposes the result.

    Raises:
        IOError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)  # or cv2.IMREAD_GRAYSCALE
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(file_path))

    # OpenCV decodes to B, G, R channel order; convert to R, G, B.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)

#read all images into one numpy array
def prep_data(images):
    """Read every image file and stack the results into a single array.

    Args:
        images: Sequence of image file paths.

    Returns:
        A ``uint8`` array of shape ``(len(images), CHANNELS, ROWS, COLS)``
        holding one channels-first image per entry of ``images``.
    """
    count = len(images)
    # Every slot is overwritten in the loop below, so the buffer does not
    # need to be initialised.
    data = np.empty((count, CHANNELS, ROWS, COLS), dtype=np.uint8)

    for i, image_file in enumerate(images):
        # read_image returns the channel axis last; .T reverses the axis
        # order so the channel axis comes first.
        data[i] = read_image(image_file).T
        if i % 10000 == 0:
            print('Processed {} of {}'.format(i, count))

    return data


# Convert the image files to arrays.
train = prep_data(train_images)
test = prep_data(test_images)

print("Train shape: {}".format(train.shape))
print("Test shape: {}".format(test.shape))

# np.savez appends ".npz" and stores a positional array under the key 'arr_0'.
print("saving images as numpy arrays...")
np.savez("train", train)
np.savez("test", test)

# Label each training image: 1 = dog, 0 = cat. Only the file name is
# inspected, so a directory whose name happens to contain "dog" cannot turn
# every label into 1.
labels = [1 if 'dog' in os.path.basename(path) else 0 for path in train_images]

print("saving labels...")
with open('labels.data', 'wb') as filehandle:
    # store the data as binary data stream
    pickle.dump(labels, filehandle)


  from ._conv import register_converters as _register_converters


Processed 0 of 25000
Processed 10000 of 25000
Processed 20000 of 25000
Processed 0 of 12500
Processed 10000 of 12500
Train shape: (25000, 3, 256, 256)
Test shape: (12500, 3, 256, 256)
saving images as numpy arrays...
saving labels...


Load the saved arrays and labels, define and train the model, then save the model, its test-set predictions, and the loss history.

In [None]:
from keras import backend as K
from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Conv2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.models import model_from_json
from keras.models import load_model

# Load the image arrays saved earlier. np.savez stored each one under the
# default key 'arr_0'. The context managers close the archive files once the
# arrays have been read into memory.
with np.load("train.npz") as train_npz:
    train = train_npz['arr_0']
with np.load("test.npz") as test_npz:
    test = test_npz['arr_0']

with open('labels.data', 'rb') as filehandle:
    # read the data as binary data stream
    labels = pickle.load(filehandle)

# Optimizer and loss used when the model is compiled in catdog().
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# confirm against the installed version before renaming.
optimizer = RMSprop(lr=1e-4)
objective = 'binary_crossentropy'

def catdog():
    """Build and compile the cat-vs-dog convolutional network.

    The network stacks four convolution blocks (32, 64, 128 and 256 filters,
    each followed by 2x2 max pooling), two dropout-regularised dense layers of
    256 units, and a single sigmoid output unit. It reads the module-level
    ``train`` array for the input shape and the module-level ``optimizer`` and
    ``objective`` when compiling.

    Returns:
        The compiled ``Sequential`` model.
    """
    # The image arrays are stored as (samples, channels, rows, cols). The
    # pooling layers were already declared channels-first; the convolutions
    # must be told the same, otherwise they fall back to the backend default
    # and may treat the channel axis as a spatial axis.
    data_format = "channels_first"

    # (filters, number of stacked convolutions, message printed after the block)
    conv_blocks = [
        (32, 2, "First layer..."),
        (64, 2, "Second layer..."),
        (128, 2, "Third layer..."),
        (256, 3, None),
    ]

    model = Sequential()

    is_first_layer = True
    for filters, depth, message in conv_blocks:
        for _ in range(depth):
            # Only the very first layer of a Sequential model declares the
            # input shape.
            extra = {'input_shape': train.shape[1:]} if is_first_layer else {}
            model.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                             data_format=data_format, **extra))
            is_first_layer = False
        model.add(MaxPooling2D(pool_size=(2, 2), data_format=data_format))
        if message is not None:
            print(message)

    print("Flattening, etc...")
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))

    # Single sigmoid unit: the output is read as the probability of "dog"
    # (label 1).
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    print("Compiling model...")
    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
    return model

# Training hyper-parameters read by run_catdog().
batch_size = 16
epochs = 10

# Build and compile the network once, at module level.
print("Creating model:")
model = catdog()

## Callback for loss logging per epoch
class LossHistory(Callback):
    """Record the training and validation loss at the end of every epoch.

    Attributes are plain lists so they can be plotted directly:
    ``losses`` holds the value logged under 'loss' and ``val_losses`` the
    value logged under 'val_loss', one entry per finished epoch.
    """

    def on_train_begin(self, logs=None):
        """Reset both loss lists at the start of training."""
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append the losses logged for the epoch that just finished.

        The first parameter keeps its original name ``batch``; the callback
        is invoked once per epoch, so the value is presumably the epoch
        index. It is not used here.
        """
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

    def __getstate__(self):
        """Pickle only the recorded losses.

        NOTE(review): Keras attaches the model being trained to its
        callbacks, and that model is generally not picklable; restricting the
        state to the two lists keeps ``pickle.dump(history, ...)`` working
        and the saved file small.
        """
        return {
            'losses': getattr(self, 'losses', []),
            'val_losses': getattr(self, 'val_losses', []),
        }

# Stop training early based on the validation loss, with a patience of 3.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    mode='auto',
)
       

def run_catdog():
    """Train the module-level model, save it, and predict on the test set.

    Uses the module-level ``model``, ``train``, ``labels``, ``test``,
    ``batch_size``, ``epochs`` and ``early_stopping``. Writes the model
    architecture to ``catdog.json`` and its weights to ``catdog.h5``.

    Returns:
        A ``(predictions, history)`` tuple: the model's output for ``test``
        and the ``LossHistory`` callback holding the per-epoch losses.
    """
    history = LossHistory()
    print("running model...")
    # Pass the targets as an array rather than a plain Python list.
    # NOTE(review): some Keras versions interpret a list as one array per
    # model output instead of one label per sample.
    targets = np.asarray(labels)
    # validation_split holds out the last 25% of the samples for validation.
    model.fit(train, targets, batch_size=batch_size, epochs=epochs,
              validation_split=0.25, verbose=2, shuffle=True,
              callbacks=[history, early_stopping])

    print("saving model...")
    with open("catdog.json", "w") as json_file:
        json_file.write(model.to_json())
    model.save_weights("catdog.h5")

    print("making predictions on test set...")
    predictions = model.predict(test, verbose=0)
    return predictions, history

# Train the model and collect the test-set predictions and loss history.
predictions, history = run_catdog()

#save model, predictions & history
model.save('catdog.hdf5')

# Pickle each result object to its own file, predictions first.
for out_path, payload in (('preds.data', predictions),
                          ('history.data', history)):
    with open(out_path, 'wb') as out_file:
        pickle.dump(payload, out_file)

Using TensorFlow backend.


Load the data, labels, model, predictions, and history. Plot the loss over epochs for the training and validation data, then show sample predictions.

In [None]:

#load training/test data
print("loading data...")

# The arrays were saved with np.savez under the default key 'arr_0'. The
# context managers close the archive files once the arrays are in memory.
with np.load("train.npz") as train_npz:
    train = train_npz['arr_0']
with np.load("test.npz") as test_npz:
    test = test_npz['arr_0']

#load labels
print("loading labels...")

with open('labels.data', 'rb') as filehandle:
    # read the data as binary data stream
    labels = pickle.load(filehandle)


#load model
print("loading model...")

# Option 1: rebuild the architecture from JSON, then load the weights.
with open('catdog.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("catdog.h5")
print("loaded model from disk")

# Option 2: load the complete saved model. This replaces the model built
# above, so `loaded_model` ends up referring to the one from catdog.hdf5.
loaded_model = load_model('catdog.hdf5')

print("loading predictions")

with open('preds.data', 'rb') as filehandle:
    predictions = pickle.load(filehandle)

# Unpickling the history requires the LossHistory class to be defined in
# the running session.
with open('history.data', 'rb') as filehandle:
    history = pickle.load(filehandle)


# Per-epoch training and validation losses recorded during training.
loss = history.losses
val_loss = history.val_losses

# Plot the training and validation loss recorded for each epoch.
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('VGG-16 Loss Trend')
plt.plot(loss, 'blue', label='Training Loss')
plt.plot(val_loss, 'green', label='Validation Loss')
# Tick every second epoch that was actually recorded. Early stopping can end
# training before the configured number of epochs, and the `epochs` variable
# from the training cell may not exist when only this cell is run.
plt.xticks(range(0, len(loss), 2))
plt.legend()
plt.show()
   
#show sample of predictions
# Show at most the first 10 predictions; fewer if the array is shorter.
for i in range(min(10, len(predictions))):
    # The model's single sigmoid output is treated as the probability of "dog".
    dog_probability = predictions[i][0]
    if dog_probability >= 0.5:
        print('I am {:.2%} sure this is a Dog'.format(dog_probability))
    else:
        print('I am {:.2%} sure this is a Cat'.format(1 - dog_probability))

    # .T reverses the axis order, moving the channel axis back to the end as
    # imshow expects for colour images.
    plt.imshow(test[i].T)
    # Render each image as its own figure before moving on to the next one.
    plt.show()
