work in progress :)

In [None]:
import os, random, cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

I borrowed some data preparation functions from [1](https://www.kaggle.com/jeffd23/catdognet-keras-convnet-starter) and [2](https://www.kaggle.com/kbhits/tensorflow-starter-kit-fixed).

In [None]:
# Locations of the labelled training images and the unlabelled test images.
TRAIN_DIR = "./data/cats_v_dogs/train/"
TEST_DIR = "./data/cats_v_dogs/test/"

# Every image is resized to WIDTH x HEIGHT pixels with CHANNELS colour channels.
WIDTH = HEIGHT = 64
CHANNELS = 3

# Largest value of a pixel channel; used to scale the inputs.
PIXEL_DEPTH = 255

# Number of images used for training and for validation.
NUM_TRAIN, NUM_VALID = 2000, 200


# os.listdir returns every directory entry. Skip hidden files (e.g. .DS_Store)
# and sub-directories: they are not images and would make the labelling and
# loading code fail later on.
images = [
    name for name in os.listdir(TRAIN_DIR)
    if not name.startswith(".") and os.path.isfile(os.path.join(TRAIN_DIR, name))
]

# Shuffle so that the training/validation split below is random.
random.shuffle(images)

# The first NUM_TRAIN names are used for training, the next NUM_VALID for
# validation; the two slices never overlap.
train_images = images[:NUM_TRAIN]
valid_images = images[NUM_TRAIN:NUM_TRAIN + NUM_VALID]


def label_img(img):
    """Return the one-hot label encoded in an image file name.

    File names are expected to look like ``cat.123.jpg`` or ``dog.7.jpg``:
    the class is the third dot-separated component counted from the end.

    Args:
        img: file name of the image.

    Returns:
        ``[1, 0]`` for a cat and ``[0, 1]`` for a dog.

    Raises:
        ValueError: if the name has fewer than three dot-separated components
            or its class component is neither ``cat`` nor ``dog``.
    """
    parts = img.split('.')

    # Too-short names used to fail with a bare IndexError; treat them like
    # any other unrecognised name so the caller gets the helpful message.
    word_label = parts[-3] if len(parts) >= 3 else None

    if word_label == 'cat':
        return [1, 0]
    if word_label == 'dog':
        return [0, 1]

    raise ValueError("Something is wrong with the image names. Did you specify the path properly?")

def load_training_data(names):
    """Load, resize and label the training images listed in ``names``.

    Args:
        names: file names, relative to ``TRAIN_DIR``, of the images to load.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float array of shape
        ``(len(names), HEIGHT, WIDTH, CHANNELS)`` holding the resized pixel
        values as returned by OpenCV (BGR channel order). ``labels`` is an
        int32 array of shape ``(len(names), 2)`` whose rows are the one-hot
        labels produced by ``label_img``.

    Raises:
        IOError: if an image file cannot be read.
        Any error raised by ``label_img`` for an unrecognised file name.
    """
    # OpenCV images are indexed (row, column, channel), i.e. height first.
    data = np.empty((len(names), HEIGHT, WIDTH, CHANNELS))
    labels = np.empty((len(names), 2), dtype=np.int32)

    for i, name in enumerate(names):
        path = os.path.join(TRAIN_DIR, name)

        label = label_img(name)

        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising; fail here with the offending path.
            raise IOError("Could not read image: %s" % path)

        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_CUBIC)

        data[i] = img
        labels[i] = label

    return data, labels


# Load the raw pixel data and one-hot labels for both splits.
train_data, train_labels = load_training_data(train_images)
valid_data, valid_labels = load_training_data(valid_images)

# Shift and scale the pixel values by PIXEL_DEPTH so that inputs in
# [0, PIXEL_DEPTH] end up in [-0.5, 0.5].
train_norm = (train_data - PIXEL_DEPTH / 2) / PIXEL_DEPTH
valid_norm = (valid_data - PIXEL_DEPTH / 2) / PIXEL_DEPTH

In [None]:
class Dataset():
    """Container for the training and validation arrays.

    Training data is served in consecutive mini-batches; a batch that runs
    past the end of the training set wraps around to its beginning.
    """

    def __init__(self, train_data, train_labels, valid_data, valid_labels, batch_size=50):
        """Store the arrays and start batching at the first training example."""
        self.batch_size = batch_size
        self.i = 0  # index of the first example of the next batch

        self.train_data, self.train_labels = train_data, train_labels
        self.valid_data, self.valid_labels = valid_data, valid_labels

        self.size = self.train_data.shape[0]

    def get_batch(self):
        """Return the next ``(data, labels)`` training mini-batch."""
        start = self.i
        stop = start + self.batch_size
        indices = range(start, stop)

        # mode="wrap" maps indices beyond the last example back to the start.
        batch_data = self.train_data.take(indices, axis=0, mode="wrap")
        batch_labels = self.train_labels.take(indices, axis=0, mode="wrap")

        self.i = stop % self.size
        return batch_data, batch_labels

    def get_valid_data(self):
        """Return the full validation set as ``(data, labels)``."""
        return self.valid_data, self.valid_labels
    
# The network is trained and validated on the normalised pixel values.
dset = Dataset(
    train_data=train_norm,
    train_labels=train_labels,
    valid_data=valid_norm,
    valid_labels=valid_labels,
)

In [None]:
print("A few examples:")

# Show the first three (un-normalised) training images.
for i in range(3):
    # cv2.imread stores colour images in BGR order while matplotlib expects
    # RGB; convert so the colours are displayed correctly.
    rgb = cv2.cvtColor(train_data[i].astype(np.uint8), cv2.COLOR_BGR2RGB)
    plt.imshow(rgb, interpolation="bicubic")
    plt.show()

In [None]:
# Build the classifier graph from scratch: two convolution/max-pooling stages
# followed by a hidden dense layer and a two-unit output layer.
tf.reset_default_graph()

# Images are fed as (batch, height, width, channels), which is how the loaded
# OpenCV images are laid out.
input_layer = tf.placeholder(tf.float32, shape=(None, HEIGHT, WIDTH, CHANNELS))
# One-hot targets: column 0 is "cat", column 1 is "dog".
one_hot_labels = tf.placeholder(tf.int64, shape=(None, 2))

# Stage 1: 16 filters of size 5x5, then 2x2 max pooling with stride 2.
conv1 = tf.layers.conv2d(input_layer, 16, (5, 5), padding="same", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv1, (2, 2), 2)

# Stage 2: 32 filters of size 5x5, then 2x2 max pooling with stride 2.
conv2 = tf.layers.conv2d(pool1, 32, (5, 5), padding="same", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv2, (2, 2), 2)

# Flatten every example to a vector so it can be fed to the dense layers.
pool2_flat = tf.reshape(pool2, [-1, pool2.get_shape()[1].value * pool2.get_shape()[2].value * pool2.get_shape()[3].value])
dense = tf.layers.dense(pool2_flat, units=64, activation=tf.nn.relu)

# Un-normalised class scores; the softmax is applied inside the loss.
logits = tf.layers.dense(dense, units=2)

# Predicted and ground-truth class indices.
predictions = tf.argmax(input=logits, axis=1)
predictions_gt = tf.argmax(input=one_hot_labels, axis=1)

# Fraction of examples whose predicted class matches the ground truth.
correct = tf.equal(predictions, predictions_gt)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Mean softmax cross-entropy over the batch.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits))

optimizer = tf.train.AdamOptimizer(0.001)
train_op = optimizer.minimize(loss)


In [None]:
def train_step():
    """Run one optimisation step on the next training mini-batch.

    Uses the module-level session ``sess``, so it must be called while that
    session is open.

    Returns:
        The value of ``loss`` on the mini-batch.
    """
    batch_x, batch_y = dset.get_batch()
    feed = {input_layer: batch_x, one_hot_labels: batch_y}

    # train_op is run only for its side effect; its result is discarded.
    results = sess.run([loss, train_op], feed_dict=feed)
    return results[0]

def valid_step():
    """Evaluate the model on the whole validation set.

    Uses the module-level session ``sess``, so it must be called while that
    session is open.

    Returns:
        The value of ``accuracy`` on the validation data.
    """
    features, targets = dset.get_valid_data()
    feed = {input_layer: features, one_hot_labels: targets}
    return sess.run(accuracy, feed_dict=feed)

def baseline():
    """Print the accuracy of always predicting the majority validation class.

    Ties are reported as "always predict cats".
    """
    targets = dset.get_valid_data()[1]

    # Labels are one-hot: column 0 counts the cats, column 1 the dogs.
    num_cats = np.sum(targets[:, 0])
    num_dogs = np.sum(targets[:, 1])
    total = num_cats + num_dogs

    if num_dogs > num_cats:
        print("always predict dogs accuracy: %.2f%%" % (num_dogs / total * 100))
        return

    print("always predict cats accuracy: %.2f%%" % (num_cats / total * 100))
        
def example_predictions(sess, amount=10):
    """Predict the classes of the first ``amount`` validation images.

    Args:
        sess: open session in which to run the ``predictions`` tensor.
        amount: number of leading validation examples to classify.

    Returns:
        The predicted class index for each selected example.
    """
    valid_inputs = dset.get_valid_data()[0]
    return sess.run(predictions, feed_dict={input_layer: valid_inputs[:amount]})

In [None]:
# Training hyper-parameters.
num_iters = 1000      # number of optimisation steps
batch_size = 32       # examples per training mini-batch
log_frequency = 100   # report progress every this many steps

dset.batch_size = batch_size

init_op = tf.global_variables_initializer()

# The session must be named ``sess``: train_step() and valid_step() look it
# up as a module-level name.
with tf.Session() as sess:
    sess.run(init_op)

    # Sum of the training losses since the last progress report.
    total_loss = 0

    for step in range(num_iters):
        # At every multiple of log_frequency (except step 0) report the mean
        # training loss of the preceding steps and the validation accuracy.
        if step > 0 and step % log_frequency == 0:
            mean_loss = total_loss / log_frequency
            print("training loss: %f" % mean_loss)
            total_loss = 0

            tmp_accuracy = valid_step()
            print("validation accuracy: %.2f%%" % (tmp_accuracy * 100))

        total_loss += train_step()

    tmp_accuracy = valid_step()
    print("\nfinal accuracy: %.2f%%" % (tmp_accuracy * 100))

    # Compare against the trivial majority-class predictor and keep a few
    # predictions for later inspection.
    baseline()
    sample_predictions = example_predictions(sess)

In [None]:
# Show each sample prediction together with the image it was made for.
# example_predictions() classifies the first validation images, so the
# pictures must come from valid_data (not train_data).
for i, prediction in enumerate(sample_predictions):
    # Class index 0 is "cat", 1 is "dog" (see the one-hot labels).
    if prediction == 0:
        print("prediction: cat")
    else:
        print("prediction: dog")

    # cv2.imread stores colour images in BGR order while matplotlib expects
    # RGB; convert so the colours are displayed correctly.
    rgb = cv2.cvtColor(valid_data[i].astype(np.uint8), cv2.COLOR_BGR2RGB)
    plt.imshow(rgb, interpolation="bicubic")
    plt.show()