# Having some fun with CNNs!
This is going to be a very standard project, but it's Saturday night and I'm sitting at home, so I want to have some fun!

In [1]:
#Imports
import tensorflow as tf
import numpy as np
import pandas as pd
from PIL import Image
import os
from keras.applications import Xception
from random import shuffle
from pathlib import Path
import matplotlib.pyplot as plt


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Preprocessing the Dataset
Since the dataset is so well structured, we will take advantage of that and write the loading function by hand: the harder way, but also the clearest!

In [2]:
def getImage(path, newWidth, newHeight):
    """Load an image file, resize it and return it as an RGB pixel array.

    Args:
        path: Location of the image file to open.
        newWidth: Target width in pixels.
        newHeight: Target height in pixels.

    Returns:
        A NumPy array built from the image after it has been resized with
        bilinear resampling and converted to 'RGB' mode.
    """
    # The context manager guarantees the underlying file handle is released
    # as soon as the pixels have been copied out, even if resize/convert
    # raises; this function is called once per file in a long loading loop.
    with Image.open(path) as img:
        resized = img.resize((newWidth, newHeight), Image.BILINEAR)
        return np.array(resized.convert('RGB'))

In [3]:
def getDataset(directoryPath, width, height, n_classes=10):
    """Load the training/validation images and split validation into val/test.

    The dataset is expected to be laid out as
    ``<directoryPath>/training/n<i>/`` and ``<directoryPath>/validation/n<i>/``
    for every class index ``i`` in ``range(n_classes)``; the class index is
    used as the integer label of every file found in that folder.

    Args:
        directoryPath: Root folder of the dataset.
        width: Width every image is resized to.
        height: Height every image is resized to.
        n_classes: Number of ``n<i>`` class folders to read (defaults to 10,
            the value that used to be hard-coded).

    Returns:
        A 6-tuple of NumPy arrays, in this order:
        ``(training_set, test_set, training_labels, test_labels,
        val_set, val_labels)``. The shuffled validation folder is cut in
        half: the first half becomes the validation split, the rest the
        test split.
    """
    training_list_dirs = os.path.join(directoryPath, 'training/')
    validation_list_dirs = os.path.join(directoryPath, 'validation/')

    trainingArray = []
    validationArray = []

    for i in range(n_classes):
        class_dir = 'n{}/'.format(i)

        training_files_dir = os.path.join(training_list_dirs, class_dir)
        training_files = os.listdir(training_files_dir)

        validation_files_dir = os.path.join(validation_list_dirs, class_dir)
        validation_files = os.listdir(validation_files_dir)

        for file in training_files:
            trainingArray.append((getImage(os.path.join(training_files_dir, file), width, height), i))
        for file in validation_files:
            validationArray.append((getImage(os.path.join(validation_files_dir, file), width, height), i))

    # Shuffle and split once, after every class has been read. Doing this
    # inside the class loop repeated the work for each class and left the
    # return values undefined when no class was processed.
    shuffle(trainingArray)
    shuffle(validationArray)

    training_set = [image for image, _ in trainingArray]
    training_labels = [label for _, label in trainingArray]

    validation_set = [image for image, _ in validationArray]
    validation_labels = [label for _, label in validationArray]

    val_size = len(validation_set) // 2

    val_set, val_labels = validation_set[:val_size], validation_labels[:val_size]
    test_set, test_labels = validation_set[val_size:], validation_labels[val_size:]

    return (np.array(training_set), np.array(test_set),
            np.array(training_labels), np.array(test_labels),
            np.array(val_set), np.array(val_labels))

In [4]:
# Read the dataset from disk with every image resized to 150x150.
# Note the order of the returned tuple: test data comes before the labels.
train_set, test_set, train_labels, test_labels, val_set, val_labels = getDataset('./10-monkey-species/', 150, 150)
# Sanity check on the loaded training array.
print(train_set.shape)

(1098, 150, 150, 3)


# Preprocessing the data
We use the Xception model to extract features from the images. I tested the data on a custom CNN and didn't get good results, so this is the way to go.

In [5]:
def preprocessData(imageData, model, mode = 'Inference', path = None):
    """Run images through ``model`` after Xception input preprocessing.

    Args:
        imageData: Array of images; it is cast to float32 before being passed
            to the Xception ``preprocess_input`` helper.
        model: Object exposing ``predict``; used to turn the preprocessed
            images into feature vectors.
        mode: ``'Inference'`` (default) only returns the predictions. Any
            other value also saves them under ``path``.
        path: Directory the predictions are saved into when ``mode`` is not
            ``'Inference'``. Created if missing.

    Returns:
        ``results`` when ``mode == 'Inference'``; otherwise the tuple
        ``(results, full_path)`` where ``full_path`` is
        ``<path>/preprocessed_input``. ``np.save`` appends ``.npy`` to that
        name, so the file on disk is ``full_path + '.npy'``.

    Raises:
        ValueError: If saving is requested but ``path`` is None.
    """
    saving = mode != 'Inference'

    # Fail before the expensive forward pass rather than after it.
    if saving and path is None:
        raise ValueError("path must be provided when mode is not 'Inference'")

    proc = tf.contrib.keras.applications.xception.preprocess_input(imageData.astype(np.float32))
    results = model.predict(proc)

    if not saving:
        return results

    # exist_ok keeps the cell re-runnable: a second run finds the directory
    # already present and must not raise FileExistsError.
    os.makedirs(path, exist_ok=True)
    full_path = os.path.join(path, 'preprocessed_input')
    np.save(full_path, results)
    return results, full_path

In [6]:
def loadXception():
    """Build a headless Xception network with every layer frozen.

    The weights are read from a local
    'xception_weights_tf_dim_ordering_tf_kernels_notop.h5' file, the
    classification top is left out and max pooling is requested for the
    output.

    Returns:
        The Xception model with ``trainable`` set to False on each layer.
    """
    weights_file = Path('xception_weights_tf_dim_ordering_tf_kernels_notop.h5')
    feature_extractor = Xception(
        include_top = False,
        weights = weights_file,
        pooling = 'max',
    )
    # Freeze every layer of the pretrained network.
    for frozen_layer in feature_extractor.layers:
        frozen_layer.trainable = False
    return feature_extractor

In [7]:
def getInputs(batch, n_classes):
    """Create the graph placeholders for features, labels and dropout.

    Args:
        batch: 2-D array of feature vectors; only ``batch.shape[1]`` (the
            feature length) is read.
        n_classes: Unused here; kept so existing callers keep working.

    Returns:
        ``(x, y, keep_prob)``: a float32 placeholder of shape
        ``(None, feature_length)``, an int32 placeholder of shape ``(None,)``
        and a float32 placeholder with no declared shape.
    """
    size = batch.shape[1]

    x = tf.placeholder(tf.float32, shape=(None, size))
    # `(None)` is just `None`, i.e. "any shape at all". The labels are a
    # vector with one integer per example, so declare the rank-1 shape.
    y = tf.placeholder(tf.int32, shape=(None,))
    keep_prob = tf.placeholder(tf.float32)

    return x, y, keep_prob

In [8]:
def full_layer(x, n_classes, keep_prob):
    """Classification head: dense(512, ReLU) -> dropout -> dense(n_classes).

    Args:
        x: Input tensor fed to the first dense layer.
        n_classes: Number of units in the output layer.
        keep_prob: Keep probability handed to ``tf.nn.dropout``.

    Returns:
        The output of the final dense layer (no activation applied).
    """
    hidden = tf.layers.dense(x, 512, activation = tf.nn.relu)
    hidden_dropped = tf.nn.dropout(hidden, keep_prob)
    return tf.layers.dense(hidden_dropped, n_classes)

In [9]:
def Loss(logits, labels):
    """Mean sparse softmax cross-entropy between ``logits`` and ``labels``.

    Args:
        logits: Unnormalised class scores.
        labels: Integer class labels.

    Returns:
        The cross-entropy reduced with ``tf.reduce_mean``.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels = labels,
        logits = logits,
    )
    return tf.reduce_mean(per_example)

In [10]:
def Optimizer(loss, learning_rate):
    """Create the Adam training op that minimises ``loss``.

    Args:
        loss: Tensor to minimise.
        learning_rate: Learning rate given to ``tf.train.AdamOptimizer``.

    Returns:
        The op returned by the optimizer's ``minimize`` call.
    """
    adam = tf.train.AdamOptimizer(learning_rate = learning_rate)
    return adam.minimize(loss)

In [11]:
def Accuracy(logits, labels):
    """Fraction of examples whose label is the top-1 prediction.

    Args:
        logits: Class scores for each example.
        labels: Integer class labels.

    Returns:
        The mean of the top-1 hit indicator after casting it to float.
    """
    hits = tf.nn.in_top_k(logits, labels, 1)
    hits_as_float = tf.cast(hits, "float")
    return tf.reduce_mean(hits_as_float)

In [12]:
# Build the frozen Xception feature extractor once and reuse it for all splits.
model = loadXception()
# mode='Train' makes preprocessData also save the extracted features under the
# given directory and return the save path alongside the features.
pp_train, train_path = preprocessData(train_set, model, mode = 'Train', path = './logs/train')
pp_val, val_path = preprocessData(val_set, model, mode = 'Train', path = './logs/val')
pp_test, test_path = preprocessData(test_set, model, mode = 'Train', path = './logs/test' )

In [13]:
# Hyperparameters for the classifier trained on the extracted features.
n_epochs = 31           # number of passes over the training batches
batch_size = 32         # used to derive how many batches the training set is split into
learning_rate = 0.0001  # passed to the Adam optimizer
n_classes = 10          # number of output units of the classifier
keep_prob = 0.5         # dropout keep probability fed during training

In [14]:
# Clear the default graph before building the classifier.
tf.reset_default_graph()

# Placeholders: x takes the extracted features, y the integer labels and
# k_prob the dropout keep probability.
x, y, k_prob = getInputs(pp_train, n_classes)

logits = full_layer(x, n_classes, k_prob)

cost = Loss(logits, y)

optimizer = Optimizer(cost, learning_rate)

accuracy = Accuracy(logits, y)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    
    init.run()
    
    # The training data is split into batches once, up front; the same
    # batches are then visited in the same order on every epoch.
    n_batches = int(len(pp_train)/batch_size)
    x_batches = np.array_split(pp_train, n_batches)
    y_batches = np.array_split(train_labels, n_batches)
    
    for epoch in range(n_epochs):
        
        for batch in range(n_batches):
            
            x_batch, y_batch = x_batches[batch], y_batches[batch]
            
            # Dropout is active during training (keep_prob from the hyperparameter cell).
            feed = {x:x_batch, y: y_batch, k_prob: keep_prob}
            
            _, loss = sess.run([optimizer, cost], feed_dict = feed)
        
        
        # Validation accuracy is measured with dropout disabled (keep probability 1).
        # NOTE(review): `loss` printed below is the loss of the LAST training
        # batch of the epoch only, not an average over the epoch.
        acc = accuracy.eval({x:pp_val, y:val_labels, k_prob: 1})
        print("Epoch:{}      Loss:{}        Accuracy:{}".format(epoch, loss, acc))
    
    # `saver` stays a notebook-level name; the evaluation cell uses it to
    # restore this checkpoint.
    saver = tf.train.Saver()
    save_path = saver.save(sess, './logs/model/monkeys_cnn.ckpt')
    
    print("Optimization Finished!")
            
            
    

Epoch:0      Loss:0.9760555028915405        Accuracy:0.8823529481887817
Epoch:1      Loss:0.6074819564819336        Accuracy:0.9338235259056091
Epoch:2      Loss:0.35906195640563965        Accuracy:0.9411764740943909
Epoch:3      Loss:0.2673106789588928        Accuracy:0.9558823704719543
Epoch:4      Loss:0.33719488978385925        Accuracy:0.9632353186607361
Epoch:5      Loss:0.1620817929506302        Accuracy:0.9558823704719543
Epoch:6      Loss:0.1514054238796234        Accuracy:0.9632353186607361
Epoch:7      Loss:0.13157455623149872        Accuracy:0.9485294222831726
Epoch:8      Loss:0.15650434792041779        Accuracy:0.9485294222831726
Epoch:9      Loss:0.08994272351264954        Accuracy:0.9558823704719543
Epoch:10      Loss:0.08792774379253387        Accuracy:0.9632353186607361
Epoch:11      Loss:0.05933644250035286        Accuracy:0.9485294222831726
Epoch:12      Loss:0.06460656225681305        Accuracy:0.9558823704719543
Epoch:13      Loss:0.11702905595302582        Accurac

# Evaluating on the test set

In [32]:
# Evaluate on the held-out test features using the checkpoint written by the
# training cell. This cell depends on `saver`, `accuracy`, `x`, `y` and
# `k_prob` created there, so the training cell must have run in this kernel.
# NOTE(review): the recorded execution count (32) is out of sequence with the
# training cell (14); confirm the notebook still works with Restart & Run All.
with tf.Session() as sess:
    saver.restore(sess, './logs/model/monkeys_cnn.ckpt')
    # Dropout is disabled for evaluation (keep probability 1).
    acc = accuracy.eval({x:pp_test, y:test_labels, k_prob:1})
    print(acc)

INFO:tensorflow:Restoring parameters from ./logs/model/monkeys_cnn.ckpt
0.9558824


# Conclusion
This is it! Over 95% accuracy on a relatively small dataset. I could use some tricks to maybe get a better result, like the Keras ImageDataGenerator, but this was good overall. Thanks!