# CNN implementation sanity checks

In [3]:
import sys, os
sys.path.append(os.path.join(os.path.dirname('.'), '../lib/'))

import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


from cs231n.classifiers.cnn import *
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver



# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Plot defaults used by every figure in this notebook.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # show raw pixels, no smoothing between them
plt.rcParams['image.cmap'] = 'gray'  # default colormap for images is grayscale

# Auto-reload external modules so edits to the cs231n sources are picked up
# without restarting the kernel.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division well-defined when both entries are zero.

  Inputs:
  - x, y: numpy arrays (or scalars) that broadcast against each other.

  Returns the maximum relative error over all elements.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward

x = np.random.randn(2, 3, 16, 16)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,)
dout = np.random.randn(2, 3, 8, 8)
conv_param = {'stride': 1, 'pad': 1}
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
dx, dw, db = conv_relu_pool_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], b, dout)

print 'Testing conv_relu_pool'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)

Testing conv_relu_pool
dx error:  9.69117719256e-09
dw error:  1.74442857221e-09
db error:  4.30071778386e-11


## Sanity check loss
After you build a new network, one of the first things you should do is sanity-check the loss. When we add regularization, the loss should go up.

In [5]:
model = ThreeLayerConvNet(input_dim=(1, 96, 96), num_filters=32, filter_size=7,
               hidden_dim=100, num_outputs=15, weight_scale=1e-3, reg=0.0,
               dtype=np.float32)

N = 50
X = np.random.randn(N, 1, 96, 96)
y = np.random.randn(N, 15)

loss, grads = model.loss(X, y)
print 'Initial loss (no regularization): ', loss

model.reg = 0.5
loss, grads = model.loss(X, y)
print 'Initial loss (with regularization): ', loss

Initial loss (no regularization):  16.388273905
Initial loss (with regularization):  776.871306132


## Gradient check
After the loss looks reasonable, use numeric gradient checking to make sure that your backward pass is correct. When you use numeric gradient checking you should use a small amount of artificial data and a small number of neurons at each layer.

In [9]:
num_inputs = 2
input_dim = (1, 48, 48)
reg = 0.0
num_outputs = 30
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randn(num_inputs, num_outputs)

model = ThreeLayerConvNet(num_filters=3, filter_size=3,
                          input_dim=input_dim, hidden_dim=7,
                          num_outputs=num_outputs, loss_fn=l2_loss,
                          dtype=np.float64, weight_scale=1e-2,
                        )
loss, grads = model.loss(X, y)
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))

W1 max relative error: 6.586571e-08
W2 max relative error: 1.984268e-05
W3 max relative error: 1.226340e-05
b1 max relative error: 1.108325e-08
b2 max relative error: 1.061583e-09
b3 max relative error: 3.942324e-08


## Overfit small data
A nice trick is to train your model with just a few training samples. You should be able to overfit small datasets, which will result in very high training accuracy and comparatively low validation accuracy.

In [None]:
# Load previously cleaned data

import os

np_loaded_data_file = '../data/train_data_cleaned.npz'
if not os.path.isfile(np_loaded_data_file):
    print "%s does not exist. See facial_recog_kaggle.ipynb" % np_loaded_data_file
else:
    print "loading %s" % np_loaded_data_file
    npzfile = np.load(np_loaded_data_file)
    print "loaded: ", npzfile.files
    X_train_clean, y_train_clean = npzfile['X_train_clean'], npzfile['y_train_clean']
    X_train_miss, y_train_miss = npzfile['X_train_miss'], npzfile['y_train_miss']
    

        
num_train = 100
num_val = 25

rand_idx = np.random.choice(num_train + num_val, num_train + num_val, replace=False)
X_train_small = X_train_clean[rand_idx[:num_train]]
y_train_small = y_train_clean[rand_idx[:num_train]]

X_val_small = X_train_clean[rand_idx[num_train:num_train + num_val]]
y_val_small = y_train_clean[rand_idx[num_train:num_train + num_val]]


In [None]:
# Bundle the small train / validation split under the keys handed to Solver.
small_data = dict(
    X_train=X_train_small,
    y_train=y_train_small,
    X_val=X_val_small,
    y_val=y_val_small,
)

# Fresh network; every constructor argument other than weight_scale keeps its default.
model = ThreeLayerConvNet(weight_scale=1e-2)

# Train with Adam for 10 epochs on batches of 50, logging every iteration.
solver = Solver(
    model,
    small_data,
    update_rule='adam',
    optim_config=dict(learning_rate=1e-5),
    num_epochs=10,
    batch_size=50,
    print_every=1,
    verbose=True,
)
solver.train()