# Locally Connected Neural Network

Our Final Project


In [1]:
# Use the setup that was given in assignment2/ConvolutionalNetworks
import numpy as np
import matplotlib.pyplot as plt
from cs682.classifiers.cnn import *
from cs682.data_utils import get_CIFAR10_data
from cs682.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs682.layers import *
from cs682.fast_layers import *
from cs682.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the maximum element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division defined when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [2]:
# Load the (preprocessed) CIFAR10 data and list the shape of every split.

data = get_CIFAR10_data()
for split_name, split_array in data.items():
  print('%s: ' % split_name, split_array.shape)

X_train:  (49000, 3, 32, 32)
y_train:  (49000,)
X_val:  (1000, 3, 32, 32)
y_val:  (1000,)
X_test:  (1000, 3, 32, 32)
y_test:  (1000,)


In [69]:
# Import the layer and network we implemented
from lcn import *
from lc_fc_net import *

# Locally Connected Neural Network: Naive forward pass

In [32]:
# Build small, deterministic inputs (evenly spaced values) so the forward
# output is reproducible from run to run.
x_shape = (2, 3, 5, 5)
w_shape = (3, 3, 5, 5)

x = np.linspace(-0.1, 0.5, num=np.prod(x_shape))
x = x.reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape))
w = w.reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

# NOTE(review): the meaning of 'center_dist' is defined in lcn.py, not here.
lcn_param = {'center_dist': 1}
out, _ = lcn_forward(x, w, b, lcn_param)



[2. 2.] [2. 2.]
[[[[-0.09936767 -0.10046236 -0.1046113  -0.10810687 -0.10972791]
   [-0.11134542 -0.11576883 -0.12094344 -0.12387884 -0.12204838]
   [-0.12782472 -0.13629576 -0.14161451 -0.14274388 -0.13597231]
   [-0.13791621 -0.14850785 -0.15318404 -0.15271233 -0.14279329]
   [-0.13369889 -0.14232539 -0.14534925 -0.14414403 -0.13532413]]

  [[ 0.10450627  0.1176822   0.12286899  0.12155761  0.11052414]
   [ 0.12506773  0.14457948  0.15259354  0.15111518  0.13501898]
   [ 0.14289456  0.16794219  0.17844032  0.17684455  0.15636646]
   [ 0.15609654  0.18506905  0.19710535  0.19551028  0.17187368]
   [ 0.15206563  0.17909024  0.19022851  0.18879152  0.1668185 ]]

  [[ 0.30838021  0.33582676  0.35034928  0.3512221   0.3307762 ]
   [ 0.36148087  0.40492779  0.42613053  0.42610921  0.39208634]
   [ 0.41361384  0.47218014  0.49849515  0.49643298  0.44870523]
   [ 0.4501093   0.51864594  0.54739475  0.54373289  0.48654064]
   [ 0.43783016  0.50050587  0.52580626  0.52172708  0.46896114]]]


 

In [73]:
# Numerical gradient check of lcn_backward against lcn_forward.
np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 5, 5)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
lcn_param = {'center_dist': 1}


def lcn_output(x_in, w_in, b_in):
  """Run the LCN forward pass and return only its output (cache dropped)."""
  return lcn_forward(x_in, w_in, b_in, lcn_param)[0]


# Perturb one argument at a time while the other two stay fixed.
dx_num = eval_numerical_gradient_array(lambda x_in: lcn_output(x_in, w, b), x, dout)
dw_num = eval_numerical_gradient_array(lambda w_in: lcn_output(x, w_in, b), w, dout)
db_num = eval_numerical_gradient_array(lambda b_in: lcn_output(x, w, b_in), b, dout)

# Analytic gradients from the implemented backward pass.
out, cache = lcn_forward(x, w, b, lcn_param)
dx, dw, db = lcn_backward(dout, cache)

# Errors should be around e-8 or less.
print('Testing lcn_backward function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

Testing lcn_backward function
dx error:  7.605098238940165e-09
dw error:  6.615969864701e-09
db error:  1.424218603976872e-11


In [77]:
# Compare the wall-clock time of the LCN layer with conv_forward_naive /
# conv_backward_naive on the same input batch.
from time import time
np.random.seed(231)
x = np.random.randn(100, 3, 31, 31)
w_conv = np.random.randn(25, 3, 3, 3)
w_lcn = np.random.randn(3, 3, 28, 28)
b_conv = np.random.randn(25,)
b_lcn = np.random.randn(5,)
conv_param = {'stride': 2, 'pad': 1}
lcn_param = {'center_dist': 1}


t0 = time()
out_naive, cache_naive = conv_forward_naive(x, w_conv, b_conv, conv_param)
t1 = time()
# NOTE(review): b_conv is passed here even though b_lcn is defined above.
# The bias shape lcn_forward expects for w_lcn is not visible in this
# notebook, so the original call is kept -- confirm which bias is intended.
out_lcn, cache_lcn = lcn_forward(x, w_lcn, b_conv, lcn_param)
t2 = time()

print('Testing lcn_forward:')
print('Conv: %fs' % (t1 - t0))
print('LCN: %fs' % (t2 - t1))

# Upstream gradients must have the same shape as the corresponding forward
# output. Build them from the actual outputs instead of reusing `dout` from
# the gradient-check cell, which belongs to a different (4, 2, 5, 5) problem
# and made this cell depend on earlier kernel state.
dout_conv = np.random.randn(*out_naive.shape)
dout_lcn = np.random.randn(*out_lcn.shape)

t0 = time()
dx_naive, dw_naive, db_naive = conv_backward_naive(dout_conv, cache_naive)
t1 = time()
dx_lcn, dw_lcn, db_lcn = lcn_backward(dout_lcn, cache_lcn)
t2 = time()

print('Testing lcn_backward:')
print('Conv: %fs' % (t1 - t0))
print('LCN: %fs' % (t2 - t1))

Testing lcn_forward:
Conv: 4.583212s
LCN: 11.606509s
Testing lcn_backward:
Conv: 6.353170s
LCN: 5.007036s


In [78]:
# Train the three-layer LCN network on a small training subset while
# evaluating on the full validation split.
np.random.seed(231)

num_train = 100
small_data = {}
# Training arrays are truncated to the first num_train examples ...
for split_key in ('X_train', 'y_train'):
  small_data[split_key] = data[split_key][:num_train]
# ... while the validation arrays are used in full.
for split_key in ('X_val', 'y_val'):
  small_data[split_key] = data[split_key]

model = ThreeLayerLCNNet(weight_scale=1e-2)

adam_config = {
  'learning_rate': 1e-3,
}
solver = Solver(
  model,
  small_data,
  num_epochs=15,
  batch_size=50,
  update_rule='adam',
  optim_config=adam_config,
  verbose=True,
  print_every=1,  # log the loss at every iteration
)
solver.train()

(Iteration 1 / 30) loss: 2.346525
(Epoch 0 / 15) train acc: 0.130000; val_acc: 0.119000
(Iteration 2 / 30) loss: 2.138636
(Epoch 1 / 15) train acc: 0.380000; val_acc: 0.158000
(Iteration 3 / 30) loss: 1.968961
(Iteration 4 / 30) loss: 1.807287
(Epoch 2 / 15) train acc: 0.450000; val_acc: 0.190000
(Iteration 5 / 30) loss: 1.893294
(Iteration 6 / 30) loss: 1.434143
(Epoch 3 / 15) train acc: 0.450000; val_acc: 0.199000
(Iteration 7 / 30) loss: 1.547614
(Iteration 8 / 30) loss: 1.712900
(Epoch 4 / 15) train acc: 0.540000; val_acc: 0.203000
(Iteration 9 / 30) loss: 1.586205
(Iteration 10 / 30) loss: 1.327572
(Epoch 5 / 15) train acc: 0.600000; val_acc: 0.224000
(Iteration 11 / 30) loss: 1.359757
(Iteration 12 / 30) loss: 1.264613
(Epoch 6 / 15) train acc: 0.560000; val_acc: 0.200000
(Iteration 13 / 30) loss: 1.320872
(Iteration 14 / 30) loss: 1.029861
(Epoch 7 / 15) train acc: 0.680000; val_acc: 0.215000
(Iteration 15 / 30) loss: 0.843372
(Iteration 16 / 30) loss: 0.924132
(Epoch 8 / 15) tr