In [1]:
# A bit of setup

import numpy as np
import matplotlib.pyplot as plt

from dl4cv.classifiers.neural_net import TwoLayerNet

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

In [22]:
import numpy as np
X = np.zeros((3,2,2,2))
Y = np.reshape(X,(X.shape[0],-1))
Y.shape



(3, 8)

In [2]:
# Create a small net and some toy data to check your implementations.
# Note that we set the random seed for repeatable experiments.

input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5

def init_toy_model():
  np.random.seed(0)
  return TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)

def init_toy_data():
  np.random.seed(1)
  X = 10 * np.random.randn(num_inputs, input_size)
  y = np.array([0, 1, 2, 2, 1])
  return X, y

net = init_toy_model()
X, y = init_toy_data()

In [8]:
std = 1e-1
W1 = std * np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = std * np.random.randn(hidden_size, num_classes)
b2 = np.zeros(num_classes)

In [20]:
scores = None
hidden_layer = np.maximum(0,X.dot(W1)+b1)
scores = hidden_layer.dot(W2) + b2


In [21]:
scores

array([[ 0.22821849,  0.74070957,  0.76814564],
       [-0.52946699, -0.0556803 ,  0.23713261],
       [-0.30383779, -0.15451098,  0.25903793],
       [-0.33393053, -0.23718961,  0.03537417],
       [-0.20781756,  0.18432376,  0.06046585]])

In [22]:
K = np.array(range(0,scores.shape[1]))
y.resize(y.shape[0],1)

In [53]:
L = np.zeros_like(scores)
L = (y == K).astype(int)
L

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0]])

In [25]:
temp = np.exp(scores)
A = np.sum(temp,axis=1)
B = temp

In [54]:
H = B/A[:,None]
H

array([[ 0.22803319,  0.3806888 ,  0.39127801],
       [ 0.2101503 ,  0.33751491,  0.45233479],
       [ 0.25531264,  0.29643134,  0.44825602],
       [ 0.28182484,  0.31045118,  0.40772398],
       [ 0.26400099,  0.39076024,  0.34523877]])

In [55]:
reg=0.1
loss =  -(1.0/float(X.shape[0]))*np.sum(L*np.log(H)) + 0.5*reg*(np.sum(W1*W1)) +0.5*reg*(np.sum(W2*W2))
loss

1.0662114766004585

In [56]:
grads = {}
H-L

array([[-0.77196681,  0.3806888 ,  0.39127801],
       [ 0.2101503 , -0.66248509,  0.45233479],
       [ 0.25531264,  0.29643134, -0.55174398],
       [ 0.28182484,  0.31045118, -0.59227602],
       [ 0.26400099, -0.60923976,  0.34523877]])

In [33]:
dtemp = (1.0/float(X.shape[0]))*(H-L)

In [34]:
dtemp

array([[-0.15439336,  0.07613776,  0.0782556 ],
       [ 0.04203006, -0.13249702,  0.09046696],
       [ 0.05106253,  0.05928627, -0.1103488 ],
       [ 0.05636497,  0.06209024, -0.1184552 ],
       [ 0.0528002 , -0.12184795,  0.06904775]])

In [35]:
dW2 = hidden_layer.T.dot(dtemp) + reg*W2
db2 =  np.sum(dtemp,axis=0,keepdims= True)

In [36]:
dW2

array([[ 0.0506531 , -0.11048115,  0.06994206],
       [-0.43208156,  0.14683884,  0.28639126],
       [-0.33944695,  0.15241529,  0.23540188],
       [ 0.44431204, -0.39358635, -0.08417645],
       [-0.13013306, -0.1399467 ,  0.28359817],
       [ 0.00322205, -0.05716432,  0.03893796],
       [ 0.1722221 , -0.46118525,  0.29666093],
       [ 0.22554845, -0.13792205, -0.08366785],
       [ 0.001983  ,  0.00119009, -0.00670662],
       [ 0.00377564,  0.00121821,  0.01129484]])

In [37]:
db2

array([[ 0.04786439, -0.05683071,  0.00896632]])

In [40]:
dh = dtemp.dot(W2.T)
dh

array([[ 0.02520042,  0.00773043,  0.01120329,  0.00661809,  0.00667011,
         0.03536944,  0.00050943,  0.00772889, -0.00740382,  0.00393703],
       [-0.01513055, -0.00840928,  0.00439136,  0.00870085, -0.00808089,
         0.00304822, -0.01114067,  0.00039394, -0.00681065,  0.0101909 ],
       [-0.00208448,  0.00220803, -0.00932875, -0.01012576,  0.00252867,
        -0.02127784,  0.00814597, -0.00444394,  0.0091188 , -0.00981355],
       [-0.00254899,  0.00224821, -0.01007427, -0.01086222,  0.00260334,
        -0.02310715,  0.00866187, -0.00483048,  0.00980099, -0.0104948 ],
       [-0.01570751, -0.00804824,  0.00254728,  0.00673948, -0.00765155,
        -0.00122925, -0.00960517, -0.0005019 , -0.00503384,  0.00830801]])

In [41]:
dh[hidden_layer <= 0] = 0

db1 = np.sum(dh, axis=0,keepdims= True)

In [47]:
dW1 = X.T.dot(dh) + reg*W1

In [49]:
db1 = np.sum(dh, axis=0,keepdims= True)

In [50]:
db1

array([[-0.03338706, -0.00067885, -0.00380837,  0.00107044, -0.00393031,
        -0.00122925, -0.02074584, -0.00938238,  0.        ,  0.        ]])

In [52]:
dW1

In [57]:
np.log(0.1)

-2.3025850929940455

In [3]:
import numpy as np
X = np.ones((5,1))


In [13]:
Y = np.zeros((X.shape))

In [14]:
Y

array([[ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.]])