In [1]:
import numpy as np
from Tools.layers import *

In [2]:
# Test the affine_forward function

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

out, _ = layer_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

Testing affine_forward function:
difference:  9.7698500479884e-10


In [3]:
# Test the affine_backward function
from Tools.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: layer_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: layer_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: layer_forward(x, w, b)[0], b, dout)

_, cache = layer_forward(x, w, b)
dx, dw, db = layer_backward(dout, cache)

# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Testing affine_backward function:
dx error:  1.0908199508708189e-10
dw error:  2.1752635504596857e-10
db error:  7.736978834487815e-12


In [4]:
# Test the relu_forward function

x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

out, _ = relu_forward(x)
correct_out = np.array([[ 0.,          0.,          0.,          0.,        ],
                        [ 0.,          0.,          0.04545455,  0.13636364,],
                        [ 0.22727273,  0.31818182,  0.40909091,  0.5,       ]])

# Compare your output with ours. The error should be on the order of e-8
print('Testing relu_forward function:')
print('difference: ', rel_error(out, correct_out))

Testing relu_forward function:
difference:  4.999999798022158e-08


In [5]:
np.random.seed(231)
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be on the order of e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))

Testing relu_backward function:
dx error:  3.2756349136310288e-12


In [6]:
# test layer & relu
np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = layer_relu_forward(x, w, b)
dx, dw, db = layer_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: layer_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: layer_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: layer_relu_forward(x, w, b)[0], b, dout)

# Relative error should be around e-10 or less
print('Testing layer_relu_forward and layer_relu_backward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Testing layer_relu_forward and layer_relu_backward:
dx error:  6.395535042049294e-11
dw error:  8.162011105764925e-11
db error:  7.826724021458994e-12


In [7]:
np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: svm_loss(x, y)[0], x, verbose=False)
loss, dx = svm_loss(x, y)

# Test svm_loss function. Loss should be around 9 and dx error should be around the order of e-9
print('Testing svm_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be close to 2.3 and dx error should be around e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
print(dx_num[1,:])
print(dx[1,:])

Testing svm_loss:
loss:  8.999602749096233
dx error:  1.4021566006651672e-09

Testing softmax_loss:
loss:  2.302545844500738
dx error:  9.483503037636722e-09
[ 0.00199856  0.00199983  0.00199757  0.00200115  0.0020019  -0.01799971
  0.00199895  0.00200102  0.00199949  0.00200123]
[ 0.00199856  0.00199983  0.00199757  0.00200115  0.0020019  -0.01799971
  0.00199895  0.00200102  0.00199949  0.00200123]


In [8]:
from Tools.fc_net import *
np.random.seed(231)
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

std = 1e-3
model = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C, weight_scale=std)

print('Testing initialization ... ')
W1_std = abs(model.params['W1'].std() - std)
b1 = model.params['b1']
W2_std = abs(model.params['W2'].std() - std)
b2 = model.params['b2']
assert W1_std < std / 10, 'First layer weights do not seem right'
assert np.all(b1 == 0), 'First layer biases do not seem right'
assert W2_std < std / 10, 'Second layer weights do not seem right'
assert np.all(b2 == 0), 'Second layer biases do not seem right'

print('Testing test-time forward pass ... ')
model.params['W1'] = np.linspace(-0.7, 0.3, num=D*H).reshape(D, H)
model.params['b1'] = np.linspace(-0.1, 0.9, num=H)
model.params['W2'] = np.linspace(-0.3, 0.4, num=H*C).reshape(H, C)
model.params['b2'] = np.linspace(-0.9, 0.1, num=C)
X = np.linspace(-5.5, 4.5, num=N*D).reshape(D, N).T
scores = model.loss(X)
correct_scores = np.asarray(
  [[11.53165108,  12.2917344,   13.05181771,  13.81190102,  14.57198434, 15.33206765,  16.09215096],
   [12.05769098,  12.74614105,  13.43459113,  14.1230412,   14.81149128, 15.49994135,  16.18839143],
   [12.58373087,  13.20054771,  13.81736455,  14.43418138,  15.05099822, 15.66781506,  16.2846319 ]])
scores_diff = np.abs(scores - correct_scores).sum()
assert scores_diff < 1e-6, 'Problem with test-time forward pass'

print('Testing training loss (no regularization)')
y = np.asarray([0, 5, 1])
loss, grads = model.loss(X, y)
correct_loss = 3.4702243556
assert abs(loss - correct_loss) < 1e-10, 'Problem with training-time loss'

model.reg = 1.0
loss, grads = model.loss(X, y)
correct_loss = 26.5948426952
assert abs(loss - correct_loss) < 1e-10, 'Problem with regularization loss'

# Errors should be around e-7 or less
for reg in [0.0, 0.7]:
  print('Running numeric gradient check with reg = ', reg)
  model.reg = reg
  loss, grads = model.loss(X, y)

  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))

Testing initialization ... 
Testing test-time forward pass ... 
Testing training loss (no regularization)
Running numeric gradient check with reg =  0.0
W1 relative error: 1.22e-08
W2 relative error: 3.42e-10
b1 relative error: 6.55e-09
b2 relative error: 2.53e-10
Running numeric gradient check with reg =  0.7
W1 relative error: 2.53e-07
W2 relative error: 1.37e-07
b1 relative error: 1.56e-08
b2 relative error: 9.09e-10


In [11]:
from Tools.data_utils import load_CIFAR10

cifar10_dir = 'C:/Users/rainstar/Jupyter Folder/cs231n/cifar-10-batches-py/'

try:
    del X_train,Y_train
    del X_test,Y_test
    print('Clear previously loaded data')
except:
    pass

X_train,Y_train,X_test,Y_test = load_CIFAR10(cifar10_dir)

print('Training data shape: ',X_train.shape)
print('Training labels shape: ',Y_train.shape)
print('Test data shape: ',X_test.shape)
print('Test labels shape: ',Y_test.shape)

num_train = 49000
num_validation = 1000
num_test = 1000
num_dev =  500

mask = range(num_train,num_train+num_validation)
X_val = X_train[mask]
Y_val = Y_train[mask].astype('int')

mask = range(num_train)
X_train = X_train[mask]
Y_train = Y_train[mask].astype('int')

mask = range(num_test)
X_test = X_test[mask]
Y_test = Y_test[mask].astype('int')

mask = np.random.choice(num_train,num_dev,replace=True)
X_dev = X_train[mask]
Y_dev = Y_train[mask].astype('int')

print('Train data shape: ', X_train.shape)
print('Train labels shape: ', Y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', Y_val.shape)
print('Test data shape: ', Y_test.shape)
print('Test labels shape: ', Y_test.shape)

# normalize
mean_image = np.mean(X_train,axis=0)

X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# append bias
# X_train = np.hstack([X_train,np.ones((X_train.shape[0],1))])
# X_val = np.hstack([X_val,np.ones((X_val.shape[0],1))])
# X_test = np.hstack([X_test,np.ones((X_test.shape[0],1))])
# X_dev = np.hstack([X_dev,np.ones((X_dev.shape[0],1))])

# print('X_train: ',X_train.shape)
# print('X_val: ',X_val.shape)
# print('X_test: ',X_test.shape)
# print('X_dev: ',X_dev.shape)

Clear previously loaded data
Training data shape:  (50000, 3072)
Training labels shape:  (50000,)
Test data shape:  (10000, 3072)
Test labels shape:  (10000,)
Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000,)
Test labels shape:  (1000,)


In [12]:
from Tools.solver import *
input_size = 32 * 32 * 3
hidden_size = 50
num_classes = 10
model = TwoLayerNet(input_size, hidden_size, num_classes)
solver = None

##############################################################################
# TODO: Use a Solver instance to train a TwoLayerNet that achieves about 36% #
# accuracy on the validation set.                                            #
##############################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

data = {}
data['X_train'] = X_train
data['X_val'] = X_val
data['y_train'] = Y_train
data['y_val'] = Y_val
solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10, batch_size=100,
                    print_every=100)
solver.train()

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
##############################################################################
#                             END OF YOUR CODE                               #
##############################################################################

(Iteration 1 / 4900) loss: 2.304261
(Epoch 0 / 10) train acc: 0.136000; val_acc: 0.149000
(Iteration 101 / 4900) loss: 1.783493
(Iteration 201 / 4900) loss: 1.663331
(Iteration 301 / 4900) loss: 1.801142
(Iteration 401 / 4900) loss: 1.574793
(Epoch 1 / 10) train acc: 0.440000; val_acc: 0.444000
(Iteration 501 / 4900) loss: 1.516751
(Iteration 601 / 4900) loss: 1.570565
(Iteration 701 / 4900) loss: 1.492328
(Iteration 801 / 4900) loss: 1.430815
(Iteration 901 / 4900) loss: 1.369015
(Epoch 2 / 10) train acc: 0.495000; val_acc: 0.466000
(Iteration 1001 / 4900) loss: 1.525226
(Iteration 1101 / 4900) loss: 1.489596
(Iteration 1201 / 4900) loss: 1.272572
(Iteration 1301 / 4900) loss: 1.424399
(Iteration 1401 / 4900) loss: 1.487955
(Epoch 3 / 10) train acc: 0.506000; val_acc: 0.479000
(Iteration 1501 / 4900) loss: 1.369955
(Iteration 1601 / 4900) loss: 1.203881
(Iteration 1701 / 4900) loss: 1.323502
(Iteration 1801 / 4900) loss: 1.297813
(Iteration 1901 / 4900) loss: 1.331007
(Epoch 4 / 10) t