In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean distance (the sum of
squared errors) between the prediction and y.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# Seed the global NumPy RNG before any random draw so that a fresh
# Restart & Run All reproduces the same data, the same initial weights and
# therefore the same loss trace.
SEED = 0
np.random.seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)              # hidden pre-activations, shape (N, H)
    h_relu = np.maximum(h, 0)  # ReLU non-linearity
    y_pred = h_relu.dot(w2)    # predictions, shape (N, D_out)

    # Compute and print loss (sum of squared errors over the whole batch)
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of the loss with respect to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    # Copy before masking so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights with a plain gradient-descent step (in place)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 34015521.376625426
1 35208502.34964135
2 40995315.14964868
3 43136285.18932402
4 35529944.21548281
5 21287998.65400235
6 9907843.349399861
7 4302736.431237728
8 2150215.566502625
9 1333655.5952776861
10 973512.6015694292
11 773548.2530746481
12 639082.2789026293
13 537626.4984908017
14 457107.98397387133
15 391352.4418037403
16 336830.1162269587
17 291281.8698759642
18 253046.97374464927
19 220774.2134165935
20 193314.58308811105
21 169865.80818427337
22 149751.54097418208
23 132393.05987596372
24 117341.83892965666
25 104252.38311264857
26 92833.07620658484
27 82843.23450003979
28 74080.77890228266
29 66368.49322211328
30 59569.55558375197
31 53563.16813920838
32 48242.7416923604
33 43520.20228255923
34 39319.385129082395
35 35578.97231498164
36 32240.44215920415
37 29253.65280366206
38 26576.993926908173
39 24177.584934209273
40 22023.276989145746
41 20083.34566659559
42 18332.633604669743
43 16751.672570160015
44 15322.054778830192
45 14027.590080517279
46 12853.641848830033
47 11

363 9.512821311330526e-05
364 9.013101131517064e-05
365 8.539576092300372e-05
366 8.090912727257293e-05
367 7.665927391698027e-05
368 7.263287900222141e-05
369 6.881832590588976e-05
370 6.52038249636881e-05
371 6.17795776899648e-05
372 5.853592244659514e-05
373 5.546324587170048e-05
374 5.255143542054319e-05
375 4.979305937153417e-05
376 4.717984252301189e-05
377 4.4703716187991936e-05
378 4.235814390442516e-05
379 4.013571159567707e-05
380 3.8029880904614896e-05
381 3.603462417391945e-05
382 3.4144012586868705e-05
383 3.235270799366965e-05
384 3.065605419181062e-05
385 2.904833264701964e-05
386 2.752477589754185e-05
387 2.6081228394995187e-05
388 2.4713654315220094e-05
389 2.3418006337723835e-05
390 2.2190357915443176e-05
391 2.1027128006737013e-05
392 1.99251376907769e-05
393 1.8880855135676986e-05
394 1.7891200955956728e-05
395 1.6953619720316945e-05
396 1.6065397668400274e-05
397 1.5223708801510713e-05
398 1.4425973748464919e-05
399 1.367014309458812e-05
400 1.2953986715503862e-05
