In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# Seed the global numpy RNG before drawing anything so that the random data,
# the initial weights, and therefore the printed loss curve are reproducible
# when the notebook is restarted and run from the top.
SEED = 0
np.random.seed(SEED)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)   # inputs, shape (N, D_in)
y = np.random.randn(N, D_out)  # targets, shape (N, D_out)

# Randomly initialize weights (no bias terms in this model)
w1 = np.random.randn(D_in, H)   # input -> hidden
w2 = np.random.randn(H, D_out)  # hidden -> output

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)              # hidden pre-activation, shape (N, H)
    h_relu = np.maximum(h, 0)  # ReLU
    y_pred = h_relu.dot(w2)    # predictions, shape (N, D_out)

    # Compute and print loss: the sum of squared errors over every batch
    # element and every output dimension.
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of the loss with respect to w1 and w2,
    # applying the chain rule backwards through the forward pass.
    grad_y_pred = 2.0 * (y_pred - y)     # d loss / d y_pred
    grad_w2 = h_relu.T.dot(grad_y_pred)  # d loss / d w2
    grad_h_relu = grad_y_pred.dot(w2.T)  # d loss / d h_relu
    # Copy before masking so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)            # d loss / d w1

    # Update weights with a plain gradient-descent step (in place)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 21152203.5999
1 16092090.476
2 14165774.1582
3 13598933.592
4 13421003.4233
5 12991811.6353
6 11896061.7202
7 10175089.4718
8 8078323.0551
9 6037475.4142
10 4307443.33002
11 3000765.61285
12 2077484.78522
13 1455599.9026
14 1043343.18805
15 771543.107962
16 590203.790396
17 466767.550746
18 380368.695477
19 317752.661351
20 270765.010956
21 234304.055474
22 205160.262384
23 181249.331596
24 161214.77526
25 144163.98363
26 129460.077774
27 116657.972271
28 105425.162492
29 95505.5581717
30 86693.2224736
31 78831.1911022
32 71793.3288273
33 65479.7732582
34 59799.7399165
35 54686.9711797
36 50065.9870712
37 45894.3854507
38 42113.1657077
39 38681.3435671
40 35564.5916002
41 32727.6931693
42 30143.513594
43 27786.4059579
44 25632.4970317
45 23663.5728462
46 21864.1088449
47 20216.9672871
48 18705.8114669
49 17318.297435
50 16043.9414103
51 14872.7685925
52 13794.8413062
53 12801.936692
54 11887.4219128
55 11043.8278172
56 10265.0160087
57 9545.79303978
58 8881.28131483
59 8266.88690075


438 0.000113420138013
439 0.000108841585172
440 0.000104446011895
441 0.000100228758446
442 9.61832276793e-05
443 9.23026632279e-05
444 8.85801542249e-05
445 8.50066533151e-05
446 8.15778428049e-05
447 7.82881001217e-05
448 7.51318709037e-05
449 7.2103760501e-05
450 6.91983632528e-05
451 6.64096921267e-05
452 6.37335956446e-05
453 6.11658134473e-05
454 5.87026518383e-05
455 5.63403216133e-05
456 5.40722463127e-05
457 5.18950975143e-05
458 4.98061600491e-05
459 4.78020860667e-05
460 4.5879458109e-05
461 4.40339721756e-05
462 4.22625318756e-05
463 4.0562772978e-05
464 3.893160412e-05
465 3.73664814392e-05
466 3.58652981122e-05
467 3.44237453555e-05
468 3.30403757033e-05
469 3.17127033547e-05
470 3.04384791051e-05
471 2.92166641545e-05
472 2.80436229862e-05
473 2.6917323482e-05
474 2.58364855211e-05
475 2.47992206171e-05
476 2.38039742329e-05
477 2.28493433949e-05
478 2.19324224996e-05
479 2.10523907921e-05
480 2.02078415056e-05
481 1.93972839002e-05
482 1.86196925143e-05
483 1.7873323762