In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x using Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [1]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 28753920.185938854
1 24443820.40484578
2 23154063.439399227
3 21744930.069537036
4 18757767.707743958
5 14410026.034928288
6 9920439.882725421
7 6346007.514024435
8 3964140.499966959
9 2527804.329534983
10 1697178.1615755064
11 1212275.8300176712
12 917975.4382154506
13 728290.7188712938
14 597594.1292259895
15 502038.5916768518
16 428516.8382157211
17 369783.75445631833
18 321610.9653443156
19 281370.77640273696
20 247343.8258090823
21 218283.20420391607
22 193286.19837853912
23 171652.8703880692
24 152859.7269201425
25 136462.95215802733
26 122124.13737527624
27 109587.27328623825
28 98548.23327338662
29 88797.391019931
30 80159.88721072783
31 72490.99300184484
32 65670.08642066884
33 59587.988268823196
34 54170.16582002484
35 49316.63858335318
36 44961.57774883116
37 41048.0885267009
38 37522.17749762291
39 34340.635752386406
40 31466.364639959836
41 28863.76838725462
42 26503.947772805237
43 24363.415205379642
44 22418.656682880515
45 20648.926263699926
46 19037.71539457042
47 17

446 1.2098027693797906e-05
447 1.152405722181202e-05
448 1.0977350426788593e-05
449 1.0456613933964399e-05
450 9.960601495730622e-06
451 9.488145875913594e-06
452 9.038127403583945e-06
453 8.609461755477884e-06
454 8.201166848831883e-06
455 7.812228465706596e-06
456 7.441767874607422e-06
457 7.088895723698932e-06
458 6.752766976503644e-06
459 6.432592009783275e-06
460 6.127629567008043e-06
461 5.8371326329250965e-06
462 5.560414803306712e-06
463 5.296836076824267e-06
464 5.045743445009789e-06
465 4.806576068627637e-06
466 4.57875976493273e-06
467 4.361741730194425e-06
468 4.1550301968704745e-06
469 3.958113934930466e-06
470 3.7705438602205034e-06
471 3.591874069244266e-06
472 3.4216681649153257e-06
473 3.259547048567536e-06
474 3.1051273962083735e-06
475 2.958006393511355e-06
476 2.8178683363821237e-06
477 2.6843736924890077e-06
478 2.5572043291197224e-06
479 2.436072441946498e-06
480 2.3206804247642395e-06
481 2.210756297685066e-06
482 2.1060505846771422e-06
483 2.0063008833867295e-06