In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error (the sum of squared
differences between the prediction and the target).

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Seed numpy's global RNG before any draw so that re-running this cell on a
# fresh kernel produces the same data, the same initial weights and therefore
# the same printed loss values.
np.random.seed(0)

# Create random input and output data
x = np.random.randn(N, D_in)   # inputs,  shape (N, D_in)
y = np.random.randn(N, D_out)  # targets, shape (N, D_out)

# Randomly initialize weights (no bias terms)
w1 = np.random.randn(D_in, H)   # input  -> hidden, shape (D_in, H)
w2 = np.random.randn(H, D_out)  # hidden -> output, shape (H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)              # hidden pre-activation, shape (N, H)
    h_relu = np.maximum(h, 0)  # ReLU
    y_pred = h_relu.dot(w2)    # prediction, shape (N, D_out)

    # Compute and print loss (sum of squared errors over the whole batch).
    # The residual is computed once and reused by the backward pass.
    residual = y_pred - y
    loss = np.square(residual).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * residual           # d(loss)/d(y_pred)
    grad_w2 = h_relu.T.dot(grad_y_pred)    # shape (H, D_out)
    grad_h_relu = grad_y_pred.dot(w2.T)    # shape (N, H)
    # ReLU backward: block the gradient wherever the pre-activation was
    # negative. Work on a copy so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)              # shape (D_in, H)

    # Update weights in place with one plain gradient-descent step
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 30387599.04726187
1 26344723.057923503
2 24338733.555011056
3 21246862.004206903
4 16547503.119162027
5 11439466.205393918
6 7221250.707383399
7 4414056.06692946
8 2747412.0063839257
9 1807576.4800531748
10 1271904.3969834186
11 953049.056768188
12 749650.993480968
13 610112.7147979797
14 508002.9572778321
15 429595.1591477878
16 367253.7254359062
17 316512.86730280926
18 274485.86509090464
19 239250.79793409765
20 209469.71211426228
21 184134.17709315324
22 162441.2039955573
23 143773.6535042497
24 127660.08928875852
25 113687.06507034894
26 101515.18311444271
27 90890.03568393996
28 81582.53919013678
29 73396.53174746639
30 66177.77044657053
31 59798.70665766654
32 54140.484160783744
33 49111.74159127268
34 44635.24231031485
35 40633.38191575937
36 37044.58503285447
37 33824.49839359011
38 30931.191907798446
39 28327.386626127074
40 25977.963502635095
41 23853.23124436494
42 21928.72452235532
43 20182.51495769381
44 18596.0510248606
45 17152.263394283324
46 15836.753019884774
47 14

414 4.595247569661808e-05
415 4.3735968326354844e-05
416 4.1626305292209106e-05
417 3.9618774167898474e-05
418 3.770861477686894e-05
419 3.589035159080875e-05
420 3.415987756114464e-05
421 3.2515157145832896e-05
422 3.094788147342323e-05
423 2.945624991576773e-05
424 2.8036611035025763e-05
425 2.6685388010297977e-05
426 2.5399578692341522e-05
427 2.4175677354558366e-05
428 2.301100436784914e-05
429 2.1902647196469527e-05
430 2.084766181788298e-05
431 1.9843888630754003e-05
432 1.8889382769406477e-05
433 1.7979751595906296e-05
434 1.711405973845496e-05
435 1.6289981578953955e-05
436 1.550564621085749e-05
437 1.4759244049865017e-05
438 1.4048793985977163e-05
439 1.3372610532925531e-05
440 1.2729069771383617e-05
441 1.2116500502515377e-05
442 1.15334399605512e-05
443 1.0979189104779597e-05
444 1.0451034717997019e-05
445 9.948296392372399e-06
446 9.469697655955174e-06
447 9.014191241707446e-06
448 8.58063691316691e-06
449 8.168024943792808e-06
450 7.775453039683766e-06
451 7.40156067946945