In [None]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [5]:
import numpy as np

# Problem dimensions:
#   N     - number of samples in the batch
#   D_in  - number of input features per sample
#   H     - number of hidden units
#   D_out - number of output values per sample
N, D_in, H, D_out = 64, 1000, 100, 10

# Step size used by the plain gradient-descent updates below.
learning_rate = 1e-6

# Synthetic inputs and targets, drawn from a standard normal distribution.
x = np.random.randn(N, D_in)     # inputs,  shape (N, D_in)  = (64, 1000)
y = np.random.randn(N, D_out)    # targets, shape (N, D_out) = (64, 10)

# Weights of the two bias-free linear layers, also standard normal.
w1 = np.random.randn(D_in, H)    # shape (D_in, H)  = (1000, 100)
w2 = np.random.randn(H, D_out)   # shape (H, D_out) = (100, 10)

for t in range(500):
    # ---- Forward pass -------------------------------------------------
    h = x @ w1                        # (N, D_in) @ (D_in, H)  -> (N, H)
    h_relu = np.clip(h, 0, None)      # ReLU: negative activations become 0
    y_pred = h_relu @ w2              # (N, H) @ (H, D_out)    -> (N, D_out)

    # ---- Loss: sum of squared errors over the whole batch -------------
    residual = y_pred - y
    loss = np.sum(np.square(residual))
    print("%4d\t%.6f" % (t, loss))

    # ---- Backward pass (chain rule applied by hand) -------------------
    # d(loss)/d(y_pred)
    grad_y_pred = 2.0 * residual
    # Second layer: d(loss)/d(w2), shape (H, D_out)
    grad_w2 = h_relu.T @ grad_y_pred
    # Push the gradient back through the second layer to the ReLU output.
    grad_h_relu = grad_y_pred @ w2.T
    # ReLU passes no gradient where its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    # First layer: d(loss)/d(w1), shape (D_in, H)
    grad_w1 = x.T @ grad_h

    # ---- Gradient-descent step (in place) -----------------------------
    # Both gradients were computed above from the pre-update weights, so
    # the order of these two updates does not matter.
    w2 -= learning_rate * grad_w2
    w1 -= learning_rate * grad_w1

   0	35966082.096489
   1	32901857.260657
   2	32346226.859722
   3	29196859.390193
   4	22432910.223836
   5	14427422.913511
   6	8273942.654634
   7	4623794.473917
   8	2757181.206053
   9	1823530.605361
  10	1331743.137921
  11	1043955.701194
  12	855598.576972
  13	719735.710885
  14	615016.596876
  15	530842.640317
  16	461417.315248
  17	403231.123950
  18	354062.125916
  19	312112.139512
  20	276099.640701
  21	245023.071179
  22	218073.266745
  23	194606.279221
  24	174081.227568
  25	156049.491270
  26	140172.007209
  27	126159.629179
  28	113763.953272
  29	102765.401990
  30	92980.149523
  31	84254.096114
  32	76466.456836
  33	69501.624245
  34	63254.183636
  35	57653.452929
  36	52611.788067
  37	48065.140943
  38	43957.400224
  39	40243.480859
  40	36879.934714
  41	33831.445235
  42	31063.504468
  43	28548.690739
  44	26261.100184
  45	24176.394246
  46	22272.871688
  47	20535.054634
  48	18948.033758
  49	17496.766636
  50	16170.391295
  51	14955.454111
  52	13841.17250