In [None]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x using Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [3]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

(0, 36478902.136433885)
(1, 34153914.667655274)
(2, 31001582.81710024)
(3, 24555804.460928753)
(4, 16493681.13278852)
(5, 9786391.343750112)
(6, 5589032.150086942)
(7, 3351002.677597764)
(8, 2209514.8098610905)
(9, 1603775.3012527435)
(10, 1249944.5085678394)
(11, 1019202.1222347469)
(12, 853646.2218285258)
(13, 726662.6857766774)
(14, 625164.8852985026)
(15, 541987.9606914284)
(16, 472653.86990409746)
(17, 414319.8503268105)
(18, 364884.1107186554)
(19, 322669.3923101789)
(20, 286386.96369343414)
(21, 255022.41696761243)
(22, 227801.07465447797)
(23, 204067.30816174773)
(24, 183316.6785738858)
(25, 165085.53937317952)
(26, 149012.413863462)
(27, 134795.19772266882)
(28, 122191.04589868526)
(29, 110984.95364487992)
(30, 100994.10575613142)
(31, 92056.41235340433)
(32, 84056.26576508925)
(33, 76870.15304968029)
(34, 70396.58466624284)
(35, 64558.062119625865)
(36, 59280.32868280356)
(37, 54492.426080329984)
(38, 50152.180831110716)
(39, 46213.122560967844)
(40, 42631.39158301092)
(41, 3

(352, 0.10562355589049086)
(353, 0.10219058564699433)
(354, 0.0988692033171642)
(355, 0.0956566372562816)
(356, 0.09254930934707326)
(357, 0.0895430676978124)
(358, 0.0866349332465492)
(359, 0.08382162028793705)
(360, 0.08109996560394236)
(361, 0.07846779761584367)
(362, 0.07592071339340652)
(363, 0.07345691647313621)
(364, 0.07107364071017111)
(365, 0.06876766523339728)
(366, 0.06653737782010014)
(367, 0.06437917629249583)
(368, 0.06229183847458847)
(369, 0.060272387184809456)
(370, 0.05831847594808063)
(371, 0.056428543400042316)
(372, 0.054599832162399276)
(373, 0.052830649443330296)
(374, 0.05111930963511517)
(375, 0.049463340953286576)
(376, 0.047861544037653775)
(377, 0.046311509890085326)
(378, 0.044811848369051346)
(379, 0.04336133045603531)
(380, 0.0419575384777765)
(381, 0.04059956650940533)
(382, 0.03928598722885962)
(383, 0.03801472778354249)
(384, 0.036784924036988895)
(385, 0.03559495400703859)
(386, 0.03444369861401966)
(387, 0.03332979822310826)
(388, 0.0322519669498632