# Implementing a deep neural network with numpy

In [2]:
import numpy as np

In [9]:
# --- problem dimensions ---
n = 64               # number of samples (rows of X and y)
num_features = 1000  # input dimensionality (columns of X, rows of W1)
hidden_dim = 100     # width of the hidden layer (columns of W1, rows of W2)
output_dim = 10      # output dimensionality (columns of y and W2)

# --- optimisation settings ---
learning_rate = 1e-6  # step size used in the gradient-descent weight updates
num_epochs = 500      # number of passes of the training loop

# --- reproducibility ---
# The data and the initial weights are drawn with np.random.randn, which uses
# NumPy's legacy global RNG. Seeding it here makes a fresh
# "Restart Kernel -> Run All" produce the same numbers every time.
random_seed = 42
np.random.seed(random_seed)

In [10]:
# Synthetic regression problem: both the inputs and the targets are
# standard-normal noise (X first, then y, so the RNG draw order is fixed).
X = np.random.standard_normal(size=(n, num_features))
y = np.random.standard_normal(size=(n, output_dim))
# Last expression of the cell: display both shapes as a quick sanity check.
(X.shape, y.shape)

((64, 1000), (64, 10))

In [11]:
# Draw the initial weights from a standard normal distribution.
# W1 maps inputs -> hidden units; W2 maps hidden units -> outputs.
W1 = np.random.standard_normal(size=(num_features, hidden_dim))
W2 = np.random.standard_normal(size=(hidden_dim, output_dim))

In [14]:
for epoch in range(num_epochs):

    # ---------- forward pass ----------
    # Hidden layer: linear map followed by ReLU (negative values clamped to 0).
    hidden_pre_activation = np.dot(X, W1)
    hidden_activation = np.maximum(hidden_pre_activation, 0)
    # Output layer: purely linear, no activation on the prediction.
    y_pred = np.dot(hidden_activation, W2)

    # ---------- loss ----------
    # Sum of squared errors over every sample and output dimension.
    loss = np.sum(np.square(y_pred - y))
    print(epoch, loss)

    # ---------- backward pass ----------
    # d(loss)/d(y_pred) for the squared-error loss.
    gradient_y_pred = 2.0 * (y_pred - y)

    # Propagate through the output layer: gradient w.r.t. W2 and w.r.t. the
    # hidden activations that fed into it.
    gradient_W2 = np.dot(hidden_activation.T, gradient_y_pred)
    gradient_hidden_activation = np.dot(gradient_y_pred, W2.T)

    # Propagate through ReLU: the gradient is blocked (set to 0) wherever the
    # pre-activation was negative and passed through unchanged elsewhere.
    gradient_hidden_pre_activation = np.where(
        hidden_pre_activation < 0, 0.0, gradient_hidden_activation
    )

    # Propagate through the hidden layer: gradient w.r.t. W1.
    gradient_W1 = np.dot(X.T, gradient_hidden_pre_activation)

    # ---------- gradient-descent step ----------
    # In-place updates, so W1 and W2 keep training if this cell is re-run.
    W1 -= learning_rate * gradient_W1
    W2 -= learning_rate * gradient_W2

0 18718747.3706
1 14066989.6834
2 11466465.5769
3 9704923.25664
4 8274663.37706
5 6971434.80364
6 5753602.73466
7 4637204.94944
8 3663590.86918
9 2850074.80295
10 2196761.15669
11 1686636.82609
12 1296521.86814
13 1001551.75088
14 779799.677485
15 613177.639929
16 487539.500385
17 392222.820946
18 319186.935936
19 262657.653927
20 218396.822158
21 183367.950912
22 155309.812094
23 132571.167992
24 113951.642107
25 98552.9834652
26 85703.5728136
27 74886.3069329
28 65711.8037417
29 57882.1885752
30 51156.9389577
31 45349.1345938
32 40307.3184756
33 35912.5033464
34 32066.8073264
35 28695.1328547
36 25725.5000102
37 23104.0298793
38 20784.9405312
39 18726.2344114
40 16894.5024894
41 15261.2993444
42 13802.3022094
43 12496.7586733
44 11327.9363883
45 10279.3072031
46 9337.45057803
47 8490.19538508
48 7727.31730741
49 7039.0436088
50 6417.37039221
51 5855.21932273
52 5346.53036265
53 4885.47344703
54 4467.3974134
55 4088.44747698
56 3744.41723866
57 3431.71290763
58 3146.9372394
59 2887.56

442 3.83117913828e-07
443 3.62966200812e-07
444 3.43880427041e-07
445 3.25793312401e-07
446 3.08659479864e-07
447 2.92426751506e-07
448 2.77052258201e-07
449 2.62487423447e-07
450 2.48687068314e-07
451 2.35613235998e-07
452 2.23227808052e-07
453 2.11493429598e-07
454 2.00377033426e-07
455 1.89849969735e-07
456 1.7987430574e-07
457 1.70422500945e-07
458 1.61467818863e-07
459 1.52983846491e-07
460 1.4494748904e-07
461 1.37334188437e-07
462 1.30121777101e-07
463 1.23288373706e-07
464 1.16812718464e-07
465 1.10679190184e-07
466 1.04867058972e-07
467 9.93611272477e-08
468 9.41448749801e-08
469 8.92024497062e-08
470 8.45207595556e-08
471 8.00846426146e-08
472 7.58817825015e-08
473 7.18991330643e-08
474 6.81258429396e-08
475 6.4551419095e-08
476 6.1164095713e-08
477 5.79557338941e-08
478 5.49157173385e-08
479 5.20345735811e-08
480 4.93050947202e-08
481 4.67189877583e-08
482 4.42687987364e-08
483 4.1947323653e-08
484 3.97481102806e-08
485 3.76640434634e-08
486 3.56891265111e-08
487 3.381783212