In [1]:
# In the spirit of deliberate practice: quick, simple neural networks in NumPy and PyTorch.

In [2]:
import numpy as np
import torch
from torch import nn
from torch import autograd

### Simple neural network in NumPy

In [3]:
# Problem dimensions used by the training cells.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden dimension
D_out = 10    # output dimension

# Gradient-descent settings.
num_epochs = 500       # number of update steps
learning_rate = 1e-6   # step size applied to each gradient

In [4]:

# Random inputs and targets plus randomly initialised weights.
# (There is no separate test set: the network is only fitted to this one batch.)

x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

print("w1 ", w1.shape)
print("w2 ", w2.shape)

# Training: full-batch gradient descent on a two-layer network (linear -> ReLU -> linear).

for epoch in range(num_epochs):

    # Forward pass.
    h1_out = x.dot(w1)
    h1_relu = np.maximum(0, h1_out)
    predictions = h1_relu.dot(w2)

    # Summed squared error; .sum() reduces the matrix of per-element errors to one scalar.
    residual = predictions - y
    loss = np.square(residual).sum()

    # Backward pass: grad_<name> is the derivative of the loss with respect to <name>.
    grad_predictions = 2 * residual
    grad_w2 = h1_relu.T.dot(grad_predictions)
    grad_h1_relu = grad_predictions.dot(w2.T)

    # The derivative of ReLU is a step function: 1 where its input was positive, 0 elsewhere
    # (it is undefined at exactly 0; 0 is used there). So the incoming gradient passes through
    # unchanged where h1_out > 0 and is zeroed everywhere else.
    grad_h1_out = np.where(h1_out <= 0, 0.0, grad_h1_relu)

    grad_w1 = x.T.dot(grad_h1_out)

    # Gradient-descent step, applied in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

    if epoch % 10 == 0:
        print(loss)

w1  (1000, 100)
w2  (100, 10)
29414411.3186713
1099705.2768119196
211037.44761508633
70235.94668430273
28027.216459783405
12469.514320050472
5962.416360799567
2999.780167489182
1567.8145078102823
843.4173140663796
463.93586302743176
259.6903470343769
147.37902143053935
84.57018543881539
48.96687076005258
28.576892328839325
16.798655862462528
9.928519696568767
5.89412205934819
3.512636725524398
2.100055174395472
1.2590331442792155
0.7566041897175123
0.4556297135862935
0.27485914457381566
0.16607879270225273
0.10048329792133362
0.060870186235282606
0.03691374226220619
0.02240631241478453
0.013611750673269307
0.008275214594386064
0.005034530616264805
0.003064705010738344
0.0018666079850745735
0.0011374293756767362
0.0006933988233606237
0.00042289882288744043
0.0002580126832501718
0.00015747054537804104
9.613747003442844e-05
5.870735705237897e-05
3.586069252056624e-05
2.1910630848374097e-05
1.3390155796755289e-05
8.184729071241584e-06
5.004040355504139e-06
3.0599090909543413e-06
1.87146340

In [5]:
# def print_name_and_shape(*args):       
#     for a in args:
#         print(a.shape)

# print_name_and_shape(x,grad_h1_out,w2,grad_predictions)
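# Shape trace for x.T.dot(grad_h1_out).dot(w2).dot(grad_predictions.T):
#   x.T (1000, 64) . grad_h1_out (64, 100) . w2 (100, 10) . grad_predictions.T (10, 64) -> (1000, 64),
#   which does not match w1's shape (1000, 100).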


### Now let's try the same simple neural network using PyTorch.

In [6]:
# Display the installed PyTorch version.
torch.__version__


'0.4.0'

In [11]:
# Random inputs and targets (constants) plus randomly initialised weights (trainable).
# The tensors are created directly with torch.randn instead of wrapping the result in
# torch.tensor(...): copy-constructing a tensor from a tensor makes a redundant copy and
# triggers a UserWarning on recent PyTorch versions.

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)


# Training: full-batch gradient descent, with gradients computed by autograd.

for epoch in range(num_epochs):

    # Forward pass: linear -> ReLU -> linear.
    h1_out = x.mm(w1)  # (N, H)
    h1_relu = h1_out.clamp(min=0)  # clamping from below at 0 is exactly ReLU
    predictions = h1_relu.mm(w2)  # (N, D_out)

    # Summed squared error, reduced to a scalar so backward() can be called on it.
    loss = (predictions - y).pow(2).sum()

    if epoch % 10 == 0:
        print(loss.item())

    # Backprop: populates w1.grad and w2.grad with d(loss)/d(w).
    loss.backward()

    with torch.no_grad():
        # Update the weights in place without recording the step in the autograd graph.
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # .grad accumulates across backward() calls, so clear it before the next epoch.
        w1.grad.zero_()
        w2.grad.zero_()

40337560.0
1114492.25
257671.359375
92247.703125
39567.12890625
18807.6796875
9537.228515625
5067.7353515625
2792.688720703125
1582.8270263671875
918.08544921875
542.9712524414062
326.52789306640625
199.17181396484375
123.00042724609375
76.78062438964844
48.38643264770508
30.74588966369629
19.681882858276367
12.681753158569336
8.21949291229248
5.355597019195557
3.506243944168091
2.305366039276123
1.521742343902588
1.0080530643463135
0.6700431704521179
0.44664689898490906
0.2986682951450348
0.20014753937721252
0.13446134328842163
0.09057148545980453
0.061124276369810104
0.04135604202747345
0.028059158474206924
0.019085630774497986
0.013049609959125519
0.008977129124104977
0.006221895106136799
0.004367586225271225
0.003104274393990636
0.00224212440662086
0.0016484413063153625
0.0012329440796747804
0.0009398906258866191
0.0007280685240402818
0.0005728733958676457
0.0004580894601531327
0.00037230856833048165
0.0003048597718589008
