In [21]:
import matplotlib.pyplot as plt
%matplotlib inline

In [1]:
# numpy tensors
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10

# Random training set: N samples with D_in features each, and N targets with
# D_out values each.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weights of the two fully connected layers (no biases).
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):

    # Forward pass: linear -> ReLU -> linear.
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squares loss; the residual is reused by the backward pass.
    residual = y_pred - y
    loss = np.sum(np.square(residual))
    print('Loss at round {}: {}'.format(t, loss))

    # Backward pass: apply the chain rule by hand, from the loss back to w1.
    grad_y_pred = 2.0 * residual
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes no gradient where its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Plain gradient-descent step, applied in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

Loss at round 0: 28394648.53032766
Loss at round 1: 24497048.28056024
Loss at round 2: 22609684.259916097
Loss at round 3: 20269396.160558
Loss at round 4: 16612157.205853652
Loss at round 5: 12298964.784933168
Loss at round 6: 8307614.153218962
Loss at round 7: 5352121.580103163
Loss at round 8: 3414837.3587737037
Loss at round 9: 2245296.2210331066
Loss at round 10: 1549452.4460483163
Loss at round 11: 1130933.3224229398
Loss at round 12: 867847.7394074388
Loss at round 13: 693403.7792290696
Loss at round 14: 570752.3907710693
Loss at round 15: 479813.69149390655
Loss at round 16: 409311.46104712813
Loss at round 17: 352814.04845361214
Loss at round 18: 306397.3225773753
Loss at round 19: 267662.0260300071
Loss at round 20: 234938.56408927625
Loss at round 21: 207016.9172856561
Loss at round 22: 183067.5715254324
Loss at round 23: 162417.11884599188
Loss at round 24: 144503.66025009507
Loss at round 25: 128906.49629518311
Loss at round 26: 115293.32897693952
Loss at round 27: 103366.

Loss at round 233: 0.7212525374900315
Loss at round 234: 0.6881360912956831
Loss at round 235: 0.656549658555964
Loss at round 236: 0.6264107271218649
Loss at round 237: 0.5976578750804562
Loss at round 238: 0.5702548837368135
Loss at round 239: 0.5441047385095859
Loss at round 240: 0.5191612817829578
Loss at round 241: 0.49537193983573924
Loss at round 242: 0.4726945496717879
Loss at round 243: 0.45106308070999557
Loss at round 244: 0.4304184566769641
Loss at round 245: 0.4107290664624752
Loss at round 246: 0.39195163367549246
Loss at round 247: 0.3740408067220117
Loss at round 248: 0.3569499340977602
Loss at round 249: 0.3406538550505998
Loss at round 250: 0.325101354849897
Loss at round 251: 0.3102722923979767
Loss at round 252: 0.2961187012303163
Loss at round 253: 0.2826094331563138
Loss at round 254: 0.2697205026775053
Loss at round 255: 0.2574284387047768
Loss at round 256: 0.24570116553833465
Loss at round 257: 0.23451235897998224
Loss at round 258: 0.22383215513810079
Loss at 

Loss at round 442: 5.05284760409057e-05
Loss at round 443: 4.830545384655141e-05
Loss at round 444: 4.618027957970168e-05
Loss at round 445: 4.414928887304058e-05
Loss at round 446: 4.220911094711478e-05
Loss at round 447: 4.035261530249275e-05
Loss at round 448: 3.857804806656777e-05
Loss at round 449: 3.6881677827350104e-05
Loss at round 450: 3.526011273690195e-05
Loss at round 451: 3.371079248990443e-05
Loss at round 452: 3.2229908114425914e-05
Loss at round 453: 3.081499287368258e-05
Loss at round 454: 2.9461373117156734e-05
Loss at round 455: 2.8166681298259217e-05
Loss at round 456: 2.692957818870038e-05
Loss at round 457: 2.5746881632268964e-05
Loss at round 458: 2.4616121187100227e-05
Loss at round 459: 2.3534750712554656e-05
Loss at round 460: 2.250084615629243e-05
Loss at round 461: 2.1512689434599112e-05
Loss at round 462: 2.0568749783011967e-05
Loss at round 463: 1.9665818629047964e-05
Loss at round 464: 1.8802579394577916e-05
Loss at round 465: 1.7977057618055374e-05
Loss 

In [2]:
# pytorch tensors
import torch

dtype = torch.float
device = torch.device('cpu')

N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialised layer weights; gradients are derived by hand below, so
# autograd tracking is not enabled on them.
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):

    # Forward pass: linear -> ReLU -> linear.
    h = torch.mm(x, w1)
    h_relu = torch.clamp(h, min=0)
    y_pred = torch.mm(h_relu, w2)

    # Sum-of-squares loss, extracted as a Python float for printing.
    residual = y_pred - y
    loss = residual.pow(2).sum().item()
    print('Loss at round {}: {}'.format(t, loss))

    # Backward pass: manual chain rule from the loss back to w1.
    grad_y_pred = 2.0 * residual
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    # Zero the gradient wherever the ReLU input was negative.
    grad_h = grad_h_relu.masked_fill(h < 0, 0)
    grad_w1 = torch.mm(x.t(), grad_h)

    # Gradient-descent step, applied in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

Loss at round 0: 32702780.0
Loss at round 1: 33617432.0
Loss at round 2: 40331536.0
Loss at round 3: 45107748.0
Loss at round 4: 40159556.0
Loss at round 5: 25639390.0
Loss at round 6: 12222048.0
Loss at round 7: 5109839.0
Loss at round 8: 2423877.0
Loss at round 9: 1462242.375
Loss at round 10: 1067006.875
Loss at round 11: 857233.9375
Loss at round 12: 718027.8125
Loss at round 13: 612968.8125
Loss at round 14: 528674.625
Loss at round 15: 459192.0625
Loss at round 16: 401101.21875
Loss at round 17: 352102.90625
Loss at round 18: 310435.125
Loss at round 19: 274849.34375
Loss at round 20: 244257.703125
Loss at round 21: 217829.921875
Loss at round 22: 194855.171875
Loss at round 23: 174805.171875
Loss at round 24: 157236.921875
Loss at round 25: 141779.4375
Loss at round 26: 128105.984375
Loss at round 27: 115998.578125
Loss at round 28: 105257.296875
Loss at round 29: 95708.6953125
Loss at round 30: 87200.203125
Loss at round 31: 79595.3671875
Loss at round 32: 72773.734375
Loss at 

Loss at round 275: 0.2694326937198639
Loss at round 276: 0.2579752802848816
Loss at round 277: 0.2470010221004486
Loss at round 278: 0.23650692403316498
Loss at round 279: 0.22647494077682495
Loss at round 280: 0.21687620878219604
Loss at round 281: 0.20765118300914764
Loss at round 282: 0.1988414078950882
Loss at round 283: 0.19041845202445984
Loss at round 284: 0.18236662447452545
Loss at round 285: 0.174630805850029
Loss at round 286: 0.1672448366880417
Loss at round 287: 0.16014733910560608
Loss at round 288: 0.15339981019496918
Loss at round 289: 0.14689810574054718
Loss at round 290: 0.14068186283111572
Loss at round 291: 0.13472780585289001
Loss at round 292: 0.12906384468078613
Loss at round 293: 0.12358927726745605
Loss at round 294: 0.11836518347263336
Loss at round 295: 0.1133994311094284
Loss at round 296: 0.1086231917142868
Loss at round 297: 0.1040472686290741
Loss at round 298: 0.0996805876493454
Loss at round 299: 0.09548243880271912
Loss at round 300: 0.091491490602493

In [3]:
# autograd
dtype = torch.float
device = torch.device('cpu')

N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets; no gradients are needed for these.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Trainable weights: requires_grad=True makes autograd record operations on them.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):

    # Forward pass: linear -> ReLU -> linear.
    hidden = torch.clamp(torch.mm(x, w1), min=0)
    y_pred = torch.mm(hidden, w2)

    # Sum-of-squares loss, kept as a tensor so backward() can be called on it.
    loss = torch.sum((y_pred - y).pow(2))
    print('Loss at round {}: {}'.format(t, loss.item()))

    # Autograd fills w1.grad and w2.grad.
    loss.backward()

    # Update the weights outside of autograd tracking, then reset each gradient,
    # because backward() accumulates into .grad rather than overwriting it.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            weight.grad.zero_()

Loss at round 0: 39451892.0
Loss at round 1: 48917860.0
Loss at round 2: 68999792.0
Loss at round 3: 77645632.0
Loss at round 4: 53191612.0
Loss at round 5: 18419210.0
Loss at round 6: 4608757.5
Loss at round 7: 1918752.625
Loss at round 8: 1342368.375
Loss at round 9: 1084257.5
Loss at round 10: 903758.75
Loss at round 11: 762464.25
Loss at round 12: 648613.25
Loss at round 13: 555669.0
Loss at round 14: 479189.9375
Loss at round 15: 415574.40625
Loss at round 16: 362183.53125
Loss at round 17: 317157.78125
Loss at round 18: 278933.1875
Loss at round 19: 246317.6875
Loss at round 20: 218310.9375
Loss at round 21: 194144.796875
Loss at round 22: 173237.953125
Loss at round 23: 155032.859375
Loss at round 24: 139113.0
Loss at round 25: 125124.9296875
Loss at round 26: 112811.4296875
Loss at round 27: 101937.3515625
Loss at round 28: 92302.578125
Loss at round 29: 83741.7265625
Loss at round 30: 76123.1171875
Loss at round 31: 69323.53125
Loss at round 32: 63225.8046875
Loss at round 33:

Loss at round 257: 0.5277636647224426
Loss at round 258: 0.5067318677902222
Loss at round 259: 0.4865209758281708
Loss at round 260: 0.4671705663204193
Loss at round 261: 0.44855228066444397
Loss at round 262: 0.43075263500213623
Loss at round 263: 0.41362464427948
Loss at round 264: 0.39720672369003296
Loss at round 265: 0.38147562742233276
Loss at round 266: 0.36629974842071533
Loss at round 267: 0.35182881355285645
Loss at round 268: 0.337875634431839
Loss at round 269: 0.3245227038860321
Loss at round 270: 0.31172552704811096
Loss at round 271: 0.2994130849838257
Loss at round 272: 0.287610799074173
Loss at round 273: 0.2762642502784729
Loss at round 274: 0.2653757333755493
Loss at round 275: 0.2549067437648773
Loss at round 276: 0.24494682252407074
Loss at round 277: 0.2353169471025467
Loss at round 278: 0.22604495286941528
Loss at round 279: 0.21719585359096527
Loss at round 280: 0.20865283906459808
Loss at round 281: 0.20051877200603485
Loss at round 282: 0.19262836873531342
Los

Loss at round 490: 0.00026112684281542897
Loss at round 491: 0.0002558740961831063
Loss at round 492: 0.00025108284899033606
Loss at round 493: 0.00024597052833996713
Loss at round 494: 0.00024267975823022425
Loss at round 495: 0.00023798132315278053
Loss at round 496: 0.0002342738298466429
Loss at round 497: 0.00022940551571082324
Loss at round 498: 0.0002262008929392323
Loss at round 499: 0.00022250301844906062


In [4]:
# define new autograd functions
class MyReLU(torch.autograd.Function):
    """Hand-written ReLU with an explicit forward and backward pass."""

    @staticmethod
    def forward(ctx, input):
        """Return ``input`` with negative entries clamped to zero.

        The input tensor is saved on ``ctx`` so that ``backward`` can tell
        which entries were negative.
        """
        ctx.save_for_backward(input)
        return torch.clamp(input, min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of the loss with respect to the forward input.

        The incoming gradient is passed through unchanged, except where the
        saved input was strictly negative; there it is set to zero.
        """
        (saved_input,) = ctx.saved_tensors
        # masked_fill returns a new tensor, so grad_output itself is untouched.
        return grad_output.masked_fill(saved_input < 0, 0)

dtype = torch.float
device = torch.device('cpu')

N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Trainable weights tracked by autograd.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

# Custom autograd Functions are invoked through .apply; the callable does not
# change between iterations, so it is bound once before the loop.
relu = MyReLU.apply

learning_rate = 1e-6
for t in range(500):

    # Forward pass: linear -> custom ReLU -> linear.
    hidden = relu(torch.mm(x, w1))
    y_pred = torch.mm(hidden, w2)

    # Sum-of-squares loss.
    loss = torch.sum((y_pred - y).pow(2))
    print('Loss at round {}: {}'.format(t, loss.item()))

    # Autograd calls MyReLU.backward as part of this.
    loss.backward()

    # Gradient-descent step outside of autograd tracking, followed by a reset of
    # each accumulated gradient.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            weight.grad.zero_()

Loss at round 0: 29130864.0
Loss at round 1: 25791472.0
Loss at round 2: 25342648.0
Loss at round 3: 23934704.0
Loss at round 4: 20299774.0
Loss at round 5: 14784923.0
Loss at round 6: 9546630.0
Loss at round 7: 5684624.5
Loss at round 8: 3363586.5
Loss at round 9: 2072436.875
Loss at round 10: 1376358.5
Loss at round 11: 987486.25
Loss at round 12: 756304.5
Loss at round 13: 607079.75
Loss at round 14: 502971.4375
Loss at round 15: 425326.15625
Loss at round 16: 364553.71875
Loss at round 17: 315309.625
Loss at round 18: 274574.125
Loss at round 19: 240368.296875
Loss at round 20: 211381.578125
Loss at round 21: 186589.3125
Loss at round 22: 165265.40625
Loss at round 23: 146839.421875
Loss at round 24: 130853.9453125
Loss at round 25: 116933.9765625
Loss at round 26: 104753.8984375
Loss at round 27: 94070.875
Loss at round 28: 84665.3828125
Loss at round 29: 76356.703125
Loss at round 30: 68989.8203125
Loss at round 31: 62448.01953125
Loss at round 32: 56622.9453125
Loss at round 33:

Loss at round 245: 1.1539887189865112
Loss at round 246: 1.1107964515686035
Loss at round 247: 1.0692590475082397
Loss at round 248: 1.0292783975601196
Loss at round 249: 0.9908382296562195
Loss at round 250: 0.9538210034370422
Loss at round 251: 0.9182789325714111
Loss at round 252: 0.8840813636779785
Loss at round 253: 0.8511658310890198
Loss at round 254: 0.819465696811676
Loss at round 255: 0.7890856266021729
Loss at round 256: 0.7597796320915222
Loss at round 257: 0.7315014004707336
Loss at round 258: 0.7043741345405579
Loss at round 259: 0.6782810091972351
Loss at round 260: 0.6531245112419128
Loss at round 261: 0.6289039254188538
Loss at round 262: 0.6056643128395081
Loss at round 263: 0.5832009315490723
Loss at round 264: 0.5616495609283447
Loss at round 265: 0.5409284830093384
Loss at round 266: 0.5209596753120422
Loss at round 267: 0.5017399191856384
Loss at round 268: 0.4831889569759369
Loss at round 269: 0.46542176604270935
Loss at round 270: 0.4482312798500061
Loss at roun

Loss at round 485: 0.0004287608899176121
Loss at round 486: 0.0004205484001431614
Loss at round 487: 0.0004114245530217886
Loss at round 488: 0.0004030752170365304
Loss at round 489: 0.0003946878423448652
Loss at round 490: 0.00038656260585412383
Loss at round 491: 0.000378541270038113
Loss at round 492: 0.00037062520277686417
Loss at round 493: 0.0003636359761003405
Loss at round 494: 0.00035607494646683335
Loss at round 495: 0.0003490852832328528
Loss at round 496: 0.0003418561245780438
Loss at round 497: 0.00033568067010492086
Loss at round 498: 0.0003293360641691834
Loss at round 499: 0.0003227450361009687


In [5]:
# tensorflow static graph
import tensorflow as tf

N, D_in, H, D_out = 64, 1000, 100, 10

# Graph inputs; the batch dimension is left open (None).
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Trainable weights, initialised from a normal distribution.
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass: linear -> ReLU (written as max(h, 0)) -> linear.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Sum-of-squares loss.
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Symbolic gradients of the loss with respect to both weight matrices.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Assign ops that perform one gradient-descent step when they are run.
learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    # Concrete training data fed into the placeholders on every step.
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)

    # The fetch list and the feed dict are the same for every step.
    fetches = [loss, new_w1, new_w2]
    feed = {x: x_value, y: y_value}

    for _ in range(500):
        # Fetching the assign ops is what applies the weight update; only the
        # loss value is kept.
        loss_value = session.run(fetches, feed_dict=feed)[0]
        print(loss_value)

3.6851e+07
3.6905e+07
4.18984e+07
4.29629e+07
3.45694e+07
2.04238e+07
9.56471e+06
4.3028e+06
2.27149e+06
1.47602e+06
1.10601e+06
889448.0
739077.0
623827.0
531285.0
455387.0
392366.0
339801.0
295492.0
257920.0
225937.0
198543.0
174983.0
154636.0
137008.0
121678.0
108306.0
96611.7
86350.4
77328.6
69369.7
62333.8
56104.8
50577.6
45660.6
41277.9
37369.4
33871.9
30738.6
27927.5
25401.4
23126.5
21076.8
19228.2
17557.1
16045.5
14677.8
13437.3
12311.4
11288.7
10358.7
9513.75
8745.05
8043.6
7403.24
6818.14
6283.4
5793.93
5345.77
4934.95
4557.96
4212.03
3894.33
3602.6
3334.37
3087.44
2860.09
2650.74
2457.65
2279.53
2115.2
1962.31
1821.25
1691.06
1570.7
1459.51
1356.69
1261.54
1173.47
1091.92
1016.34
946.308
881.378
821.148
765.238
713.343
665.137
620.356
578.756
540.072
504.111
470.661
439.53
410.542
383.574
358.443
335.029
313.218
292.883
273.924
256.252
239.76
224.368
210.002
196.596
184.069
172.372
161.452
151.243
141.699
132.777
124.44
116.64
109.345
102.523
96.1373
90.1609
84.569
79.3349
7

In [11]:
# pytorch nn
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Two-layer network: linear -> ReLU -> linear.
model = torch.nn.Sequential(torch.nn.Linear(D_in, H),
                            torch.nn.ReLU(),
                            torch.nn.Linear(H, D_out))

# Summed (not averaged) squared error. reduction='sum' is the supported
# spelling of the deprecated size_average=False. The criterion holds no
# per-step state, so it is built once instead of on every iteration.
criterion = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4

for i in range(500):
    # forward
    y_pred = model(x)

    # get loss
    loss = criterion(y_pred, y)
    print('{} loss in round {}'.format(loss.item(), i))

    # zero gradients: backward() accumulates into .grad, so clear them first
    model.zero_grad()

    # backward
    loss.backward()

    # update weights by hand, outside of autograd tracking
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

687.966064453125 loss in round 0
638.1206665039062 loss in round 1
595.5632934570312 loss in round 2
558.3875122070312 loss in round 3
525.3604736328125 loss in round 4
495.5038146972656 loss in round 5
468.5607604980469 loss in round 6
443.68487548828125 loss in round 7
420.9007873535156 loss in round 8
399.6797180175781 loss in round 9
379.73968505859375 loss in round 10
360.9268493652344 loss in round 11
343.0292053222656 loss in round 12
326.0562438964844 loss in round 13
309.8797607421875 loss in round 14
294.4360656738281 loss in round 15
279.7181396484375 loss in round 16
265.691162109375 loss in round 17
252.2763214111328 loss in round 18
239.49131774902344 loss in round 19
227.28839111328125 loss in round 20
215.56768798828125 loss in round 21
204.32225036621094 loss in round 22
193.55592346191406 loss in round 23
183.2407684326172 loss in round 24
173.35867309570312 loss in round 25
163.92520141601562 loss in round 26
154.94297790527344 loss in round 27
146.4103240966797 loss

0.008406143635511398 loss in round 229
0.00808846578001976 loss in round 230
0.007783106062561274 loss in round 231
0.007489598821848631 loss in round 232
0.0072073922492563725 loss in round 233
0.006936565972864628 loss in round 234
0.006676266901195049 loss in round 235
0.006425663363188505 loss in round 236
0.006184722296893597 loss in round 237
0.005953419022262096 loss in round 238
0.005730795674026012 loss in round 239
0.005516817793250084 loss in round 240
0.005311031360179186 loss in round 241
0.005113140679895878 loss in round 242
0.0049232118763029575 loss in round 243
0.0047405194491147995 loss in round 244
0.004564560484141111 loss in round 245
0.004395428579300642 loss in round 246
0.00423271581530571 loss in round 247
0.004076220560818911 loss in round 248
0.003925693687051535 loss in round 249
0.0037809540517628193 loss in round 250
0.003641691291704774 loss in round 251
0.0035076977219432592 loss in round 252
0.0033786867279559374 loss in round 253
0.0032546930015087128

5.462711214931915e-06 loss in round 441
5.292851710692048e-06 loss in round 442
5.1284077926538885e-06 loss in round 443
4.968504072166979e-06 loss in round 444
4.814137355424464e-06 loss in round 445
4.6646359805890825e-06 loss in round 446
4.520250968198525e-06 loss in round 447
4.380665814096574e-06 loss in round 448
4.243958755978383e-06 loss in round 449
4.1129433157038875e-06 loss in round 450
3.985937837569509e-06 loss in round 451
3.862011908495333e-06 loss in round 452
3.7437694118125364e-06 loss in round 453
3.6275473576097284e-06 loss in round 454
3.515418711685925e-06 loss in round 455
3.4078159387718188e-06 loss in round 456
3.302596724097384e-06 loss in round 457
3.2013515465223463e-06 loss in round 458
3.10250288748648e-06 loss in round 459
3.006684892170597e-06 loss in round 460
2.9148081921448465e-06 loss in round 461
2.8250826744624646e-06 loss in round 462
2.7381506697565783e-06 loss in round 463
2.654885065567214e-06 loss in round 464
2.572874336692621e-06 loss in r

In [12]:
# pytorch optim
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Two-layer network: linear -> ReLU -> linear.
model = torch.nn.Sequential(torch.nn.Linear(D_in, H),
                            torch.nn.ReLU(),
                            torch.nn.Linear(H, D_out))

# Summed (not averaged) squared error. reduction='sum' is the supported
# spelling of the deprecated size_average=False. The criterion holds no
# per-step state, so it is built once instead of on every iteration.
criterion = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
optimizer = torch.optim.Adam(lr=learning_rate, params=model.parameters())

for i in range(500):
    # forward
    y_pred = model(x)

    # get loss
    loss = criterion(y_pred, y)
    print('{} loss in round {}'.format(loss.item(), i))

    # zero gradients: backward() accumulates into .grad, so clear them first
    optimizer.zero_grad()

    # backward
    loss.backward()

    # update weights: the optimizer applies the Adam step to all parameters
    optimizer.step()

669.3147583007812 loss in round 0
652.7056274414062 loss in round 1
636.5620727539062 loss in round 2
620.8784790039062 loss in round 3
605.6614990234375 loss in round 4
590.7864379882812 loss in round 5
576.2739868164062 loss in round 6
562.1890258789062 loss in round 7
548.5286254882812 loss in round 8
535.2650756835938 loss in round 9
522.4939575195312 loss in round 10
510.10986328125 loss in round 11
498.1087951660156 loss in round 12
486.5279235839844 loss in round 13
475.3247375488281 loss in round 14
464.39984130859375 loss in round 15
453.71942138671875 loss in round 16
443.30902099609375 loss in round 17
433.1990966796875 loss in round 18
423.2939147949219 loss in round 19
413.6321716308594 loss in round 20
404.2043762207031 loss in round 21
394.9826354980469 loss in round 22
386.01605224609375 loss in round 23
377.2984924316406 loss in round 24
368.8080139160156 loss in round 25
360.4991455078125 loss in round 26
352.3459167480469 loss in round 27
344.3837890625 loss in round

0.09253298491239548 loss in round 232
0.08752810955047607 loss in round 233
0.08278611302375793 loss in round 234
0.07829640060663223 loss in round 235
0.0740475282073021 loss in round 236
0.07001899927854538 loss in round 237
0.06620940566062927 loss in round 238
0.0626075491309166 loss in round 239
0.05919189751148224 loss in round 240
0.05596238747239113 loss in round 241
0.05290279537439346 loss in round 242
0.05000835657119751 loss in round 243
0.0472702756524086 loss in round 244
0.044678036123514175 loss in round 245
0.042224664241075516 loss in round 246
0.03990840166807175 loss in round 247
0.03771141171455383 loss in round 248
0.03563477471470833 loss in round 249
0.033669255673885345 loss in round 250
0.031810905784368515 loss in round 251
0.030054112896323204 loss in round 252
0.02839105762541294 loss in round 253
0.026818156242370605 loss in round 254
0.0253299530595541 loss in round 255
0.02392386645078659 loss in round 256
0.022595761343836784 loss in round 257
0.0213396

3.9825561515272057e-08 loss in round 464
3.7145316156284025e-08 loss in round 465
3.4656398639754116e-08 loss in round 466
3.231230394362683e-08 loss in round 467
3.0144139628873745e-08 loss in round 468
2.8093063875189728e-08 loss in round 469
2.6196165237024616e-08 loss in round 470
2.4425975908570763e-08 loss in round 471
2.273824684095871e-08 loss in round 472
2.1182827936172544e-08 loss in round 473
1.9793038319448897e-08 loss in round 474
1.837452323627531e-08 loss in round 475
1.7184884626431085e-08 loss in round 476
1.6023168569745394e-08 loss in round 477
1.491832790634362e-08 loss in round 478
1.3893171946222083e-08 loss in round 479
1.2965048590274364e-08 loss in round 480
1.2117193470828624e-08 loss in round 481
1.1308459946235416e-08 loss in round 482
1.0531597816054727e-08 loss in round 483
9.842240800139734e-09 loss in round 484
9.16828835073602e-09 loss in round 485
8.563587172716325e-09 loss in round 486
7.966846737872402e-09 loss in round 487
7.426163683987852e-09 los

In [14]:
# pytorch custom nn modules
class TwoLayerNet(torch.nn.Module):
    """Fully connected network: linear -> ReLU -> linear."""

    def __init__(self, D_in, H, D_out):
        """Build the layers.

        Args:
            D_in: number of input features of the first linear layer.
            H: width of the hidden layer.
            D_out: number of output features of the second linear layer.
        """
        super(TwoLayerNet, self).__init__()

        self.l1 = torch.nn.Linear(D_in, H)
        self.r = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Apply ``l1``, the ReLU and ``l2`` in turn and return the result."""
        hidden = self.r(self.l1(x))
        return self.l2(hidden)
        
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = TwoLayerNet(D_in, H, D_out)

# Summed (not averaged) squared error. reduction='sum' is the supported
# spelling of the deprecated size_average=False. The criterion holds no
# per-step state, so it is built once instead of on every iteration.
criterion = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
optimizer = torch.optim.Adam(lr=learning_rate, params=model.parameters())

for i in range(500):
    # forward
    y_pred = model(x)

    # get loss
    loss = criterion(y_pred, y)
    print('{} loss in round {}'.format(loss.item(), i))

    # zero gradients: backward() accumulates into .grad, so clear them first
    optimizer.zero_grad()

    # backward
    loss.backward()

    # update weights: the optimizer applies the Adam step to all parameters
    optimizer.step()

688.3096923828125 loss in round 0
670.942626953125 loss in round 1
654.1439208984375 loss in round 2
637.7604370117188 loss in round 3
621.87646484375 loss in round 4
606.42041015625 loss in round 5
591.39208984375 loss in round 6
576.8668212890625 loss in round 7
562.7256469726562 loss in round 8
548.9468383789062 loss in round 9
535.5172729492188 loss in round 10
522.466552734375 loss in round 11
509.78173828125 loss in round 12
497.3860168457031 loss in round 13
485.3567199707031 loss in round 14
473.6468505859375 loss in round 15
462.2529602050781 loss in round 16
451.1494140625 loss in round 17
440.3233337402344 loss in round 18
429.82049560546875 loss in round 19
419.5977478027344 loss in round 20
409.600830078125 loss in round 21
399.8664245605469 loss in round 22
390.361572265625 loss in round 23
381.0859069824219 loss in round 24
372.0301208496094 loss in round 25
363.23065185546875 loss in round 26
354.6829528808594 loss in round 27
346.3542785644531 loss in round 28
338.2504

0.006047775968909264 loss in round 271
0.005727648735046387 loss in round 272
0.005426643881946802 loss in round 273
0.005143601913005114 loss in round 274
0.004877524450421333 loss in round 275
0.0046270666643977165 loss in round 276
0.004391437396407127 loss in round 277
0.004169719759374857 loss in round 278
0.003961008042097092 loss in round 279
0.0037644009571522474 loss in round 280
0.0035792114213109016 loss in round 281
0.0034048622474074364 loss in round 282
0.003240410704165697 loss in round 283
0.003085417440161109 loss in round 284
0.0029391967691481113 loss in round 285
0.0028013107366859913 loss in round 286
0.00267109926789999 loss in round 287
0.0025482645723968744 loss in round 288
0.0024322913959622383 loss in round 289
0.002322671702131629 loss in round 290
0.0022190664894878864 loss in round 291
0.0021210643462836742 loss in round 292
0.0020283483900129795 loss in round 293
0.001940626767463982 loss in round 294
0.001857497962191701 loss in round 295
0.0017787935212

In [20]:
# pytorch dynamic graphs
import random

class TwoLayerNet(torch.nn.Module):
    """Network whose depth is chosen at random on every forward pass.

    Between the input layer and the output layer, one shared ``hidden`` layer
    (followed by a ReLU) is applied zero to three times. Because the same
    module is reused, every repetition shares the same weights.

    Note: this class reuses the name ``TwoLayerNet`` and therefore shadows any
    earlier definition with that name in the same kernel.
    """

    def __init__(self, D_in, H, D_out):
        """Build the layers.

        Args:
            D_in: number of input features of the first linear layer.
            H: width of the hidden representation.
            D_out: number of output features of the last linear layer.
        """
        super(TwoLayerNet, self).__init__()

        self.l1 = torch.nn.Linear(D_in, H)
        self.r = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(H, D_out)
        self.hidden = torch.nn.Linear(H, H)

    def forward(self, x):
        """Run the input through a randomly deep stack and return the output."""
        x = self.r(self.l1(x))

        # randint is inclusive on both ends: 0, 1, 2 or 3 extra hidden layers.
        extra_layers = random.randint(0, 3)
        for _ in range(extra_layers):
            x = self.r(self.hidden(x))

        return self.l2(x)
        
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = TwoLayerNet(D_in, H, D_out)

# Summed (not averaged) squared error. reduction='sum' is the supported
# spelling of the deprecated size_average=False. The criterion holds no
# per-step state, so it is built once instead of on every iteration.
criterion = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
optimizer = torch.optim.Adam(lr=learning_rate, params=model.parameters())

for i in range(500):
    # forward: the model builds a new graph on every call
    y_pred = model(x)

    # get loss
    loss = criterion(y_pred, y)
    print('{} loss in round {}'.format(loss.item(), i))

    # zero gradients: backward() accumulates into .grad, so clear them first
    optimizer.zero_grad()

    # backward
    loss.backward()

    # update weights: the optimizer applies the Adam step to all parameters
    optimizer.step()

645.8209228515625 loss in round 0
677.4488525390625 loss in round 1
635.21337890625 loss in round 2
642.7467041015625 loss in round 3
642.5254516601562 loss in round 4
642.2431030273438 loss in round 5
640.017822265625 loss in round 6
643.80419921875 loss in round 7
619.7333374023438 loss in round 8
618.26708984375 loss in round 9
641.037353515625 loss in round 10
640.820556640625 loss in round 11
641.7001953125 loss in round 12
589.7315673828125 loss in round 13
581.7952270507812 loss in round 14
609.365966796875 loss in round 15
639.8191528320312 loss in round 16
606.4475708007812 loss in round 17
638.696533203125 loss in round 18
602.6088256835938 loss in round 19
600.2425537109375 loss in round 20
536.3507690429688 loss in round 21
638.9442749023438 loss in round 22
638.777099609375 loss in round 23
591.1010131835938 loss in round 24
638.3796997070312 loss in round 25
634.6846313476562 loss in round 26
637.925048828125 loss in round 27
637.6629028320312 loss in round 28
637.3743286

295.6779479980469 loss in round 247
212.5843505859375 loss in round 248
149.37136840820312 loss in round 249
287.684814453125 loss in round 250
145.27740478515625 loss in round 251
202.11326599121094 loss in round 252
278.96990966796875 loss in round 253
275.6860656738281 loss in round 254
137.27528381347656 loss in round 255
268.7453308105469 loss in round 256
133.4198455810547 loss in round 257
131.32101440429688 loss in round 258
42.758182525634766 loss in round 259
256.2270202636719 loss in round 260
125.18620300292969 loss in round 261
179.97959899902344 loss in round 262
43.606475830078125 loss in round 263
176.13551330566406 loss in round 264
117.9911117553711 loss in round 265
240.91845703125 loss in round 266
170.02870178222656 loss in round 267
43.548336029052734 loss in round 268
111.71614837646484 loss in round 269
231.61233520507812 loss in round 270
162.15792846679688 loss in round 271
160.07968139648438 loss in round 272
106.2341537475586 loss in round 273
104.7520599365