# Using the Gradient (with noisy data)

In [32]:
import random    # imported in this cell so it runs on a fresh kernel (Restart & Run All)

# Pick a random starting point: three coordinates, each drawn uniformly from [-10, 10].
v = [random.uniform(-10, 10) for _ in range(3)]
v

[-8.552029759521954, 0.6127506436543744, -4.633784731842852]

In [33]:
from scratch.gradient_descent import sum_of_squares_gradient, gradient_step
# `distance` is needed by the assert below; import it here so this cell does not
# depend on an import that only happens in a later cell.
from scratch.linear_algebra import distance

# Repeatedly move v against the gradient returned by sum_of_squares_gradient.
for epoch in range(1000):
    grad = sum_of_squares_gradient(v)    # compute the gradient at v
    v = gradient_step(v, grad, -0.01)    # take a negative gradient step
    print(epoch, v)

assert distance(v, [0, 0, 0]) < 0.001    # v should be close to 0

0 [-8.380989164331515, 0.6004956307812869, -4.541109037205994]
1 [-8.213369381044885, 0.5884857181656611, -4.450286856461874]
2 [-8.049101993423987, 0.5767160038023479, -4.361281119332636]
3 [-7.888119953555507, 0.565181683726301, -4.274055496945984]
4 [-7.730357554484397, 0.5538780500517749, -4.188574387007064]
5 [-7.575750403394709, 0.5428004890507394, -4.1048028992669225]
6 [-7.424235395326815, 0.5319444792697247, -4.022706841281584]
7 [-7.275750687420279, 0.5213055896843302, -3.9422527044559525]
8 [-7.130235673671874, 0.5108794778906436, -3.8634076503668333]
9 [-6.9876309601984365, 0.5006618883328308, -3.7861394973594966]
10 [-6.847878340994468, 0.4906486505661741, -3.710416707412307]
11 [-6.710920774174578, 0.4808356775548506, -3.636208373264061]
12 [-6.576702358691087, 0.4712189640037536, -3.5634842057987797]
13 [-6.445168311517265, 0.46179458472367857, -3.492214521682804]
14 [-6.31626494528692, 0.452558693029205, -3.422370231249148]
15 [-6.189939646381182, 0.44350751916862086, -

# First "Using Gradient Descent to Fit Models" example

In [79]:
import numpy as np

# Noise-free line: x runs over the integers -50..49 and y is exactly 40 * x + 5.
inputs = [(x, 40 * x + 5) for x in range(-50, 50)]

# Pool of noise values drawn from a normal distribution:
#   loc=0     -> mean of the distribution
#   scale=.2  -> standard deviation of the distribution
#   size=100  -> number of values in the `noise` array
noise = np.random.normal(loc=0, scale=.2, size=100)

# Each noisy point is the exact y plus one value picked at random from the pool.
# Every pick is an independent call, so the same pool value may be used more than once.
noisy_inputs = [(x, y + np.random.choice(noise)) for x, y in inputs]

In [80]:
from scratch.linear_algebra import vector_mean, distance
from scratch.gradient_descent import linear_gradient
import random
    
# Initial guess for [slope, intercept]: each value drawn uniformly from [-1, 1].
theta = [random.uniform(-1, 1) for _ in range(2)]

# Scale factor applied (negated) to the gradient in the update loops
learning_rate = 0.001
theta

[-0.8030267460841447, -0.06416432905313818]

In [81]:
# Full-batch gradient descent: every epoch uses all of noisy_inputs for a single update.
for epoch in range(5000):
    # Gradient of each individual point at the current theta ...
    point_gradients = [linear_gradient(x, y, theta) for x, y in noisy_inputs]
    # ... averaged into one gradient for the whole data set
    grad = vector_mean(point_gradients)
    # Negative learning rate: step against the gradient
    theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)

# The fitted parameters should be close to the generating line y = 40 * x + 5.
slope, intercept = theta
assert 39.9 < slope < 40.1,   "slope should be about 40"
assert 4.9 < intercept < 5.1, "intercept should be about 5"

0 [67.21220311922418, -0.09486041583879787]
1 [21.846014102976753, -0.05747988058557837]
2 [52.10529955734904, -0.06554029541911263]
3 [31.922348098867896, -0.043325303968607384]
4 [45.38439893666627, -0.0413376939594843]
5 [36.40521301546477, -0.025892008332581134]
6 [42.3943454705918, -0.01945639999813326]
7 [38.3996005586304, -0.007044530425227355]
8 [41.06410782677823, 0.0013477704965713325]
9 [39.286889871224545, 0.012387794084674372]
10 [40.472305287602445, 0.02162851967004749]
11 [39.68164244560397, 0.032036179220627764]
12 [40.209024968876506, 0.04163236061010841]
13 [39.85727042203511, 0.05173673216008261]
14 [40.09190080914987, 0.06146914042011533]
15 [39.93541207335259, 0.07141671425074286]
16 [40.03980000770321, 0.08118790419791182]
17 [39.97018302668129, 0.09104393969953717]
18 [40.01662740905841, 0.1008106461491372]
19 [39.98565877271932, 0.11060426356821527]
20 [40.006324646774914, 0.1203473251161161]
21 [39.99255025184138, 0.13009156641497657]
22 [40.00174751750335, 0.1

621 [40.00010437812889, 3.5273106379477213]
622 [40.00010613434111, 3.5302347323522727]
623 [40.00010788704196, 3.533152980324227]
624 [40.00010963623846, 3.5360653935529385]
625 [40.00011138193763, 3.538971983704389]
626 [40.00011312414644, 3.5418727624212356]
627 [40.000114862871875, 3.5447677413228575]
628 [40.00011659812091, 3.5476569320054017]
629 [40.00011832990049, 3.5505403460418297]
630 [40.00012005821755, 3.5534179949819644]
631 [40.00012178307901, 3.5562898903525357]
632 [40.00012350449178, 3.559156043657228]
633 [40.00012522246277, 3.562016466376723]
634 [40.00012693699884, 3.5648711699687503]
635 [40.00012864810687, 3.5677201658681295]
636 [40.000130355793715, 3.570563465486818]
637 [40.00013206006621, 3.573401080213956]
638 [40.000133760931185, 3.5762330214159124]
639 [40.00013545839545, 3.5790593004363296]
640 [40.000137152465804, 3.5818799285961704]
641 [40.00013884314903, 3.5846949171937617]
642 [40.00014053045192, 3.587504277504841]
643 [40.0001422143812, 3.5903080207

1310 [40.00076154020163, 4.621485883753722]
1311 [40.0007619824844, 4.622222284828734]
1312 [40.00076242388287, 4.622957213543879]
1313 [40.0007628643988, 4.623690672842992]
1314 [40.000763304033974, 4.624422665664023]
1315 [40.000763742790134, 4.625153194939046]
1316 [40.00076418066905, 4.625882263594276]
1317 [40.000764617672466, 4.626609874550074]
1318 [40.00076505380215, 4.627336030720964]
1319 [40.000765489059816, 4.628060735015643]
1320 [40.00076592344725, 4.628783990336989]
1321 [40.00076635696615, 4.62950579958208]
1322 [40.00076678961829, 4.630226165642199]
1323 [40.000767221405376, 4.630945091402851]
1324 [40.000767652329145, 4.631662579743769]
1325 [40.00076808239133, 4.632378633538928]
1326 [40.00076851159365, 4.63309325565656]
1327 [40.00076893993782, 4.633806448959158]
1328 [40.00076936742556, 4.634518216303495]
1329 [40.000769794058584, 4.635228560540631]
1330 [40.00077021983859, 4.635937484515926]
1331 [40.00077064476731, 4.63664499106905]
1332 [40.000771068846404, 4.63

2017 [40.00092901045573, 4.900323957412902]
2018 [40.00092911789857, 4.90050284981085]
2019 [40.00092922512659, 4.9006813845314445]
2020 [40.000929332140224, 4.900859562289826]
2021 [40.00092943893989, 4.9010373837997045]
2022 [40.00092954552602, 4.901214849773363]
2023 [40.00092965189905, 4.901391960921661]
2024 [40.000929758059385, 4.901568717954034]
2025 [40.000929864007475, 4.901745121578504]
2026 [40.00092996974372, 4.901921172501672]
2027 [40.00093007526857, 4.90209687142873]
2028 [40.00093018058242, 4.902272219063459]
2029 [40.00093028568572, 4.902447216108232]
2030 [40.000930390578866, 4.902621863264019]
2031 [40.00093049526229, 4.902796161230388]
2032 [40.000930599736414, 4.902970110705507]
2033 [40.00093070400165, 4.90314371238615]
2034 [40.00093080805842, 4.903316966967697]
2035 [40.00093091190713, 4.903489875144138]
2036 [40.00093101554822, 4.903662437608075]
2037 [40.00093111898207, 4.903834655050725]
2038 [40.00093122220914, 4.904006528161923]
2039 [40.00093132522979, 4.9

2707 [40.00096924185861, 4.967309267376431]
2708 [40.00096926886281, 4.9673542293858555]
2709 [40.00096929581302, 4.967399101498264]
2710 [40.000969322709345, 4.967443883893399]
2711 [40.00096934955189, 4.96748857675064]
2712 [40.00096937634077, 4.967533180249008]
2713 [40.000969403076084, 4.967577694567169]
2714 [40.00096942975795, 4.967622119883428]
2715 [40.00096945638646, 4.967666456375737]
2716 [40.000969482961736, 4.967710704221689]
2717 [40.00096950948387, 4.967754863598525]
2718 [40.00096953595298, 4.96779893468313]
2719 [40.00096956236917, 4.9678429176520345]
2720 [40.000969588732545, 4.967886812681417]
2721 [40.0009696150432, 4.967930619947105]
2722 [40.00096964130126, 4.967974339624572]
2723 [40.000969667506816, 4.968017971888942]
2724 [40.000969693659975, 4.9680615169149895]
2725 [40.00096971976084, 4.9681049748771375]
2726 [40.00096974580953, 4.968148345949462]
2727 [40.000969771806126, 4.96819163030569]
2728 [40.00096979775075, 4.968234828119203]
2729 [40.0009698236435, 4

3379 [40.000979228918375, 4.983937727874131]
3380 [40.000979235954446, 4.983949442949619]
3381 [40.00097924297646, 4.9839611346019925]
3382 [40.00097924998443, 4.983972802878083]
3383 [40.00097925697839, 4.983984447824629]
3384 [40.00097926395837, 4.983996069488276]
3385 [40.00097927092438, 4.984007667915575]
3386 [40.00097927787648, 4.984019243152987]
3387 [40.00097928481467, 4.984030795246875]
3388 [40.00097929173899, 4.984042324243514]
3389 [40.00097929864947, 4.984053830189084]
3390 [40.000979305546124, 4.9840653131296735]
3391 [40.000979312428996, 4.984076773111278]
3392 [40.000979319298104, 4.984088210179802]
3393 [40.00097932615348, 4.9840996243810585]
3394 [40.00097933299514, 4.984111015760767]
3395 [40.00097933982313, 4.984122384364559]
3396 [40.000979346637465, 4.9841337302379705]
3397 [40.000979353438176, 4.98414505342645]
3398 [40.000979360225294, 4.984156353975353]
3399 [40.000979366998834, 4.984167631929945]
3400 [40.00097937375884, 4.984178887335402]
3401 [40.00097938050

4074 [40.00098187234758, 4.9883390390983315]
4075 [40.00098187409839, 4.988341954194801]
4076 [40.000981875845696, 4.988344863462827]
4077 [40.00098187758951, 4.988347766914065]
4078 [40.00098187932984, 4.988350664560144]
4079 [40.000981881066686, 4.9883535564126715]
4080 [40.00098188280006, 4.988356442483231]
4081 [40.00098188452997, 4.988359322783382]
4082 [40.000981886256426, 4.988362197324663]
4083 [40.00098188797942, 4.988365066118588]
4084 [40.00098188969898, 4.988367929176648]
4085 [40.00098189141509, 4.988370786510312]
4086 [40.00098189312778, 4.988373638131024]
4087 [40.00098189483703, 4.988376484050208]
4088 [40.00098189654288, 4.988379324279262]
4089 [40.00098189824531, 4.988382158829564]
4090 [40.00098189994434, 4.988384987712468]
4091 [40.00098190163997, 4.988387810939305]
4092 [40.00098190333221, 4.988390628521385]
4093 [40.000981905021064, 4.988393440469992]
4094 [40.00098190670655, 4.988396246796391]
4095 [40.00098190838866, 4.988399047511823]
4096 [40.0009819100674, 4.

4758 [40.00098252526956, 4.989426154585036]
4759 [40.00098252571492, 4.989426896103454]
4760 [40.00098252615938, 4.989427636139279]
4761 [40.00098252660296, 4.9894283746954775]
4762 [40.00098252704565, 4.989429111775007]
4763 [40.00098252748746, 4.989429847380821]
4764 [40.00098252792837, 4.989430581515864]
4765 [40.00098252836842, 4.989431314183079]
4766 [40.000982528807576, 4.989432045385399]
4767 [40.00098252924586, 4.989432775125754]
4768 [40.00098252968327, 4.989433503407065]
4769 [40.00098253011979, 4.989434230232252]
4770 [40.00098253055546, 4.989434955604225]
4771 [40.00098253099024, 4.98943567952589]
4772 [40.000982531424164, 4.9894364020001465]
4773 [40.00098253185721, 4.989437123029888]
4774 [40.0009825322894, 4.9894378426180035]
4775 [40.00098253272072, 4.989438560767375]
4776 [40.00098253315117, 4.989439277480878]
4777 [40.000982533580775, 4.989439992761386]
4778 [40.000982534009516, 4.989440706611762]
4779 [40.00098253443739, 4.989441419034866]
4780 [40.00098253486443, 4.

# Minibatch Gradient Descent

In [82]:
import numpy as np

# Noise-free line: x runs over the integers -50..49 and y is exactly 40 * x + 5.
inputs = [(x, 40 * x + 5) for x in range(-50, 50)]

# Pool of noise values drawn from a normal distribution:
#   loc=0     -> mean of the distribution
#   scale=.2  -> standard deviation of the distribution
#   size=100  -> number of values in the `noise` array
noise = np.random.normal(loc=0, scale=.2, size=100)

# Each noisy point is the exact y plus one value picked at random from the pool.
# Every pick is an independent call, so the same pool value may be used more than once.
noisy_inputs = [(x, y + np.random.choice(noise)) for x, y in inputs]

In [83]:
from scratch.gradient_descent import minibatches

# Fresh random starting guess for [slope, intercept], each in [-1, 1].
theta = [random.uniform(-1, 1) for _ in range(2)]

for epoch in range(1000):
    # One parameter update per batch yielded by minibatches(...)
    for batch in minibatches(noisy_inputs, batch_size=20):
        batch_gradients = [linear_gradient(x, y, theta) for x, y in batch]
        grad = vector_mean(batch_gradients)
        # Negative learning rate: step against the averaged batch gradient
        theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)

# The fitted parameters should be close to the generating line y = 40 * x + 5.
slope, intercept = theta
assert 39.9 < slope < 40.1,   "slope should be about 40"
assert 4.9 < intercept < 5.1, "intercept should be about 5"

0 [37.69397806901106, -18.630474414302675]
1 [39.02181135796395, -18.633782951123752]
2 [38.5432932172678, -18.57130930229576]
3 [38.95929017935309, -17.383515758672345]
4 [39.163546308998015, -17.272340844444457]
5 [38.90241792121348, -17.14843376696083]
6 [41.39029450545435, -16.937579840685785]
7 [40.42808324620217, -16.963957962106516]
8 [39.30124762205737, -16.680845840204174]
9 [39.03427300999933, -15.697629562446817]
10 [30.519936847193435, -15.09797796039563]
11 [36.33714206877209, -13.324577775020112]
12 [40.231587825850994, -12.932341811859288]
13 [41.18806886036434, -12.875964004253833]
14 [40.780639845316536, -12.922278765866324]
15 [41.314897600138245, -12.877124404170619]
16 [39.4216938300589, -12.369156506194647]
17 [38.93970201827857, -12.321678779858205]
18 [47.184114962839516, -11.905476146116087]
19 [39.76674019438592, -12.178754544221178]
20 [39.993843523640614, -11.43504545042641]
21 [46.415071628134825, -10.993523673705779]
22 [37.921887674901754, -11.759576482220

690 [40.003526751427216, 5.0276658063922675]
691 [39.99551815416511, 5.028262218910097]
692 [40.004901766795015, 5.027550768062439]
693 [40.00449240537506, 5.027990334099399]
694 [40.00273022257952, 5.029760760978266]
695 [40.003522812782364, 5.0293039978248]
696 [40.0015608782775, 5.027825502361988]
697 [40.0004153909667, 5.027946965251259]
698 [40.00372445392327, 5.02799824829333]
699 [40.00153712169733, 5.0263804952566025]
700 [40.00405494608144, 5.026636742663057]
701 [40.00355043218073, 5.027021588377942]
702 [39.99493392998925, 5.027644739483271]
703 [40.003014473548326, 5.02859119841932]
704 [40.00092002936686, 5.028723373202546]
705 [40.00131076862712, 5.029068523029449]
706 [39.999707677686644, 5.02837043251111]
707 [39.99994554496463, 5.027599030254432]
708 [40.003874030957235, 5.027642204187872]
709 [40.00318579104573, 5.027762708410933]
710 [40.003012713485056, 5.027865952080773]
711 [40.016401991388804, 5.028952388612345]
712 [40.00393523416885, 5.028829048592211]
713 [40.

# Stochastic Gradient Descent

In [85]:
import numpy as np

# Noise-free line: x runs over the integers -50..49 and y is exactly 40 * x + 5.
inputs = [(x, 40 * x + 5) for x in range(-50, 50)]

# Pool of noise values drawn from a normal distribution:
#   loc=0     -> mean of the distribution
#   scale=.2  -> standard deviation of the distribution
#   size=100  -> number of values in the `noise` array
noise = np.random.normal(loc=0, scale=.2, size=100)

# Each noisy point is the exact y plus one value picked at random from the pool.
# Every pick is an independent call, so the same pool value may be used more than once.
noisy_inputs = [(x, y + np.random.choice(noise)) for x, y in inputs]

In [86]:
# Fresh random starting guess for [slope, intercept], each in [-1, 1].
theta = [random.uniform(-1, 1) for _ in range(2)]

# Stochastic gradient descent: one parameter update per individual data point.
# NOTE(review): this loop iterates over `inputs` (the noise-free points), not the
# `noisy_inputs` list built in the preceding cell -- confirm whether that is intentional.
# Switching is presumably not a drop-in change: single-point steps at this learning
# rate may amplify the noise for large |x|, so re-check the assertions if you change it.
for epoch in range(100):
    for x, y in inputs:
        # Gradient at this single point, then a step against it
        grad = linear_gradient(x, y, theta)
        theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)

# The fitted parameters should be close to the generating line y = 40 * x + 5.
slope, intercept = theta
assert 39.9 < slope < 40.1,   "slope should be about 40"
assert 4.9 < intercept < 5.1, "intercept should be about 5"

0 [40.103826763123834, -0.16764003199243344]
1 [40.09937087838581, 0.054071197429941265]
2 [40.095107475436315, 0.2662701543977006]
3 [40.09102699986883, 0.4693649771056407]
4 [40.087121610650826, 0.6637462678381817]
5 [40.083383767001024, 0.8497878700166103]
6 [40.07980629944648, 1.0278475876044495]
7 [40.07638229797326, 1.1982678732212448]
8 [40.07310519131532, 1.36137648673022]
9 [40.06996871008315, 1.517487126755987]
10 [40.066966790274826, 1.6669000330567085]
11 [40.06409367729092, 1.8099025637722013]
12 [40.06134382145756, 1.9467697481632742]
13 [40.05871194902785, 2.077764815564867]
14 [40.05619299249702, 2.2031397019202243]
15 [40.05378208082723, 2.323135533554508]
16 [40.051474646148016, 2.4379830924545094]
17 [40.04926615400217, 2.547903258513692]
18 [40.047152481055484, 2.6531074357651585]
19 [40.04512944708717, 2.753797958201613]
20 [40.04319322052548, 2.8501684779692797]
21 [40.041340046387425, 2.942404339291044]
22 [40.03956641857359, 3.0306829350097253]
23 [40.0378688727