In [1]:
from micrograd.engine import Value
from micrograd.neural_net import Neuron, Layer, MLP
from micrograd.viz import draw_graph

In [2]:
# Toy training set: four samples with three features each, plus one target
# label per sample (same order as the rows of xs).
xs = [
    [2, 3, -1],
    [3, -1, 0.5],
    [0.5, 1, 1],
    [1, 1, -1],
]
ys = [1, -1, -1, 1]

# Step size for the single-step gradient-descent demos that follow.
step_size = 0.01

In [3]:
# Conceptually the model is a function mapping one sample x_i to a scalar
# (strictly speaking, a 1-element vector). In terms of representation, at this
# point it only holds a set of stand-alone Value objects; nothing links them
# together yet.
# NOTE(review): presumably 3 inputs feeding layers of width 4, 4 and 1 --
# confirm against micrograd.neural_net.MLP.
model = MLP(3, [4, 4, 1])

In [4]:
# Demo only: gradient descent on f(w) = the raw MLP output for the first sample.
# Real training should drive down a loss function rather than the output on a
# single data point; this just shows that any scalar we can backpropagate
# through can be descended on.
# Calling the model on an input builds the network and runs a forward pass.
val = model(xs[0])[0]
print(f"first output = {val.data}")

# Backpropagate from the output, then nudge every parameter against its gradient.
val.run_full_backpropagation()
for param in model.parameters:
    param.data -= step_size * param.grad

# A fresh forward pass on the same sample shows the effect of the update.
val = model(xs[0])[0]
print(f"after one step = {val.data}")

first output = -0.7105225964370366
after one step = -0.727294720296473


In [5]:
# Gradient descent to reduce f(w) = squared-error loss on the first data point.
loss = (model(xs[0])[0] - ys[0]) ** 2
print(f"first loss = {loss.data}")

# Backpropagate from the loss and take one step against the gradient.
loss.run_full_backpropagation()
for param in model.parameters:
    param.data -= step_size * param.grad

# Re-evaluate the loss on the same data point after the update.
loss = (model(xs[0])[0] - ys[0]) ** 2
print(f"after one step = {loss.data}")

first loss = 2.983547050764071
after one step = 2.7853898529896064


In [6]:
num_steps = 50
# Larger step size than the 0.01 used for the single-step demos earlier on.
step_size = 0.1
for step in range(num_steps):
    # Conceptually the loss is a function of the weights that returns a scalar.
    # Building it runs a forward pass of the network over the entire dataset
    # and sums the squared errors of all samples.
    loss = sum([(model(x)[0] - y) ** 2 for x, y in zip(xs, ys)])

    # Backpropagate to obtain dloss(w)/dw for every parameter w.
    loss.run_full_backpropagation()

    # Move each parameter a small step against its gradient.
    for param in model.parameters:
        param.data -= step_size * param.grad

    # Note: this reports the loss computed before this step's update.
    print(f"Step {step} loss = {loss.data}")



Step 0 loss = 8.752951887561764
Step 1 loss = 3.704948474777731
Step 2 loss = 2.9660303981129092
Step 3 loss = 4.069500881426543
Step 4 loss = 0.5144979684255687
Step 5 loss = 0.2319877389750086
Step 6 loss = 0.13691909258194718
Step 7 loss = 0.09893503695276235
Step 8 loss = 0.07740768501491944
Step 9 loss = 0.06347911048000197
Step 10 loss = 0.053746571752151695
Step 11 loss = 0.046570558678917365
Step 12 loss = 0.041065099718451153
Step 13 loss = 0.03671018182350557
Step 14 loss = 0.033180798401665254
Step 15 loss = 0.03026352354651262
Step 16 loss = 0.027812488811025108
Step 17 loss = 0.025724654529611573
Step 18 loss = 0.023925190040531667
Step 19 loss = 0.022358453241665657
Step 20 loss = 0.02098221973067667
Step 21 loss = 0.019763874238788077
Step 22 loss = 0.018677828276136562
Step 23 loss = 0.017703727216988176
Step 24 loss = 0.016825179135218107
Step 25 loss = 0.01602883658172381
Step 26 loss = 0.015303722119340932
Step 27 loss = 0.014640725376161134
Step 28 loss = 0.01403222

In [7]:
# Show the trained model's prediction for every sample next to the targets.
print([model(sample)[0].data for sample in xs])
print(ys)

[0.9725684253675214, -0.9668185724655831, -0.9449641292667839, 0.9508174668459224]
[1, -1, -1, 1]
