In [1]:
from micrograd.engine import Value
from micrograd.nn import neuron, Layer
import random

In [278]:
# Create a tiny XOR dataset: N_SAMPLES random binary pairs, each labelled x1 XOR x2.
SEED = 42        # fixed seed so a fresh kernel reproduces the same samples
N_SAMPLES = 20   # number of (x1, x2) pairs to draw

# Seed the global generator so the draws below are repeatable on every run.
# Any later code that also draws from `random` is made deterministic as well
# (presumably micrograd's weight initialisation -- TODO confirm).
random.seed(SEED)

X1 = [random.choice([0, 1]) for _ in range(N_SAMPLES)]
X2 = [random.choice([0, 1]) for _ in range(N_SAMPLES)]

# Inputs: one [Value, Value] pair per sample
X = [[Value(x1), Value(x2)] for x1, x2 in zip(X1, X2)]
# Targets: 1 when the two bits differ (XOR), otherwise 0
y = [Value(int(x1 != x2)) for x1, x2 in zip(X1, X2)]
X, y

([[Value(data=1, grad=0), Value(data=0, grad=0)],
  [Value(data=0, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=0, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=0, grad=0), Value(data=0, grad=0)],
  [Value(data=0, grad=0), Value(data=1, grad=0)],
  [Value(data=0, grad=0), Value(data=0, grad=0)],
  [Value(data=0, grad=0), Value(data=0, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=0, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=0, grad=0)],
  [Value(data=1, grad=0), Value(data=1, grad=0)],
  [Value(data=0, grad=0), Value(data=1, grad=0)],
  [Value(data=1, grad=0), Value(data=0, grad=0)],
  [Value(data=0, grad=0), Value(data=0, grad=0)],
  [Value(data=1, grad=0), Value(data=0, grad=0)],
  [Value(data=0, grad=0), Value(data=1, grad=0)]],

In [266]:
class mlp:
    """Small two-stage network: a ``Layer(2, 8)`` followed by a ``neuron(8, nonlin=False)``."""

    def __init__(self):
        hidden = Layer(2, 8)
        output = neuron(8, nonlin=False)
        # Stages are applied in list order by __call__
        self.layers = [hidden, output]

    def __call__(self, x):
        """Feed ``x`` through every stage in order and return the last stage's result."""
        result = x
        for stage in self.layers:
            result = stage(result)
        return result

    def parameters(self):
        """Return a single flat list holding the parameters of all stages, in stage order."""
        collected = []
        for stage in self.layers:
            collected.extend(stage.parameters())
        return collected

    def __repr__(self):
        described = [str(stage) for stage in self.layers]
        return "MLP of [" + ", ".join(described) + "]"

    def zero_grad(self):
        """Reset the ``grad`` attribute of every parameter to 0."""
        for param in self.parameters():
            param.grad = 0

In [283]:
# Instantiate the network (see mlp.__init__: Layer(2, 8) -> neuron(8, nonlin=False)).
# Calling mlp() builds new layer objects, so re-running this cell rebinds `clf`
# to a fresh model and discards whatever the previous one had learned.
clf = mlp()

In [285]:
# Train the one-hidden-layer network to learn XOR with full-batch gradient descent.

N_EPOCHS = 200        # number of passes over the whole dataset
LEARNING_RATE = 0.01  # step size of the gradient-descent update
LOG_EVERY = 20        # print the loss once every LOG_EVERY epochs

# Use a real loop name rather than `_`: the index is read below, and in
# IPython `_` is the last-output cache, which assigning to would shadow.
for epoch in range(N_EPOCHS):
    # One epoch of training: clear parameter gradients left from the previous step
    clf.zero_grad()

    # Sum of squared errors over every sample
    loss = Value(0)
    for xi, yi in zip(X, y):
        err = -yi + clf(xi)  # residual: prediction minus target
        loss = loss + err * err

    loss.backward()

    # Update parameters: step against the gradient
    for p in clf.parameters():
        p.data -= LEARNING_RATE * p.grad

    if epoch % LOG_EVERY == 0:
        print(f'epoch {epoch} loss: {loss.data}')

epoch 0 loss: 18.48226252132663
epoch 20 loss: 1.0660252862941315
epoch 40 loss: 0.47350546745708216
epoch 60 loss: 0.22303368540645688
epoch 80 loss: 0.10228246217059639
epoch 100 loss: 0.04645297064959541
epoch 120 loss: 0.02064193565937767
epoch 140 loss: 0.009165113233882999
epoch 160 loss: 0.003985699294053325
epoch 180 loss: 0.0017283713899249248


In [287]:
# "Validation": evaluate the network on each of the four XOR input pairs.
# Expected targets, in order, are 1, 1, 0, 0.
tuple(clf(pair).data for pair in ([1, 0], [0, 1], [1, 1], [0, 0]))

(0.9956261268869936,
 0.9980805351568673,
 0.0011744151495374985,
 0.012476701303970338)