In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def test():
    def f(x):
        return 3*x**2 + 10
    xs = np.arange(-5, 5, 0.25)
    ys = f(xs)

    plt.plot(xs, ys)

# test()

In [3]:
# Micrograd Value class
# Describes values in the context of and/or as a result of expression
class Value:
    def __init__(self, data, operands=(), operator='', label=''):
        self.data = data
        self.operands = set(operands)
        self.operator = operator
        self.label = label or str(data)
        self.grad = 0.0
        self._backward = lambda: None
        
    def copy(self):
        operands = tuple(o.copy() for o in self.operands)
        value = Value(self.data, operands, self.operator, self.label)
        return value
        
    def topo(self):
        topo = self.order_top()

        out = []
        for v in topo:
            out.append(f'Value({v.label}={v.data}, grad({v.label})={v.grad})')

        return '\n'.join(out)

    def __repr__(self):
        return f'Value({self.label}={self.data}, grad({self.label})={self.grad})'

    def __add__(self, value):
        if not isinstance(value, Value):
            value = Value(value)

        out = Value(self.data + value.data, (self, value), '+')

        if self.label and value.label:
            out.label = f'({self.label}) + ({value.label})'
        
        def backward():
            self.grad += out.grad * 1.0 # ∂out/∂sum * ∂sum/∂self
            value.grad += out.grad * 1.0 # ∂out/∂sum * ∂sum/∂value

        out._backward = backward

        return out
    
    def __neg__(self):
        out = self * (-1)
        out.label = f'-({self.label})'
        return out
    
    def __sub__(self, value):
        if not isinstance(value, Value): value = Value(value)
        out = self + (-value)
        out.label = f'({self.label}) - ({value.label})'
        return out
    
    def __rsub__(self, value):
        if not isinstance(value, Value): value = Value(value)
        out = -(self - value)
        out.lable = f'({value.label}) - ({self.label})'
        return out
    
    def __radd__(self, value):
        if not isinstance(value, Value): value = Value(value)
        out = self + value
        out.label = f'({value.label}) + ({self.label})'
        return out

    def __mul__(self, value):
        if not isinstance(value, Value): value = Value(value)

        out = Value(self.data * value.data, (self, value), '*')
        
        if self.label and value.label:
            out.label = f'({self.label})*({value.label})'
        
        def backward():
            self.grad += out.grad * value.data # ∂out/∂mul * ∂mul/∂self
            value.grad += out.grad * self.data # ∂out/∂mul * ∂mul/∂value
            
        out._backward = backward

        return out
    
    def __rmul__(self, value):
        if not isinstance(value, Value):
            value = Value(value)

        out = self * value
        out.label = f'({value.label})*({self.label})'
        return out
    
    def __pow__(self, value):
        if not isinstance(value, Value):
            value = Value(value)

        out = Value(self.data**value.data, (self, value),
                    label=f'({self.label or self.data})**{value.label or value.data}')
        
        def backward():
            self.grad += out.grad * value.data * self.data**(value.data - 1)
            
        out._backward = backward
        
        return out
    
    def __truediv__(self, value):
        if not isinstance(value, Value):
            value = Value(value)

        out = self * value**(-1)
        out.label = f'({self.label})/({value.label})'
        return out
    
    def exp(self):
        out = Value(math.exp(self.data), (self,), label=f'exp({self.label})' if self.label else 'exp')
        def backward():
            self.grad += out.data * out.grad
            
        out._backward = backward

        return out

    def tanh(self):
        e = math.exp(2*self.data)
        t = (e - 1)/(e + 1)
        out = Value(t, (self,), label=f'tanh({self.label})' if self.label else 'tanh')
        def backward():
            # print(f'bp tanh self {self} out {out} t {t} grad {out.grad * (1 - t**2)}')
            self.grad += out.grad * (1 - t**2) # ∂out/∂tanh * ∂tanh/∂self
                                        
        out._backward = backward

        return out

    def order_top(self, visited=None):
        # Topological ordering
        if visited is None:
            visited = set()
        # print(f'{{ self data {self.data}, operands {len(self.operands)}, visited {self in visited}, operator {self.operator}')
        topo = []
        if self not in visited:
            visited.add(self)
            for c in self.operands:
                topo += c.order_top(visited)
                # print(f'operand {c.data} visited {len(visited)} topo {len(topo)}')
                
            topo.append(self)

        # print(f'}} self data {self.data}, operands {len(self.operands)}, operator {self.operator}, visited {self in visited}, topo len {len(topo)}')
        return topo

    def backward(self):
        self.grad = 1.0
        for v in reversed(self.order_top()):
            v._backward()

In [4]:
def weighted_biased_sum():
    x1 = Value(2.0, label='x1')
    x2 = Value(0.0, label='x2')

    w1 = Value(-3.0, label='w1')
    w2 = Value(1.0, label='w2')
    # bias
    b = Value(6.8813735870195432, label='b')

    x1w1 = x1 * w1
    x2w2 = x2 * w2
    w_sum = x1w1 + x2w2
    return w_sum + b

out_a = weighted_biased_sum()
out_b = weighted_biased_sum()

o = out_a.tanh() # activation function

# directly calculate tanh()
e = (2*out_b).exp()
t = (e - 1)/(e + 1)

print(o)
print('-' * 30)
print(t)

Value(tanh((((x1)*(w1)) + ((x2)*(w2))) + (b))=0.7071067811865476, grad(tanh((((x1)*(w1)) + ((x2)*(w2))) + (b)))=0.0)
------------------------------
Value(((exp((2)*((((x1)*(w1)) + ((x2)*(w2))) + (b)))) - (1))/((exp((2)*((((x1)*(w1)) + ((x2)*(w2))) + (b)))) + (1))=0.7071067811865477, grad(((exp((2)*((((x1)*(w1)) + ((x2)*(w2))) + (b)))) - (1))/((exp((2)*((((x1)*(w1)) + ((x2)*(w2))) + (b)))) + (1)))=0.0)


In [5]:
o.backward()
t.backward()

In [6]:
print(o.topo())
print('-' * 50)
print(t.topo())

Value(b=6.881373587019543, grad(b)=0.4999999999999999)
Value(w1=-3.0, grad(w1)=0.9999999999999998)
Value(x1=2.0, grad(x1)=-1.4999999999999996)
Value((x1)*(w1)=-6.0, grad((x1)*(w1))=0.4999999999999999)
Value(x2=0.0, grad(x2)=0.4999999999999999)
Value(w2=1.0, grad(w2)=0.0)
Value((x2)*(w2)=0.0, grad((x2)*(w2))=0.4999999999999999)
Value(((x1)*(w1)) + ((x2)*(w2))=-6.0, grad(((x1)*(w1)) + ((x2)*(w2)))=0.4999999999999999)
Value((((x1)*(w1)) + ((x2)*(w2))) + (b)=0.8813735870195432, grad((((x1)*(w1)) + ((x2)*(w2))) + (b))=0.4999999999999999)
Value(tanh((((x1)*(w1)) + ((x2)*(w2))) + (b))=0.7071067811865476, grad(tanh((((x1)*(w1)) + ((x2)*(w2))) + (b)))=1.0)
--------------------------------------------------
Value(-1=-1, grad(-1)=0.0)
Value(1=1, grad(1)=-0.10355339059327374)
Value(2=2, grad(2)=0.2203433967548858)
Value(b=6.881373587019543, grad(b)=0.5)
Value(w1=-3.0, grad(w1)=1.0)
Value(x1=2.0, grad(x1)=-1.5)
Value((x1)*(w1)=-6.0, grad((x1)*(w1))=0.5)
Value(x2=0.0, grad(x2)=0.5)
Value(w2=1.0, gra

In [7]:
import torch

In [8]:
def weighted_biased_sum_2():
    x1 = torch.Tensor([2.0]).double()
    x2 = torch.Tensor([0.0]).double()

    w1 = torch.Tensor([-3.0]).double()
    w2 = torch.Tensor([1.0]).double()
    
    b = torch.Tensor([6.8813735870195432]).double()
    
    for t in (x1, x2, w1, w2, b):
        t.requires_grad = True

    o = torch.tanh(x1*w1 + x2*w2 + b)

    print(o.data.item())

    o.backward()
    
    print('-' * 50)
    print('x1', x1.grad.item())
    print('w1', w1.grad.item())
    print('x2', x2.grad.item())
    print('w2', w2.grad.item())
    
weighted_biased_sum_2()

0.7071066904050358
--------------------------------------------------
x1 -1.5000003851533106
w1 1.0000002567688737
x2 0.5000001283844369
w2 0.0


In [9]:
import random

In [10]:
class Neuron():
    def __init__(self, num):
        self.w = [Value(random.uniform(-1, 1)) for i in range(num)]
        self.b = Value(random.uniform(-1, 1))
        
    def __call__(self, x):
        # w * x + b
        act = sum([w * x for w, x in zip(self.w, x)], self.b)
        out = act.tanh()
        return out
    
    def parameters(self):
        return self.w + [self.b]

class Layer():
    def __init__(self, num_inputs, num_neurons):
        self.neurons = [Neuron(num_inputs) for i in range(num_neurons)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out
    
    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]
    
class MLP():
    def __init__(self, num_inputs, nums_neurons):
        sz = [num_inputs] + nums_neurons
        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(nums_neurons))]

    def __call__(self, x):
        out = x
        for l in self.layers:
            out = l(out)
        return out
    
    def parameters(self):
        return [p for l in self.layers for p in l.parameters()]

In [31]:
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x).data

-0.17405151726260543

In [32]:
len(n.parameters())

41

In [33]:
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

ys = [1.0, -1.0, -1.0, 1.0]

In [39]:
def guess(n, xs, ys):
    y_pred = [n(x) for x in xs]
    loss = sum((y_guess - y_target)**2 for y_target, y_guess in zip(ys, y_pred))
    return loss, y_pred

def train(n, xs, ys):
    loss, y_pred = guess(n, xs, ys)

    for p in n.parameters():
        p.grad = 0.0

    loss.backward()

    for p in n.parameters():
        p.data += -0.1 * p.grad
    
    return loss, y_pred

In [40]:
for i in range(20):
    loss, y_pred = train(n, xs, ys)
    print(i, loss.data, [y.data for y in y_pred])

0 0.02683670793215246 [0.9007536804383721, -0.9419542123963623, -0.9153718413356411, 0.9196530194209068]
1 0.024570319176703184 [0.9047239826522843, -0.9445431714105915, -0.9190803919344267, 0.9233882718614185]
2 0.022649603311120255 [0.9082532487311727, -0.9468222606136724, -0.9223566363210182, 0.9266803365180296]
3 0.021001454270474604 [0.9114181499022602, -0.9488483337184761, -0.9252779695424451, 0.929609408976341]
4 0.01957199234933282 [0.9142778080938236, -0.9506646721326528, -0.9279037182387712, 0.9322368112869506]
5 0.018320636050661525 [0.9168786644235386, -0.9523048812906517, -0.9302802234792171, 0.9346103369343435]
6 0.017216240115193467 [0.9192577876036178, -0.9537955021365807, -0.9324442783129332, 0.936767855507979]
7 0.016234505466027106 [0.921445182041601, -0.9551578101943252, -0.9344255147691252, 0.9387398094870126]
8 0.015356197704192453 [0.9234654353032482, -0.9564090849126538, -0.9362480999073641, 0.9405509843033755]
9 0.01456589422004686 [0.9253389178657239, -0.95756