In [1]:
import random
import math
import numpy as np

In [2]:
class Value():
    """Scalar node of a reverse-mode automatic-differentiation graph.

    Every arithmetic operation on a ``Value`` returns a new ``Value`` that
    remembers its operands (``_previous``) and a closure (``_backward``) that
    adds this node's gradient contribution to those operands.

    Attributes:
        value: the number held by this node.
        gradient: derivative of the node ``backward()`` was called on with
            respect to this node; accumulated with ``+=``, starts at 0.
    """

    def __init__(self, value, _children=()):
        """Create a node holding ``value``; ``_children`` are its operand nodes."""
        self.value = value
        self.gradient = 0

        # Leaf nodes have nothing to propagate; operators overwrite this.
        self._backward = lambda: None
        self._previous = set(_children)

    def __add__(self, other):
        """Return ``self + other``; a non-``Value`` operand is wrapped as a leaf."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.value + other.value, (self, other))

        def _backward():
            # Addition passes the upstream gradient through unchanged.
            self.gradient += out.gradient
            other.gradient += out.gradient

        out._backward = _backward
        return out

    def __mul__(self, other):
        """Return ``self * other``; a non-``Value`` operand is wrapped as a leaf."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.value * other.value, (self, other))

        def _backward():
            # Product rule: each factor receives the other factor's value.
            self.gradient += other.value * out.gradient
            other.gradient += self.value * out.gradient

        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return ``self ** other``.

        ``other`` is used as a raw exponent (it is not wrapped in a ``Value``),
        so no gradient is propagated to it.
        """
        out = Value(self.value**other, (self,))

        def _backward():
            # Power rule: d/dx x**n = n * x**(n-1).
            self.gradient += (other * self.value**(other - 1)) * out.gradient

        out._backward = _backward
        return out

    def relu(self):
        """Return a node holding ``self.value`` if it is positive, else zero."""
        out = Value((self.value > 0) * self.value, (self,))

        def _backward():
            # The gradient only flows where the input was strictly positive.
            self.gradient += (self.value > 0) * out.gradient

        out._backward = _backward
        return out

    def __radd__(self, other):
        # use __add__, i.e. other + self for right addition
        return self + other

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        return other + (-self)

    def __neg__(self):  # -self
        return self * -1

    def __truediv__(self, other):  # self / other
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        return other * self**-1

    def __rmul__(self, other):  # other * self
        return self * other

    def __repr__(self):
        return f"Value(data={self.value}, gradient={self.gradient})"

    def backward(self):
        """Back-propagate from this node through every node it depends on.

        Sets ``self.gradient`` to 1 and then runs each node's ``_backward``
        closure in reverse topological order. Gradients are accumulated with
        ``+=``, so they must be reset before a second pass if accumulation is
        not wanted.

        The topological order is built with an explicit stack instead of
        recursion, so graphs deeper than the interpreter's recursion limit
        (for example a long chain of additions) do not raise RecursionError.
        """
        topology = []
        visited = {self}
        # Each stack entry pairs a node with an iterator over its operands;
        # the iterator remembers how far that node's operands were explored.
        stack = [(self, iter(self._previous))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._previous)))
                    break
            else:
                # All operands are done: emit the node (post-order).
                topology.append(node)
                stack.pop()

        self.gradient = 1
        for v in reversed(topology):
            v._backward()

In [3]:
class Module:
    """Base class for anything that owns trainable parameters."""

    def parameters(self):
        """Return the trainable parameters; the base class has none."""
        return list()

    def zero_gradient(self):
        """Reset the ``gradient`` of every parameter to 0."""
        for p in self.parameters():
            p.gradient = 0

In [4]:
class Neuron(Module):
    """A single unit: weighted sum of its inputs plus a bias, optionally ReLU'd."""

    def __init__(self, n_input, is_nonlinear=True):
        """Create ``n_input`` weights drawn uniformly from [-1, 1] and a zero bias."""
        self.weights = []
        for _ in range(n_input):
            self.weights.append(Value(random.uniform(-1, 1)))
        self.bias = Value(0)
        self.is_nonlinear = is_nonlinear

    def __call__(self, x):
        """Return the neuron's output for the iterable of inputs ``x``.

        Inputs are paired with the weights via ``zip``, so surplus inputs or
        surplus weights are silently ignored.
        """
        # Start from the bias and accumulate the products left to right.
        activation = self.bias
        for w, x_i in zip(self.weights, x):
            activation = activation + w * x_i
        if self.is_nonlinear:
            return activation.relu()
        return activation

    def parameters(self):
        """Return the weights followed by the bias."""
        return [*self.weights, self.bias]


In [5]:
class Layer(Module):
    """A group of ``Neuron`` objects that all receive the same input."""

    def __init__(self, n_inputs, n_outputs, **kwargs):
        """Build ``n_outputs`` neurons with ``n_inputs`` inputs each.

        Extra keyword arguments are forwarded to every ``Neuron``.
        """
        self.neurons = []
        for _ in range(n_outputs):
            self.neurons.append(Neuron(n_inputs, **kwargs))

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns a list of outputs, except that a layer with exactly one neuron
        returns that single output unwrapped.
        """
        outputs = list(map(lambda unit: unit(x), self.neurons))
        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def parameters(self):
        """Return the parameters of all neurons, neuron by neuron."""
        collected = []
        for unit in self.neurons:
            collected.extend(unit.parameters())
        return collected



In [6]:
def max_margin_loss(target, predicted):
    """Return ``relu(1 - target * predicted)``, i.e. a max-margin (hinge) loss.

    The arithmetic result must expose a ``relu()`` method.
    """
    margin = 1 + (-target) * predicted
    return margin.relu()

In [7]:
def rmse(target, predicted):
    """Return the square root of the squared difference of the two arguments.

    No mean is taken here, so for a single pair of scalars this equals
    ``|target - predicted|``.
    """
    residual = target - predicted
    squared = residual ** 2
    return squared ** (1/2)

In [8]:
class Network(Module):
    """Feed-forward stack of ``Layer`` objects with a pluggable loss function.

    All layers apply ReLU except the last one, which is linear.
    """

    def __init__(self, n_inputs, layer_sizes, loss_function=rmse, output_transform=lambda x:x):
        """Build the layers.

        Args:
            n_inputs: number of input features.
            layer_sizes: number of neurons per layer, in order (any sequence).
            loss_function: callable ``(target, predicted) -> loss``.
            output_transform: callable applied to the last layer's output.
        """
        # list(...) lets callers pass a tuple as well as a list.
        size = [n_inputs] + list(layer_sizes)
        n_layers = len(size) - 1
        self.layers = [
            Layer(size[i], size[i+1], is_nonlinear=i != n_layers - 1)
            for i in range(n_layers)
        ]
        self.loss_function = loss_function
        self.output_transform = output_transform

    def parameters(self):
        """Return the parameters of every layer, first layer first."""
        return [p for layer in self.layers for p in layer.parameters()]

    def forward(self, x):
        """Run one sample through all layers and apply ``output_transform``."""
        for layer in self.layers:
            # A one-neuron layer returns a bare Value rather than a list; wrap
            # it again so the next layer's neurons can iterate over it.
            if isinstance(x, Value):
                x = [x]
            x = layer(x)
        return self.output_transform(x)

    def loss(self, x, target):
        """Return the loss for one sample, or the summed loss for a batch.

        ``x`` is treated as a batch when its first element is itself a
        list, tuple or NumPy array; ``target`` must then be indexable with one
        entry per sample.
        """
        if isinstance(x[0], (list, tuple, np.ndarray)):
            y_hat = [self.forward(xi) for xi in x]
            loss = [self.loss_function(target[i], y_hat[i]) for i in range(len(target))]
            return sum(loss)
        y_hat = self.forward(x)
        return self.loss_function(target, y_hat)

    def train(self, x, target, learning_rate=0.01):
        """Do one gradient-descent step on the summed loss of a batch.

        Args:
            x: sequence of samples.
            target: sequence of targets, indexed in step with ``x``.
            learning_rate: step size applied to every parameter.

        Returns:
            The summed loss of the batch before the update, or ``0.0`` without
            touching the parameters when the batch is empty.
        """
        if len(x) == 0:
            # sum([]) is the int 0, which has no backward(); nothing to learn.
            return 0.0
        self.zero_gradient()
        losses = [self.loss(x[sample_i], target[sample_i]) for sample_i in range(len(x))]
        total_loss = sum(losses)
        total_loss.backward()
        for param in self.parameters():
            param.value -= learning_rate*param.gradient
        return total_loss.value

In [16]:
from sklearn.datasets import make_circles

# Seed both random sources so "Restart Kernel -> Run All" reproduces the same
# dataset and the same initial weights (Neuron draws them from `random`).
SEED = 0
random.seed(SEED)

data, target = make_circles(100, random_state=SEED)
net = Network(2, [1,5,1], loss_function=rmse)

In [19]:
n_epochs = 100
batch_size = 5
for e in range(n_epochs):
    # Learning rate decays linearly from 1.0 towards 0.1 over the run.
    lr = 1.0 - 0.9*e/n_epochs
    for b in range(0, len(data), batch_size):
        # `loss` is overwritten each step, so it ends up holding the summed
        # loss of the epoch's last mini-batch only.
        loss = net.train(data[b:b+batch_size], target[b:b+batch_size], learning_rate=lr)
    if e % 3 != 0:
        continue
    # Every third epoch: score the whole dataset by rounding the raw output.
    predictions = [net.forward(sample) for sample in data]
    true_predict = [round(prediction.value) == target[i] for i, prediction in enumerate(predictions)]
    print(f"Epoch [{e}/{n_epochs}], loss {loss} accuracy {sum(true_predict)/ len(data)}")

Epoch [0/100], loss 15.000000000000009 accuracy 0.0
Epoch [3/100], loss 14.325000000000014 accuracy 0.0
Epoch [6/100], loss 13.650000000000011 accuracy 0.0
Epoch [9/100], loss 12.975000000000016 accuracy 0.0
Epoch [12/100], loss 12.300000000000013 accuracy 0.0
Epoch [15/100], loss 11.625000000000014 accuracy 0.0
Epoch [18/100], loss 10.95000000000001 accuracy 0.0
Epoch [21/100], loss 10.275000000000016 accuracy 0.0
Epoch [24/100], loss 9.600000000000017 accuracy 0.0
Epoch [27/100], loss 8.925000000000018 accuracy 0.0
Epoch [30/100], loss 8.250000000000018 accuracy 0.0
Epoch [33/100], loss 7.575000000000021 accuracy 0.0
Epoch [36/100], loss 6.900000000000016 accuracy 0.0
Epoch [39/100], loss 6.225000000000017 accuracy 0.0
Epoch [42/100], loss 5.550000000000015 accuracy 0.0
Epoch [45/100], loss 4.875000000000016 accuracy 0.0
Epoch [48/100], loss 4.200000000000017 accuracy 0.0
Epoch [51/100], loss 3.5250000000000177 accuracy 0.0
Epoch [54/100], loss 7.990000000000028 accuracy 0.0
Epoch [5

In [20]:
# Forward pass on the first sample only; the cell displays the network's output for it.
net.forward(data[0])

Value(data=0.3840000000000048, gradient=0)

In [13]:
# Scratch check: summing each inner list yields one total per row.
[sum(row) for row in [[1, 2]]]

[3]