In [17]:
class Value:
    def __init__(self, data, children=(), _op='', name=''):
        self.data = data
        self.grad = 0
        self._backward = lambda: None
        self._prev = set(children)
        self._op = _op
        self.name = name

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, children=(self, other), _op='+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, children=(self, other), _op='*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, children=(self,), _op='ReLU')

        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, children=(self,), _op=f'**{other}')

        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out


    def __neg__(self):
        return self * -1

    def __radd__(self, other):
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        return other * self**-1
    

    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.grad = 1
        for v in reversed(topo):
            v._backward()   
    
    def __repr__(self):
        return f'var: {self.name}, data: {self.data}, grad: {self.grad}'

In [18]:
#Weights
W_ae = Value(2, name='W_ae') * 0.01
W_af = Value(4, name='W_af') * 0.01
W_ag = Value(5, name='W_ag') * 0.01
W_be = Value(2, name='W_be') * 0.01
W_bf = Value(2, name='W_bf') * 0.01
W_bg = Value(5, name='W_bg') * 0.01
W_ce = Value(1, name='W_ce') * 0.01
W_cf = Value(0, name='W_cf') * 0.01
W_cg = Value(2, name='W_cg') * 0.01
W_de = Value(2, name='W_de') * 0.01
W_df = Value(3, name='W_df') * 0.01
W_dg = Value(9, name='W_dg') * 0.01
W_ee = Value(3, name='W_ee') * 0.01
W_ef = Value(1, name='W_ef') * 0.01
W_eg = Value(2, name='W_eg') * 0.01
W_fe = Value(0.5, name='W_fe') * 0.01
W_ff = Value(2.5, name='W_ff') * 0.01
W_fg = Value(4, name='W_fg') * 0.01
W_ho = Value(1.5, name='W_ho')
W_io = Value(2.2, name='W_io')
W_jo = Value(3.8, name='W_jo')

parameters = [W_ae, W_af, W_ag, W_be, W_bf, W_bg, W_ce, W_cf, W_cg, W_de, W_df, W_dg, W_ee, W_ef, W_eg, W_fe, W_ff, W_fg, W_ho, W_io, W_jo]


In [19]:
import random

In [20]:
from typing import Any


class Module:

    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0

    def parameters(self):
        return []
    

class Neuron(Module):
    def __init__(self, nin, nonlin=True):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
        return act.relu() if self.nonlin else act

    def parameters(self):
        return self.w + [self.b]
    
    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"

class Layer(Module):
    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"
        
class MLP(Module):

    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1], nonlin=(i!=len(nouts)-1)) for i in range(len(sz)-1)]
    
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        return [p for layer in self.layers for neuron in layer.neurons for p in neuron.parameters()]
    
    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [21]:
import numpy as np

In [22]:
learning_rate = 0.01
inputs = np.repeat(np.arange(0, 1001), 6).reshape(-1, 6)/1001
outputs = np.arange(0, 2002, 2)/1001

random_numbers = np.random.randint(0, len(inputs), 10)
for num, input in enumerate(inputs):
    a = Value(input[0], name='a')
    b = Value(input[1], name='b')
    c = Value(input[2], name='c')
    d = Value(input[3], name='d')
    e = Value(input[4], name='e')
    f = Value(input[5], name='f')
            
        
    h = W_ae * a + W_be * b + W_ce * c + W_de * d + W_ee * e + W_fe * f
    i = W_af * a + W_bf * b + W_cf * c + W_df * d + W_ef * e + W_ff * f
    j = W_ag * a + W_bg * b + W_cg * c + W_dg * d + W_eg * e + W_fg * f
    o = W_ho * h + W_io * i + W_jo * j

    loss = (o-(outputs[num]))**2

    for p in parameters:
        p.grad = 0

    loss.backward()

    for p in parameters:
        p.data -= learning_rate * p.grad

In [23]:
numbers = np.linspace(0, 1, 11)
for n in numbers:

    a = Value(n, name='a')
    b = Value(n, name='b')
    c = Value(n, name='c')
    d = Value(n, name='d')
    e = Value(n, name='e')
    f = Value(n, name='f')
            
        
    h = W_ae * a + W_be * b + W_ce * c + W_de * d + W_ee * e + W_fe * f
    i = W_af * a + W_bf * b + W_cf * c + W_df * d + W_ef * e + W_ff * f
    j = W_ag * a + W_bg * b + W_cg * c + W_dg * d + W_eg * e + W_fg * f
    o = W_ho * h + W_io * i + W_jo * j

    print(f'output: {o.data}, expected: {n*2}')

output: 0.0, expected: 0.0
output: 0.20134870089887213, expected: 0.2
output: 0.40269740179774427, expected: 0.4
output: 0.6040461026966164, expected: 0.6000000000000001
output: 0.8053948035954885, expected: 0.8
output: 1.0067435044943607, expected: 1.0
output: 1.2080922053932328, expected: 1.2000000000000002
output: 1.4094409062921052, expected: 1.4000000000000001
output: 1.610789607190977, expected: 1.6
output: 1.8121383080898492, expected: 1.8
output: 2.0134870089887213, expected: 2.0


In [47]:
import random

# Initialize your MLP model
nin = 1  # Number of input neurons
nouts = [10, 1]  # Example: 2 hidden layers with 10 neurons each, and 1 output neuron
model = MLP(nin, nouts)

# Training loop
learning_rate = 0.01
num_examples = 500

# Generate random normally distributed inputs
mean = 0  # Mean of the distribution
std_dev = 33.3  # Standard deviation to get values roughly in the range -100 to 100
inputs = np.random.normal(mean, std_dev, num_examples).reshape(-1, 1)
inputs = np.clip(inputs, -100, 100).flatten()
outputs = 10 * inputs + 2

In [25]:
inputs[0]

-58.82198080922436

In [81]:
for _ in range(100):  # Number of epochs
    for i in range(len(inputs)):
        # Forward pass
        predicted = model([inputs[i]])

        # print(f'inputs: {inputs[i]}, output: {outputs[i]}')

        # Calculate loss
        expected_output = outputs[i]
        loss = (predicted - expected_output) ** 2

        print(loss.data)

        # Zero gradients and backpropagate
        model.zero_grad()
        loss.backward()

        # Update weights
        for p in model.parameters():
            p.data -= 0.000001 * p.grad
    


1.528617185661143
2.2513869014198806
0.3271847731388538
0.5693520944937855
2.120767901487743
0.399122211260104
2.174791898345886
0.6883384548180943
0.029406438281722208
1.0345910122299045
0.2096833966406004
1.3288431361334105
3.585543019634455
0.4666982238601638
1.602067409472242
0.019403422615563732
2.3198310926110097
1.217012338515978
1.219080513720311
0.7992834115453704
0.4380278321531486
2.3225282248969403
2.253080605057412
0.08207364641129847
2.0549959785906786
1.2038344862758388
1.667608035749572
0.8645406666329344
2.194504722673806
2.5518181136286384
0.11482198038120164
0.8593407467675286
4.034061822296086
2.4965586629091634
1.2065664079455256
0.9406141805138706
2.050468187880819
0.41956955302441146
0.4550855089878297
0.5259320388876947
0.046072761476025195
0.19790027403003746
0.40870697334808803
2.4881570178426866
1.5755635678696835
0.21441626091489813
0.2709648582610684
4.087212766229887
0.16680931202858937
2.0991085342407514
1.2183986311071597
0.11580262656982454
0.3099796071

In [98]:
for n in np.random.randint(-100, 100, 100):
    output = model([n])
    print(f'output: {output.data}, expected: {10*n+2}')

output: 722.5692548881073, expected: 722
output: -358.0991379882576, expected: -358
output: 582.2015028373519, expected: 582
output: -407.8897922994711, expected: -408
output: 451.8600187902217, expected: 452
output: 652.3853788627296, expected: 652
output: -407.8897922994711, expected: -408
output: 201.20331869958687, expected: 202
output: -467.63857747292724, expected: -468
output: 973.225954978742, expected: 972
output: -228.64343677910267, expected: -228
output: 291.4397307322154, expected: 292
output: 903.0420789533644, expected: 902
output: -19.522688672006105, expected: -18
output: -29.480819534248795, expected: -28
output: -567.2198860953541, expected: -568
output: -537.3454935086261, expected: -538
output: -288.39222195255877, expected: -288
output: 732.5955228917327, expected: 732
output: 231.28212271046309, expected: 232
output: -487.55483919741266, expected: -488
output: -218.68530591685993, expected: -218
output: -19.522688672006105, expected: -18
output: -846.047550238149