In [2]:
class Value:
    def __init__(self, data, children=(), _op='', name=''):
        self.data = data
        self.grad = 0
        self._backward = lambda: None
        self._prev = set(children)
        self._op = _op
        self.name = name

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, children=(self, other), _op='+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, children=(self, other), _op='*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, children=(self,), _op='ReLU')

        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, children=(self,), _op=f'**{other}')

        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out


    def __neg__(self):
        return self * -1

    def __radd__(self, other):
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        return other * self**-1
    

    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.grad = 1
        for v in reversed(topo):
            v._backward()   
    
    def __repr__(self):
        return f'var: {self.name}, data: {self.data}, grad: {self.grad}'

In [159]:
#Weights
W_ae = Value(2, name='W_ae') * 0.01
W_af = Value(4, name='W_af') * 0.01
W_ag = Value(5, name='W_ag') * 0.01
W_be = Value(2, name='W_be') * 0.01
W_bf = Value(2, name='W_bf') * 0.01
W_bg = Value(5, name='W_bg') * 0.01
W_ce = Value(1, name='W_ce') * 0.01
W_cf = Value(0, name='W_cf') * 0.01
W_cg = Value(2, name='W_cg') * 0.01
W_de = Value(2, name='W_de') * 0.01
W_df = Value(3, name='W_df') * 0.01
W_dg = Value(9, name='W_dg') * 0.01
W_ee = Value(3, name='W_ee') * 0.01
W_ef = Value(1, name='W_ef') * 0.01
W_eg = Value(2, name='W_eg') * 0.01
W_fe = Value(0.5, name='W_fe') * 0.01
W_ff = Value(2.5, name='W_ff') * 0.01
W_fg = Value(4, name='W_fg') * 0.01
W_ho = Value(1.5, name='W_ho')
W_io = Value(2.2, name='W_io')
W_jo = Value(3.8, name='W_jo')

parameters = [W_ae, W_af, W_ag, W_be, W_bf, W_bg, W_ce, W_cf, W_cg, W_de, W_df, W_dg, W_ee, W_ef, W_eg, W_fe, W_ff, W_fg, W_ho, W_io, W_jo]


In [3]:
import random

In [89]:
from typing import Any


class Module:

    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0

    def parameters(self):
        return []
    

class Neuron(Module):
    def __init__(self, nin, nonlin=True):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
        return act.relu() if self.nonlin else act

    def parameters(self):
        return self.w + [self.b]
    
    def __repr__(self):
        val = ''
        count = 1
        for w in self.w:
            val += f'weight {count}: {w.data:.2f}, '
            count += 1
        return val

class Layer(Module):
    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
    def __repr__(self):
        val = ''
        count = 1
        for neuron in self.neurons:
            val += f'(neuron {count}: {neuron}), \n'
            count += 1
        return val
        
class MLP(Module):

    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1], nonlin=(i!=len(nouts)-1)) for i in range(len(sz)-1)]
    
    def __call__(self, x):
        layerOutputs = []
        for layer in self.layers:
            x = layer(x)
            layerOutputs.append(x)
        return layerOutputs
    
    def parameters(self):
        return [p for layer in self.layers for neuron in layer.neurons for p in neuron.parameters()]
    
    def __repr__(self):
        val = ''
        count = 1
        for layer in self.layers:
            val += f'[\n    layer {count}: {layer}], \n\n'
            count += 1
        return val
    
    def equation(self):
        val = ''
        layerEquations = []

        for layer in self.layers:

            neuronEquations = []
            for neuron in layer.neurons:

                equation = ''
                if len(layerEquations) == 0:

                    for w in neuron.w:
                        equation += f'{w.data:.2f}X + '

                else:
                    for w in neuron.w:
                        equation += f'{w.data:.2f}({layerEquations[-1]}) + '

                if neuron.nonlin:
                    neuronEquations.append(f'max({equation}, 0) + {neuron.b.data} + ')
                else:
                    neuronEquations.append(f'{equation}{neuron.b.data} + ')

            fullLayerEquation = ''

            for neuronEquation in neuronEquations:
                fullLayerEquation += neuronEquation

            layerEquations.append(fullLayerEquation)

        for layerEquation in layerEquations:
            val += layerEquation
        
        return val.replace(' + )', '').replace(' + ,', '')[:-3]
            


In [51]:
import numpy as np
import torch

In [331]:
model = MLP(1, [10, 1])

model.equation()

'max(-0.45X 0) + 0 + max(-0.27X 0) + 0 + max(-0.30X 0) + 0 + max(-0.99X 0) + 0 + max(-0.05X 0) + 0 + max(-0.26X 0) + 0 + max(-0.14X 0) + 0 + max(-0.88X 0) + 0 + max(-0.56X 0) + 0 + max(-0.12X 0) + 0 + 0.97(max(-0.45X 0) + 0 + max(-0.27X 0) + 0 + max(-0.30X 0) + 0 + max(-0.99X 0) + 0 + max(-0.05X 0) + 0 + max(-0.26X 0) + 0 + max(-0.14X 0) + 0 + max(-0.88X 0) + 0 + max(-0.56X 0) + 0 + max(-0.12X 0) + 0 + -0.64(max(-0.45X 0) + 0 + max(-0.27X 0) + 0 + max(-0.30X 0) + 0 + max(-0.99X 0) + 0 + max(-0.05X 0) + 0 + max(-0.26X 0) + 0 + max(-0.14X 0) + 0 + max(-0.88X 0) + 0 + max(-0.56X 0) + 0 + max(-0.12X 0) + 0 + -0.52(max(-0.45X 0) + 0 + max(-0.27X 0) + 0 + max(-0.30X 0) + 0 + max(-0.99X 0) + 0 + max(-0.05X 0) + 0 + max(-0.26X 0) + 0 + max(-0.14X 0) + 0 + max(-0.88X 0) + 0 + max(-0.56X 0) + 0 + max(-0.12X 0) + 0 + -0.18(max(-0.45X 0) + 0 + max(-0.27X 0) + 0 + max(-0.30X 0) + 0 + max(-0.99X 0) + 0 + max(-0.05X 0) + 0 + max(-0.26X 0) + 0 + max(-0.14X 0) + 0 + max(-0.88X 0) + 0 + max(-0.56X 0) + 

In [314]:
model.equation()

'max(-0.02X 0) + 0.0 + max(0.98X 0) + -0.1472802040086161 + max(1.03X 0) + 0.026103939140213885 + max(0.81X 0) + -0.11235439242305148 + max(-0.59X 0) + 0.0 + max(-0.97X 0) + 0.0 + max(0.10X 0) + -0.09763589415747015 + max(-0.81X 0) + 0.0 + max(1.53X 0) + -0.11555649941993829 + max(-0.59X 0) + 0.0 + max(0.52(max(-0.02X 0) + 0.0 + max(0.98X 0) + -0.1472802040086161 + max(1.03X 0) + 0.026103939140213885 + max(0.81X 0) + -0.11235439242305148 + max(-0.59X 0) + 0.0 + max(-0.97X 0) + 0.0 + max(0.10X 0) + -0.09763589415747015 + max(-0.81X 0) + 0.0 + max(1.53X 0) + -0.11555649941993829 + max(-0.59X 0) + 0.0 + 0.18(max(-0.02X 0) + 0.0 + max(0.98X 0) + -0.1472802040086161 + max(1.03X 0) + 0.026103939140213885 + max(0.81X 0) + -0.11235439242305148 + max(-0.59X 0) + 0.0 + max(-0.97X 0) + 0.0 + max(0.10X 0) + -0.09763589415747015 + max(-0.81X 0) + 0.0 + max(1.53X 0) + -0.11555649941993829 + max(-0.59X 0) + 0.0 + -0.64(max(-0.02X 0) + 0.0 + max(0.98X 0) + -0.1472802040086161 + max(1.03X 0) + 0.026103

In [88]:
model.parameters()

5555

In [5]:
import random

num_examples = 500

mean = 0  
std_dev = 3.33
inputs = np.random.uniform(0, 1, num_examples).reshape(-1, 1)
inputs = inputs.flatten()
outputs = 10 * inputs**2 + 2*inputs

inputs

NameError: name 'np' is not defined

In [313]:
for _ in range(10):  # Number of epochs
    for i in range(len(inputs)):
        # Forward pass
        predicted = model([inputs[i]])

        # Calculate loss
        expected_output = outputs[i]
        loss = (predicted - expected_output) ** 2

        print(loss.data)

        # Zero gradients and backpropagate
        model.zero_grad()
        loss.backward()

        # Update weights
        for p in model.parameters():
            p.data -= 0.0001 * p.grad


40.722226986977724
93.53987434040761
48.978728193819116
11.139198849401005
0.3704789286034418
0.7000835889906964
6.166579221094188
5.469123552567205
4.274628365354448
0.7989385850014609
21.581184817810012
58.307378648427935
0.03488364197761468
15.454619860139635
4.636537487676613
20.665558786383233
33.283482496129366
79.4111588624771
108.96399899501588
1.9286362797104357
4.038573362931092
2.84841445877767
0.05767023673555246
33.52970180060217
10.761062634623023
6.157704946367185
11.275417140525162
9.712295698408859
0.09994402158418457
44.714840042890295
32.3584289256118
1.867688808688389
0.050895834657561215
6.152420540342864
2.64341794203732
145.9734413323149
0.9373289916200989
20.802671609672508
1.3247397210165632
28.993070030125736
58.546229381091166
0.3465593799009408
0.03452410722626544
22.28607753369704
0.9615715280076211
97.82713045554144
29.884983577473406
70.88234346854102
8.928019664570645
90.47479696939502
25.852104104023358
14.19948078536319
4.846879292502021
13.02432785073

In [250]:
for n in np.random.uniform(0, 1, 10).reshape(-1, 1):
    output = model([n])
    print(f'output: {output.data}, expected: {10 * n**2 + 2*n}')

NameError: name 'np' is not defined