In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [2]:
class Value:
    def __init__(self, data, _children = (), _op = '', label = '') -> None:
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = _children 
        self._op = _op
        self.label = label

    def __repr__(self) -> str:
        return f"Value(data={self.data})"
    
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out
    
    def __radd__(self, other):
        return self + other
    
    def __neg__(self):
        return self * -1
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rsub__(self, other):
        return -self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out
    
    def __rmul__(self, other):
        return self * other

    def __pow__(self, other):
        assert isinstance(other, (int, float)), 'only support int and floats powers'
        out = Value(self.data ** other, (self, ), f'**{other}')

        def _backward():
            self.grad += other * (self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out

    def __truediv__(self, other):
        return self * other**-1

    def log(self):
        x = self.data
        t = np.log(x)
        out = Value(t, (self, ), 'log')
        def _backward():
            self.grad += (1/x) * out.grad  
        out._backward = _backward
        return out

    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out = Value(t, (self,), 'tanh')
        def _backward():
            self.grad += (1 - t**2) * out.grad 
        out._backward = _backward
        return out

    def relu(self):
        x = self.data
        t = max(0, x)
        out = Value(t, (self, ), 'relu')
        def _backward():
            self.grad = 1.0 * out.grad if x >= 0 else out.grad 
        out._backward = _backward 
        return out
    
    def softmax(self):
        pass

    def sigmoid(self):
        x = self.data
        t = 1 / (1 + math.exp(-x))
        out = Value(t, (self,), 'sigmoid')
        def _backward():
            self.grad += t * (1 - t) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')
        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward
        return out
    
    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.grad = 1.0

        for node in reversed(topo):
            node._backward()    

In [50]:
def softmax_score(scores):
    exps = np.exp(scores)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return np.array(exps / exp_sums)

In [87]:
### other way around softmax score
    # ret = []
    # for values in scores:
    #     exps = []
    #     sum = 0
    #     for value in values:
    #         exp = value.exp()
    #         exps.append(exp)
    #         sum += exp
    #     exps_2 = []
    #     for exp in exps:
    #         exps_2.append(exp / sum)
    #     ret.append(exps_2)
    # return np.array(ret)

In [4]:
# Optimizer 
class SGD:
    def __init__(self, parameters, lr=0.01) -> None:
        self.parameters = parameters
        self.lr = lr
    
    def zero_grad(self):
        for p in self.parameters:
            p.grad = 0

    def step(self):
        for p in self.parameters:
            p.data += (-1*self.lr) * p.grad

In [5]:
# Mean Squared Error Loss function
class MSE:
    def __init__(self) -> None:
        pass

    def __call__(self, input, target):
        loss = sum((y_in - y_target)**2 for y_in, y_target in zip(input, target))
        return loss

In [62]:
# CrossEntropy loss 
class CrossEntropyLoss():
    def __init__(self):
        pass

    def __call__(self, input, target):
        tol = 1e-6
        sum = 0
        for y_pred, y_true in zip(input, target):
            for i in range(len(y_pred)):
                sum += (y_pred[i] + tol).log() * y_true[i]
        loss = sum / len(input)
        # loss = -np.mean(np.sum(np.log(input + tol) * target,axis=1))
        return -loss

In [7]:
# For debugging purposes
class BinaryCrossEntropyLoss():
    def __init__(self):
        pass
    
    # TODO: Simplify it
    def __call__(self, input, target):
        result = 0
        for y_pred, y_true in zip(input, target):
            if y_true == -1:
                a = (1-y_pred)
                result += a.log()
            elif y_true == 1:
                result += y_pred.log()
        result = -(result / len(input))
        return result

In [8]:
class Neuron:
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x, activation=None):
        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b) 
        match activation:
            case 'relu':
                out = act.relu()
            case 'tanh':
                out = act.tanh()
            case 'softmax':
                out = act.softmax()
            case 'sigmoid':
                out = act.sigmoid()
            case _:
                out = act
        return out

    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self, in_shape, out_shape, activation=None):
        self.in_shape = in_shape
        self.out_shape = out_shape
        self.activation = activation
        self.neurons = [Neuron(in_shape) for _ in range(out_shape)]

    def __call__(self, x):
        outs = [n(x, self.activation) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
class MLP:
    def __init__(self, in_shape, out_shape):
        self.in_shape = in_shape; self.out_shape = out_shape
        self.layers = []
    
    def add_layer(self, x:Layer):
        self.layers.append(x)

    def __call__(self, xs):
        out = []
        for x in xs:
            for layer in self.layers:
                x = layer(x)
            out.append(x)
        return out
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

In [9]:
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
    [1.5, 0.5, -0.5],
    [1.0, 2.0, -2.0]
]
ys = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]]


In [90]:
layer0 = Layer(in_shape=3, out_shape=4, activation='relu')
layer1 = Layer(in_shape=4, out_shape=10, activation='relu')
layer1 = Layer(in_shape=10, out_shape=10, activation='relu')
layer2 = Layer(in_shape=10, out_shape=3) 

NUM_FEATURES = len(xs[0])
NUM_CLASSES = 2
nn = MLP(in_shape=NUM_FEATURES, out_shape=NUM_CLASSES)

nn.add_layer(layer0)
nn.add_layer(layer1)
nn.add_layer(layer2)

In [91]:
outputs = nn(xs)
actv = softmax_score(outputs)
actv

array([[Value(data=0.0066398531618817635),
        Value(data=0.04047810514997612), Value(data=0.9528820416881422)],
       [Value(data=0.14969702013111838), Value(data=0.4911127517196177),
        Value(data=0.35919022814926377)],
       [Value(data=0.05888102172969198), Value(data=0.2735175780461315),
        Value(data=0.6676014002241766)],
       [Value(data=0.0247990607027199), Value(data=0.11538533569249014),
        Value(data=0.8598156036047898)],
       [Value(data=0.0751470964790192), Value(data=0.26806682985747926),
        Value(data=0.6567860736635015)],
       [Value(data=0.004741828316488493),
        Value(data=0.026062054265955755), Value(data=0.9691961174175557)]],
      dtype=object)

In [95]:
epochs = 1000
lr = 0.1
loss_func = CrossEntropyLoss()
optimizer = SGD(parameters=nn.parameters(), lr=lr)
for epoch in range(epochs):
    # forward pass
    y_logits = nn(xs)
    y_pred = softmax_score(y_logits)
    loss = loss_func(y_pred, ys)

    # zero grad 
    optimizer.zero_grad()
    
    # backward pass 
    loss.backward() 

    # gradient descent
    optimizer.step()

    print(epoch, loss.data)

0 0.23462934348599643
1 0.35668199531237466
2 0.23070293110792833
3 0.3518476463837704
4 0.2264550271181295
5 0.3464111034062152
6 0.22188279700255223
7 0.3400948873166084
8 0.21745007253899717
9 0.3340374513262832
10 0.21330266015704596
11 0.32782338846697906
12 0.20935510979269636
13 0.32141547568392004
14 0.20558100628051937
15 0.3148442707636799
16 0.20198395113761095
17 0.30814323327704085
18 0.19858900527642648
19 0.3013723689265312
20 0.19542235414732012
21 0.2945349634453737
22 0.19251191749744656
23 0.2876372615290615
24 0.18956821473192667
25 0.2799419628077481
26 0.18696967469736253
27 0.27166146574338584
28 0.18492827132045433
29 0.2630664019591142
30 0.18322026892023582
31 0.2568164395566931
32 0.18135533925939587
33 0.2518014597813999
34 0.17945236772022635
35 0.2471031901432591
36 0.17771811049081598
37 0.24244594921197088
38 0.17619991209782504
39 0.23782499248323252
40 0.17496187438623478
41 0.23326719923058586
42 0.17398591031731825
43 0.22877678550444946
44 0.1732700

In [96]:
# these are sigmoid outputs
logits = nn(xs)
y_pred = softmax_score(logits)
y_pred

array([[Value(data=5.722926887141216e-28),
        Value(data=0.0005793815331036077),
        Value(data=0.9994206184668963)],
       [Value(data=2.1793595337086692e-11),
        Value(data=0.9999999999782063),
        Value(data=9.269811794827091e-84)],
       [Value(data=0.9999999973523063), Value(data=2.25443625732752e-09),
        Value(data=3.9325736050659133e-10)],
       [Value(data=0.007982234929550611),
        Value(data=0.002420151933171855), Value(data=0.9895976131372775)],
       [Value(data=0.9910703859355751),
        Value(data=0.0005694585835538227),
        Value(data=0.008360155480870969)],
       [Value(data=6.242421238205206e-189),
        Value(data=0.999847504397109),
        Value(data=0.00015249560289110428)]], dtype=object)

TODO:
* DataLoader for seperating data to batches
* Softmax regression and its loss function - Done, Testing Phase
* Softmax nn structure - Done, Testing phase