# Veštačke neuronske mreže
Zasnovano na:\
https://github.com/karpathy/micrograd \
https://www.youtube.com/watch?v=VMj-3S1tku0

In [10]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [11]:
class Value:
    """Scalar node of an expression graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers its
    operands (``_prev``) and a closure (``_backward``) which applies the chain
    rule for that single operation. Calling ``backward()`` on the final node
    fills in ``grad`` for every node it depends on.
    """

    def __init__(self, data, _children=()):
        """
        data (number) -> value stored in this node of the expression graph
        _children     -> nodes this value was computed from; kept as the set
                         ``_prev`` and walked during backpropagation
        """
        self.data = data
        self._prev = set(_children)  # set of nodes that directly precede this one
        self.grad = 0.0  # gradient accumulated by backward(); 0 means "no effect on the output yet"
        self._backward = lambda: None  # chain-rule step for this node; a no-op for leaves

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"

    # ---- operator overloads -------------------------------------------------
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)  # supports a + 1
        out = Value(self.data + other.data, (self, other))

        def _backward():
            # Closure over self/other/out. Addition passes the gradient through.
            # It must be += (not =): a node can feed several consumers,
            # e.g. b = a + a, and every contribution has to be summed.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __radd__(self, other):
        # Called for `1 + a`, where the left operand is not a Value.
        return self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other))

        def _backward():
            # Product rule: d(a*b)/da = b, d(a*b)/db = a.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other):
        # Called for `2 * a`; Python does not know that equals `a * 2`.
        return self * other

    # Activation functions need more than + and *. We do not have to break
    # them down into atomic operations: any function is fine as long as we
    # can compute its local derivative.
    def tanh(self):
        """Return tanh(self) as a new node."""
        # math.tanh is used instead of (e^{2x}-1)/(e^{2x}+1) because
        # math.exp(2*x) raises OverflowError for large positive x.
        t = math.tanh(self.data)
        out = Value(t, (self, ))

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2. Accumulate with +=, as in every
            # other operation, so earlier contributions are not overwritten.
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return e**self as a new node."""
        out = Value(math.exp(self.data), (self, ))

        def _backward():
            # d e^x/dx = e^x, which is exactly out.data.
            self.grad += out.data * out.grad
        # Store the function itself; calling it here would store None and
        # make backward() fail with "'NoneType' object is not callable".
        out._backward = _backward
        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float))  # only support int/float powers
        out = Value(self.data ** other, (self, ))

        def _backward():
            # Power rule: d x^n/dx = n * x^(n-1).
            self.grad += other * (self.data ** (other - 1)) * out.grad

        out._backward = _backward
        return out

    def __truediv__(self, other):
        # a / b == a * b**-1 (the power binds tighter than *).
        return self * other ** -1

    def __rtruediv__(self, other):
        # Called for `1 / a`, where the left operand is not a Value.
        return other * self ** -1

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        # Called for `1 - a`, where the left operand is not a Value.
        return other + (-self)

    def backward(self):
        """Backpropagate from this node, filling ``grad`` on every ancestor.

        Gradients are accumulated, so reset ``grad`` on nodes that are reused
        between calls (e.g. model parameters) before calling this again.
        """
        topo = []
        visited = set()

        def build_topo(v):
            # Post-order DFS: a node is appended only after all its inputs.
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        self.grad = 1.0  # d(self)/d(self)
        # Backpropagation is the chain rule applied node by node, from the
        # output back towards the leaves.
        for node in reversed(topo):
            node._backward()

    

![Topološko sortiranje grafa](img/topo.png)
U nastavku su implementirane apstrakcije koje nam olakšavaju kreiranje neuronske mreže. 

In [12]:
class Neuron:
    """A single neuron computing ``tanh(b + sum(w_i * x_i))``."""

    def __init__(self, nin):
        # One weight per input, followed by the bias; all drawn from U(-1, 1).
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        # Start from the bias and add the weighted inputs one by one.
        weighted_inputs = (weight * value for weight, value in zip(self.w, x))
        pre_activation = sum(weighted_inputs, self.b)
        return pre_activation.tanh()

    def params(self):
        """Return the trainable values: all weights, then the bias."""
        return [*self.w, self.b]

In [13]:
class Layer:
    """A group of neurons that all receive the same input vector."""

    def __init__(self, nin, nout):
        # Each neuron takes `nin` inputs (the size of the previous layer);
        # the layer produces `nout` outputs, one per neuron.
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        # Evaluate every neuron on the same input.
        return [neuron(x) for neuron in self.neurons]

    def params(self):
        """Return the parameters of all neurons as one flat list."""
        return [p for neuron in self.neurons for p in neuron.params()]

In [14]:
class MLP:
    """Multi-layer perceptron built from ``Layer`` objects."""

    def __init__(self, nin, nouts):
        # sizes[i] -> sizes[i + 1] are the input/output widths of layer i.
        sizes = [nin] + nouts
        self.layers = [
            Layer(fan_in, fan_out) for fan_in, fan_out in zip(sizes, sizes[1:])
        ]

    def __call__(self, x):
        # Feed the output of every layer into the next one.
        activations = x
        for layer in self.layers:
            activations = layer(activations)
        return activations

    def params(self):
        """Return the parameters of all layers as one flat list."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.params())
        return collected

In [15]:
# Training set: four samples with three input features each.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
# Desired target for each sample in `xs`.
ys = [1.0, -1.0, -1.0, 1.0]

# 3 inputs -> two hidden layers of 4 neurons -> 1 output neuron.
n = MLP(3, [4, 4, 1])

# Predictions of the freshly initialised (untrained) network.
ypred = [n(sample) for sample in xs]
ypred

[[Value(data=0.8504102225958426, grad=0.0)],
 [Value(data=0.9333648547303176, grad=0.0)],
 [Value(data=0.7943337900980459, grad=0.0)],
 [Value(data=0.9338881229295876, grad=0.0)]]

In [16]:
# Training the neural network with plain gradient descent.
num_epochs = 250
learning_rate = 0.01
for k in range(num_epochs):
    # Forward pass: predictions and the summed squared-error loss.
    ypred = [n(x) for x in xs]
    squared_errors = ((pred[0] - target) ** 2 for pred, target in zip(ypred, ys))
    loss = sum(squared_errors, Value(0.0))

    # Reset gradients; backward() accumulates into grad with +=.
    params = n.params()
    for p in params:
        p.grad = 0.0

    # Backward pass: derivative of the loss with respect to every graph node.
    loss.backward()

    # Gradient-descent step: move each parameter against its gradient.
    for p in params:
        p.data -= learning_rate * p.grad

    print(k, loss.data)

0 6.9842812935876
1 6.869183365184368
2 6.731553962562069
3 6.5654192273229555
4 6.362983626153945
5 6.114251500768667
6 5.807062854611538
7 5.428531385478896
8 4.969755834354813
9 4.4354270682815935
10 3.8549507179853455
11 3.2819912067456687
12 2.7715336357226774
13 2.3512813752629933
14 2.0173557677805913
15 1.7515096601155584
16 1.5360172332388067
17 1.3581721439580177
18 1.2094777708951843
19 1.0840371002206726
20 0.977481552565535
21 0.8864132258355424
22 0.8081218593016767
23 0.7404227686772584
24 0.6815470716924266
25 0.6300584035336211
26 0.5847868018107812
27 0.544775736804733
28 0.5092397891744491
29 0.4775309801898987
30 0.44911205733192283
31 0.4235353105640892
32 0.40042575395288477
33 0.3794677394919611
34 0.36039426645027056
35 0.34297840952125136
36 0.3270264161487755
37 0.31237212292699074
38 0.298872418263157
39 0.28640353828428744
40 0.27485802916977864
41 0.26414224481815496
42 0.2541742764356235
43 0.2448822321390009
44 0.23620280142831968
45 0.22808005249833774
4

In [17]:
# Predictions of the trained network, to compare against the targets in `ys`.
ypred = [n(sample) for sample in xs]
ypred

[[Value(data=0.9143671591342561, grad=0.0)],
 [Value(data=-0.9390700920114902, grad=0.0)],
 [Value(data=-0.8937088569622151, grad=0.0)],
 [Value(data=0.938155585238551, grad=0.0)]]

## PyTorch

PyTorch je open-source biblioteka za mašinsko učenje.

Koristićemo je kako bismo jednostavno kreirali i trenirali neuronske mreže.

Pored osnovnih stvari PyTorch nam pruža velik broj opcija za optimizaciju, vizualizaciju...

Dosadašnji kod smo koristili kako bismo razumeli kako funkcioniše trening neuronske mreže. U nastavku ćemo koristiti PyTorch da kreiramo isti model.

Tenzor (višedimenzionalni niz) je osnovna jedinica podataka u PyTorch-u. U dosadašnjoj implementaciji ta osnovna jedinica je bila skalar.

"Matematika ostaje potpuno identična, tensori se uvode isključivo zbog efikasnosti." - <i>Andrej Karpathy</i>

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

# The same architecture is built in two ways with PyTorch.
# Variant 1: a declarative stack of layers, 3 -> 4 -> 4 -> 1 with tanh after each.
model1 = nn.Sequential(
    nn.Linear(in_features=3, out_features=4),
    nn.Tanh(),
    nn.Linear(in_features=4, out_features=4),
    nn.Tanh(),
    nn.Linear(in_features=4, out_features=1),
    nn.Tanh(),
)

# Variant 2: the same network written as an nn.Module subclass.
# NOTE: this rebinds the name `MLP`, which the micrograd section above also
# defines; re-running the earlier cells after this one picks up this class.
class MLP(nn.Module):
    """Three linear layers (input_size -> 4 -> 4 -> 1), each followed by tanh."""

    def __init__(self, input_size):
        super().__init__()
        hidden_size = 4
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Apply the three linear layers, with tanh after each of them."""
        hidden = self.tanh(self.fc1(x))
        hidden = self.tanh(self.fc2(hidden))
        return self.tanh(self.fc3(hidden))

model2 = MLP(3)


class CustomLoss(nn.Module):
    """Sum of squared errors between the first output of each sample and its target.

    Mirrors the loss used in the micrograd training loop:
    ``sum((y_out[0] - y_gt) ** 2)`` over all (target, prediction) pairs.
    """

    def __init__(self):
        super().__init__()

    def forward(self, y_pred, y_true):
        """Return the summed squared error as a scalar tensor.

        y_pred -> iterable of per-sample predictions; element 0 of each is used
        y_true -> iterable of per-sample targets, paired with y_pred by zip
        """
        terms = [
            torch.sum((y_out[0] - y_gt) ** 2)
            for y_gt, y_out in zip(y_true, y_pred)
        ]
        if not terms:
            # Nothing to compare: keep the original "zero loss" result.
            return torch.tensor(0.0)
        # Add the terms out of place, in the same order as before. Accumulating
        # in place into a fresh torch.tensor(0.0) pins the result to float32 on
        # the CPU and raises for e.g. float64 inputs.
        return sum(terms[1:], terms[0])

# Train model1 with plain SGD on the same data as the micrograd network.
num_epochs = 250
loss_fn = CustomLoss()
optimizer = optim.SGD(model1.parameters(), lr=0.01)

X = torch.tensor(xs)
y = torch.tensor(ys)

# The loop variable is called `epoch`, not `n`: `n` is the micrograd model
# defined earlier in the notebook, and rebinding it here would break
# re-running the cells that still use it.
for epoch in range(num_epochs):
    # Forward pass
    y_pred = model1(X)
    loss = loss_fn(y_pred, y)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Parameter update
    optimizer.step()

    if epoch % 50 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
# Predictions from the last forward pass (computed before the final update step).
y_pred

Epoch [1/250], Loss: 4.1201
Epoch [51/250], Loss: 0.1673
Epoch [101/250], Loss: 0.0544
Epoch [151/250], Loss: 0.0302
Epoch [201/250], Loss: 0.0204


tensor([[ 0.9527],
        [-0.9462],
        [-0.9279],
        [ 0.9298]], grad_fn=<TanhBackward0>)