In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [2]:
class Tensor:
    """NumPy-backed tensor with a small reverse-mode autograd engine.

    Operations on a tensor created with ``autograd=True`` return a new tensor
    that records its inputs (``creators``) and the name of the operation
    (``creation_op``). ``backward`` uses that record to accumulate ``grad`` on
    every contributing tensor.

    Args:
        data: Anything ``np.array`` accepts.
        autograd: Whether this tensor takes part in gradient tracking.
        creators: List of tensors this one was computed from, or None.
        creation_op: Name of the operation that produced this tensor, or None.
        idr: Identifier used as the key in the creators' ``children`` maps.
            A random integer in [0, 100000) is drawn when omitted.
    """

    def __init__(self, data, autograd=False, creators=None, creation_op=None, idr=None):
        self.data = np.array(data)
        self.creators = creators
        self.creation_op = creation_op
        self.grad = None
        self.autograd = autograd
        # Maps a child's idr to the number of gradients still expected from it.
        self.children = {}
        if idr is None:
            idr = np.random.randint(0, 100000)
        self.idr = idr

        # Register this tensor as a child of everything it was built from.
        if creators:
            for c in creators:
                c.children[idr] = c.children.get(idr, 0) + 1

    def all_children_grads_accounted_for(self):
        """Return True when no registered child still owes a gradient."""
        return all(cnt == 0 for cnt in self.children.values())

    def backward(self, grad=None, grad_origin=None):
        """Accumulate ``grad`` on this tensor and propagate it to its creators.

        Args:
            grad: Tensor holding the incoming gradient. Defaults to a tensor of
                ones shaped like ``self.data``.
            grad_origin: The child tensor the gradient comes from, or None.
                When given, the child's pending count is decremented and
                propagation waits until every child has reported.

        Raises:
            Exception: If ``grad_origin`` has already delivered all of its
                gradients to this tensor.
            KeyError: If ``grad_origin`` was never registered as a child.
        """
        if not self.autograd:
            return

        if grad_origin is not None:
            if self.children[grad_origin.idr] == 0:
                raise Exception("can't backprop more than once")
            self.children[grad_origin.idr] -= 1

        # Explicit None checks: these must not depend on Tensor truthiness.
        if grad is None:
            grad = Tensor(np.ones_like(self.data))
        if self.grad is None:
            self.grad = grad
        else:
            self.grad = self.grad + grad

        ready = grad_origin is None or self.all_children_grads_accounted_for()
        if not (self.creators and ready):
            return

        # NOTE(review): only 'add', 'sub' and 'mul' pass themselves as
        # grad_origin. Every other op forwards its gradient without an origin,
        # so the receiving tensor propagates immediately and its child counts
        # are not decremented for those ops.
        op = self.creation_op

        if op == 'add':
            self.creators[0].backward(self.grad, self)
            self.creators[1].backward(self.grad, self)

        elif op == 'neg':
            self.creators[0].backward(-self.grad)

        elif op == 'sub':
            self.creators[0].backward(Tensor(self.grad.data), self)
            self.creators[1].backward(Tensor((-self.grad).data), self)

        elif op == 'mul':
            new = self.grad * self.creators[1]
            self.creators[0].backward(new, self)
            new = self.grad * self.creators[0]
            self.creators[1].backward(new, self)

        elif op == 'mm':
            act, weights = self.creators
            act.backward(self.grad.mm(weights.transpose()))
            weights.backward(self.grad.transpose().mm(act).transpose())

        elif op == 'transpose':
            self.creators[0].backward(self.grad.transpose())

        elif op.startswith('sum'):
            # The reduced axis is encoded in the op name as 'sum_<dim>'.
            dim = int(op.split('_')[1])
            ds = self.creators[0].data.shape[dim]
            self.creators[0].backward(self.grad.expand(dim, ds))

        elif op.startswith('expand'):
            # The inserted axis is encoded in the op name as 'expand_<dim>'.
            dim = int(op.split('_')[1])
            self.creators[0].backward(self.grad.sum(dim))

        elif op == 'sigmoid':
            ones = Tensor(np.ones_like(self.grad.data))
            self.creators[0].backward(self.grad * (self * (ones - self)))

        elif op == 'softmax':
            # Jacobian-vector product of softmax along the last axis:
            # dx = s * (g - sum(g * s)). The previous `grad * softmax` dropped
            # the second term.
            s = self.data
            g = self.grad.data
            dx = s * (g - np.sum(g * s, axis=-1, keepdims=True))
            self.creators[0].backward(Tensor(dx))

        elif op == 'tanh':
            ones = Tensor(np.ones_like(self.grad.data))
            self.creators[0].backward(self.grad * (ones - (self * self)))

        elif op == 'index_select':
            # Scatter-add each selected row's gradient back to its source row;
            # repeated indices accumulate.
            new_grad = np.zeros_like(self.creators[0].data)
            indices_ = self.index_select_indices.data.flatten()
            grad_ = grad.data.reshape(len(indices_), -1)
            for i in range(len(indices_)):
                new_grad[indices_[i]] += grad_[i]
            self.creators[0].backward(Tensor(new_grad))

        elif op == 'cross_entropy':
            # NOTE(review): the incoming gradient is ignored and the result is
            # not divided by the number of rows even though the forward loss
            # uses mean(). Kept as-is to preserve training behaviour.
            dx = self.softmax_output - self.target_dist
            self.creators[0].backward(Tensor(dx))

    def _derive(self, data, tracked, creators, creation_op):
        """Wrap ``data`` in a Tensor, recording its origin only when ``tracked``."""
        if tracked:
            return Tensor(data,
                          autograd=True,
                          creators=creators,
                          creation_op=creation_op)
        return Tensor(data)

    @staticmethod
    def _stable_softmax(values):
        """Softmax over the last axis, shifted by the row max to avoid overflow."""
        shifted = values - np.max(values, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def __add__(self, other):
        """Element-wise sum; tracked only when both operands have autograd."""
        return self._derive(self.data + other.data,
                            self.autograd and other.autograd,
                            [self, other], 'add')

    def __neg__(self):
        """Element-wise negation."""
        return self._derive(self.data * -1, self.autograd, [self], 'neg')

    def __sub__(self, other):
        """Element-wise difference; tracked only when both operands have autograd."""
        return self._derive(self.data - other.data,
                            self.autograd and other.autograd,
                            [self, other], 'sub')

    def __mul__(self, other):
        """Element-wise product; tracked only when both operands have autograd."""
        return self._derive(self.data * other.data,
                            self.autograd and other.autograd,
                            [self, other], 'mul')

    def sum(self, dim):
        """Sum over axis ``dim``."""
        return self._derive(self.data.sum(dim), self.autograd,
                            [self], 'sum_' + str(dim))

    def expand(self, dim, copies):
        """Insert a new axis at position ``dim`` holding ``copies`` repeats of the data."""
        # Repeat every element `copies` times on a new trailing axis, then move
        # that axis to position `dim`.
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_shape = list(self.data.shape) + [copies]
        new_data = self.data.repeat(copies).reshape(new_shape)
        new_data = new_data.transpose(trans_cmd)

        return self._derive(new_data, self.autograd,
                            [self], 'expand_' + str(dim))

    def transpose(self):
        """Return the transposed tensor (``ndarray.transpose`` with no arguments)."""
        return self._derive(self.data.transpose(), self.autograd,
                            [self], 'transpose')

    def mm(self, x):
        """Matrix product ``self.data.dot(x.data)``; tracked when ``self`` has autograd."""
        return self._derive(self.data.dot(x.data), self.autograd,
                            [self, x], 'mm')

    def sigmoid(self):
        """Element-wise logistic function."""
        return self._derive(1 / (1 + np.exp(-self.data)), self.autograd,
                            [self], 'sigmoid')

    def softmax(self):
        """Softmax over the last axis.

        Works for both tracked and untracked tensors; the untracked path
        previously raised NameError.
        """
        return self._derive(self._stable_softmax(self.data), self.autograd,
                            [self], 'softmax')

    def tanh(self):
        """Element-wise hyperbolic tangent."""
        return self._derive(np.tanh(self.data), self.autograd,
                            [self], 'tanh')

    def index_select(self, indices):
        """Select entries along the first axis using the integer tensor ``indices``.

        The untracked path previously fell through and returned None.
        """
        selected = self.data[indices.data]
        if self.autograd:
            new = Tensor(selected,
                         autograd=True,
                         creators=[self],
                         creation_op='index_select')
            # Needed by backward() to scatter gradients to the selected rows.
            new.index_select_indices = indices
            return new
        return Tensor(selected)

    def cross_entropy(self, target_indices):
        """Mean softmax cross-entropy of ``self`` against integer class indices.

        Args:
            target_indices: Tensor of class indices, one per row once the
                softmax output is reshaped to ``(len(targets), -1)``.

        Returns:
            A tensor wrapping the scalar loss. On the tracked path it also
            carries ``softmax_output`` and ``target_dist`` for ``backward``.
        """
        softmax_output = self._stable_softmax(self.data)
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t), -1)
        # One-hot rows picked from the identity matrix.
        target_dist = np.eye(p.shape[1])[t]
        loss = -(np.log(p) * (target_dist)).sum(1).mean()

        if self.autograd:
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op='cross_entropy')
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out
        return Tensor(loss)

    def __repr__(self):
        return repr(self.data)

    def __str__(self):
        return str(self.data)

In [104]:
class SGD:
    """Plain stochastic gradient descent over a collection of parameters.

    Args:
        parameters: Iterable of objects exposing ``data`` and ``grad`` (whose
            own ``data`` is the gradient array).
        alpha: Step size multiplied into each gradient.
    """

    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        """Reset every existing gradient to zero in place."""
        for p in self.parameters:
            # A parameter that never received a gradient has grad None.
            if p.grad is not None:
                p.grad.data *= 0

    def step(self, zero=True):
        """Apply ``data -= grad * alpha`` to every parameter that has a gradient.

        Args:
            zero: When True, clear each gradient right after it is applied.
        """
        for p in self.parameters:
            if p.grad is None:
                # Nothing to apply; previously this raised AttributeError.
                continue
            p.data -= p.grad.data * self.alpha
            if zero:
                p.grad.data *= 0

In [4]:
class Layer:
    """Base class for network components that own a list of trainable parameters."""

    def __init__(self):
        # Subclasses append their parameter tensors to this list.
        self.parameters = []

    def get_parameters(self):
        """Return the list of parameters registered on this layer."""
        return self.parameters

In [5]:
class Linear(Layer):
    """Fully connected layer computing ``inputs.mm(weight)`` plus a bias.

    Args:
        n_inputs: Number of rows in the weight matrix.
        n_outputs: Number of columns in the weight matrix and length of the bias.
    """

    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        # Standard-normal draws scaled by sqrt(2 / n_inputs); bias starts at zero.
        initial_weight = np.random.randn(n_inputs, n_outputs) * np.sqrt(2 / n_inputs)
        self.weight = Tensor(initial_weight, autograd=True)
        self.bias = Tensor(np.zeros(n_outputs), autograd=True)

        self.parameters.extend([self.weight, self.bias])

    def forward(self, inputs):
        """Return ``inputs.mm(weight)`` plus the bias copied once per input row."""
        projected = inputs.mm(self.weight)
        # Expand the bias along a new leading axis, one copy per row of `inputs`.
        bias_rows = self.bias.expand(0, len(inputs.data))
        return projected + bias_rows

In [6]:
class Sequential(Layer):
    """Container that applies its layers one after another.

    Args:
        layers: Optional list of layers. The list is used as-is (not copied).
            When omitted, each instance gets its own new empty list.
    """

    def __init__(self, layers=None):
        super().__init__()
        # A `list()` default is evaluated once and would be shared by every
        # Sequential created without arguments, so `add` on one would leak
        # into the others.
        self.layers = [] if layers is None else layers

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the final output."""
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def get_parameters(self):
        """Return a new list with the parameters of all contained layers, in order."""
        return [param
                for layer in self.layers
                for param in layer.get_parameters()]

In [111]:
class MSELoss(Layer):
    """Squared-error loss summed over the first axis."""

    def forward(self, pred, target):
        """Return ``((pred - target) * (pred - target)).sum(0)``."""
        # The difference is computed twice on purpose so the product's two
        # operands stay separate tensors, exactly as before.
        left_diff = pred - target
        right_diff = pred - target
        squared = left_diff * right_diff
        return squared.sum(0)

In [8]:
class Tanh(Layer):
    """Parameter-free layer that applies ``tanh`` to its input."""

    def forward(self, inputs):
        """Return ``inputs.tanh()``."""
        activated = inputs.tanh()
        return activated

In [9]:
class Sigmoid(Layer):
    """Parameter-free layer that applies ``sigmoid`` to its input."""

    def forward(self, inputs):
        """Return ``inputs.sigmoid()``."""
        activated = inputs.sigmoid()
        return activated

In [10]:
class Softmax(Layer):
    """Parameter-free layer that applies ``softmax`` to its input."""

    def forward(self, inputs):
        """Return ``inputs.softmax()``."""
        normalized = inputs.softmax()
        return normalized

In [11]:
class Embedding(Layer):
    """Lookup table mapping integer indices to rows of a trainable matrix.

    Args:
        vocab_size: Number of rows in the table.
        dim: Length of each row.
    """

    def __init__(self, vocab_size, dim):
        super().__init__()

        self.vocab_size, self.dim = vocab_size, dim

        # Uniform draws from [0, 1), centred on zero and divided by `dim`.
        table = (np.random.rand(vocab_size, dim) - 0.5) / dim
        self.weight = Tensor(table, autograd=True)
        self.parameters.append(self.weight)

    def forward(self, inputs):
        """Return the rows of ``weight`` selected by the index tensor ``inputs``."""
        rows = self.weight.index_select(inputs)
        return rows

In [12]:
class CrossEntropyLoss:
    """Loss wrapper that delegates to the input tensor's ``cross_entropy`` method."""

    def __init__(self):
        # Was misspelled `__init_`, so it never ran as the constructor.
        super().__init__()

    def forward(self, inputs, targets):
        """Return ``inputs.cross_entropy(targets)``.

        Args:
            inputs: Object exposing ``cross_entropy``.
            targets: Passed through to ``cross_entropy`` unchanged.
        """
        return inputs.cross_entropy(targets)

In [13]:
class RNNCell(Layer):
    """Single recurrent step built from three ``Linear`` layers and one non-linearity.

    Args:
        n_inputs: Input size of the input-to-hidden layer.
        n_hidden: Size of the hidden state.
        n_output: Output size of the hidden-to-output layer.
        activation: Either ``'sigmoid'`` or ``'tanh'``.

    Raises:
        Exception: If ``activation`` is neither of the supported names.
    """

    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        super().__init__()

        self.n_inputs, self.n_hidden, self.n_output = n_inputs, n_hidden, n_output

        # Resolve the non-linearity before any weights are created.
        if activation == 'sigmoid':
            nonlinearity = Sigmoid
        elif activation == 'tanh':
            nonlinearity = Tanh
        else:
            raise Exception('Non-linearity not found')
        self.activation = nonlinearity()

        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        for sublayer in (self.w_ih, self.w_hh, self.w_ho):
            self.parameters.extend(sublayer.get_parameters())

    def forward(self, inputs, hidden):
        """Advance one step and return ``(output, new_hidden)``."""
        recurrent_term = self.w_hh.forward(hidden)
        input_term = self.w_ih.forward(inputs)
        next_hidden = self.activation.forward(input_term + recurrent_term)
        return self.w_ho.forward(next_hidden), next_hidden

    def init_hidden(self, batch_size=1):
        """Return an all-zero hidden state of shape ``(batch_size, n_hidden)``."""
        zeros = np.zeros((batch_size, self.n_hidden))
        return Tensor(zeros, autograd=True)

In [14]:
# Autograd check: `b` feeds both `d` and `e`, so it receives two gradient contributions.
a = Tensor([1, 2, 3, 4, 5], autograd=True)
b = Tensor([2, 2, 2, 2, 2], autograd=True)
c = Tensor([5, 4, 3, 2, 1], autograd=True)
g = Tensor([6, 7, 8, 9, 10], autograd=True)  # not used in this cell

d = a + b
e = b + c
f = d + e

# Seed the backward pass with an explicit upstream gradient.
f.backward(Tensor([1, 1, 2, 1, 1]))


# b's gradient is the seed accumulated once via `d` and once via `e`.
print(b.grad.data)

[2 2 4 2 2]


In [112]:
# Toy dataset: four 2-feature rows with one target value per row.
data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

# 2 -> 3 -> 1 network: tanh after the first Linear, sigmoid on the output.
model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(25):
    pred = model.forward(data)  # forward pass
    loss = criterion.forward(pred, target)  # squared error summed over axis 0
    loss.backward(Tensor(np.ones_like(loss.data)))  # seed with ones shaped like the loss
    optim.step()  # apply the update (called with the default zero=True)
    print(i, loss)

0 [2.10217643]
1 [1.81873036]
2 [1.66450385]
3 [1.3043611]
4 [0.70899744]
5 [0.52007822]
6 [0.36230166]
7 [0.24114239]
8 [0.16432991]
9 [0.1185035]
10 [0.0904326]
11 [0.07209722]
12 [0.05940884]
13 [0.05021683]
14 [0.04330444]
15 [0.03794565]
16 [0.03368593]
17 [0.03022856]
18 [0.02737273]
19 [0.02497827]
20 [0.02294467]
21 [0.02119814]
22 [0.01968344]
23 [0.01835841]
24 [0.01719038]


In [113]:
# Inputs are integer indices into the embedding table, not feature vectors.
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([[0],
                         [1],
                         [0],
                         [1]]), autograd=True)

# Embedding table with 7 rows of length 3, followed by tanh -> Linear(3, 1) -> sigmoid.
embed = Embedding(7, 3)
model = Sequential([embed, Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.5)

for i in range(25):
    pred = model.forward(data)  # forward pass
    loss = criterion.forward(pred, target)  # compute the loss
    loss.backward(Tensor(np.ones_like(loss.data)))  # compute the gradients
    optim.step()  # update the weights
    print(i, loss)

0 [1.09345749]
1 [0.66336982]
2 [0.41780476]
3 [0.26952761]
4 [0.18308312]
5 [0.13210084]
6 [0.10047446]
7 [0.07968441]
8 [0.06528621]
9 [0.05486944]
10 [0.04705724]
11 [0.04102219]
12 [0.03624388]
13 [0.03238165]
14 [0.02920475]
15 [0.02655208]
16 [0.02430822]
17 [0.0223886]
18 [0.02072997]
19 [0.01928419]
20 [0.01801404]
21 [0.01689034]
22 [0.0158899]
23 [0.01499409]
24 [0.0141878]


In [114]:
# Inputs are embedding indices; targets are integer class indices (flat, one per input).
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([0, 1, 0, 1]), autograd=True)
# No Softmax layer at the end: Tensor.cross_entropy applies the softmax itself.
model = Sequential([Embedding(3, 3), Tanh(), Linear(3, 4)])
criterion = CrossEntropyLoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.1)

for i in range(25):
    pred = model.forward(data)  # forward pass: 4 scores per input row
    loss = criterion.forward(pred, target)  # mean cross-entropy over the rows
    loss.backward(Tensor(np.ones_like(loss.data)))  # seed with ones shaped like the loss
    optim.step()  # apply the update (called with the default zero=True)
    print(i + 1, loss)  # 1-based iteration counter in the log

1 1.3565187521342192
2 0.975111452377638
3 0.722617821915376
4 0.5555894243377575
5 0.44137355129219086
6 0.3603557230372992
7 0.3009934976114045
8 0.25626112058327233
9 0.22171851539026366
10 0.19446664919401047
11 0.17255816981435335
12 0.15465133624915467
13 0.13980008380954706
14 0.12732312061664763
15 0.11672027792231379
16 0.10761783784357454
17 0.09973204253631165
18 0.09284425664294993
19 0.08678374974905959
20 0.08141555616777205
21 0.07663177816562065
22 0.07234526356945385
23 0.06848494621256507
24 0.06499236794421728
25 0.061819051700820304
