In [1]:
import numpy as np
#from OwnFrame import Tensor

In [2]:
class Tensor (object):

    def __init__(self, data, autograd=False, creators=None, creation_op=None, id=None):
        self.data = np.array(data)
        self.creation_op = creation_op
        self.creators = creators
        self.grad = None
        self.autograd = autograd
        self.children = {}
        if id is None:
            self.id = np.random.randint(0, 100000)
        else:
            self.id = id
        
        # скорректировать число потомков данного тензора
        if creators is not None:
            for c in creators:
                if self.id not in c.children:
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1
        
    
    # проверить, получил ли тензор градиенты для всех потомков
    def all_children_grads_accounted_for(self):
       for id, cnt in self.children.items():
            if cnt != 0:
                return False
       return True
       
       
    #вычисление обратного распространения
    def backward(self, grad=None, grad_origin=None):
        if self.autograd:
            
            # первый градиентный вызов из единиц
            if grad is None:
                grad = Tensor(np.ones_like(self.data))
            
            if grad_origin is not None:
                # проверка возможности обратного распространения
                # или ожидания градиента,
                # в последнем случае нужно уменьшить счетчик
                if self.children[grad_origin.id] == 0:
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1
            
            # накопление градиентов от нескольких потомков
            if self.grad  is None:
                self.grad = grad
            else:
                self.grad += grad
            
            assert grad.autograd == False
            
            if (self.creators is not None) and (self.all_children_grads_accounted_for() or grad_origin is None):
                
                if self.creation_op == 'add':
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self) # начало обратного распространения
                
                if self.creation_op == "neg":
                    self.creators[0].backward(self.grad.__neg__())
                
                if self.creation_op == "sub":
                    new = Tensor(self.grad.data)
                    self.creators[0].backward(new, self)
                    new = Tensor(self.grad.__neg__().data)
                    self.creators[1].backward(new, self)
                
                if self.creation_op == "mul":
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new, self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)
                
                if self.creation_op == "mm":
                    act = self.creators[0] # обычно слой активации
                    weights = self.creators[1] # обычно весовая матрица
                    new = self.grad.mm(weights.transpose())
                    act.backward(new)
                    new = self.grad.transpose().mm(act).transpose()
                    weights.backward(new)
                
                if self.creation_op == "transpose":
                    self.creators[0].backward(self.grad.transpose())
                
                if "sum" in self.creation_op:
                    dim = int(self.creation_op.split("_")[1])
                    ds = self.creators[0].data.shape[dim]
                    self.creators[0].backward(self.grad.expand(dim, ds))
                
                if "expand" in self.creation_op:
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))
                
                if self.creation_op == "sigmoid":
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (self * (ones - self)))
                
                if self.creation_op == "tanh":
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (ones - (self * self)))
                
                if self.creation_op == "index_select":
                    new_grad = np.zeros_like(self.creators[0].data)
                    indices_ = self.index_select_indices.data.flatten() # преобразование индексов в плоский вектор
                    grad_ = grad.data.reshape(len(indices_), -1) # свораичиваем гадиент в простой список строк
                    
                    #  на данном моменте индексы indices_ и grad_ будут иметь соответсвующий порядок
                    
                    for i in range(len(indices_)):
                        new_grad[indices_[i]] += grad_[i]
                    self.creators[0].backward(Tensor(new_grad))
                
                if self.creation_op == "cross_entropy":
                    dx = self.softmax_output - self.target_dist
                    self.creators[0].backward(Tensor(dx))
                
            
    # реализация метода сложения
    def __add__(self, other):
        if self.autograd and other.autograd:
                return Tensor(self.data + other.data, autograd=True,
                              creators=[self, other],
                              creation_op='add')
        return Tensor(self.data + other.data, creators=[self,other], creation_op='add')
    
    # реализация метода отрицания (negative)
    def __neg__(self):
        if self.autograd:
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)
    
    # реализация метода вычитания
    def __sub__(self, other):
        if self.autograd and other.autograd:
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="sub")
        return Tensor(self.data - other.data)
    
    # реализация метода умножения
    def __mul__(self, other):
        if self.autograd and other.autograd:
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="mul")
        return Tensor(self.data * other.data)
    
    # реализация метода суммирования всех эллементов массива 
    def sum(self, dim):
        if self.autograd:
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_" + str(dim),)
        return Tensor(self.data.sum(dim))
    
    # реализация метода расширения
    def expand(self, dim, copies):
        
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_shape = list(self.data.shape) + [copies]
        new_data = self.data.repeat(copies).reshape(new_shape)
        new_data = new_data.transpose(trans_cmd)
        
        if self.autograd:
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_" + str(dim))
        return Tensor(new_data)
    
    #  реализация метода транспонирования
    def transpose(self):
        if self.autograd:
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        return Tensor(self.data.transpose())
    
    # реализация матричного умножения
    def mm(self, x):
        if self.autograd:
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self, x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))
    
    # функции активации нейронов
    
    def sigmoid(self):
        if self.autograd:
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op='sigmoid')
        return Tensor(1 / (1 + np.exp(-self.data)))
    
    
    def tanh(self):
        if self.autograd:
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))
    
    def index_select(self, indices):
        
        if self.autograd:
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices
            return new
        
        return Tensor(self.data[indices.data])
    
    def cross_entropy(self, target_indices):
        
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape) - 1,
                                       keepdims=True)
        
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t), -1)
        target_dist = np.eye(p.shape[1])[t]
        loss = -(np.log(p) * (target_dist)).sum(1).mean()
        
        if self.autograd:
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out
        
        return Tensor(loss)

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())

class SGD (object):

    def __init__(self, parameters, alpha = 0.1):
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        for p in self.parameters:
            p.grad.data *= 0

    def step(self, zero=True):
        for p in self.parameters:
            p.data -= p.grad.data * self.alpha

            if zero:
                p.grad.data *= 0

class Layer (object):
    def __init__(self):
        self.parameters = list()
        
    def get_parameters(self):
        return self.parameters

class Linear(Layer):
    
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / n_inputs)
        self.weight = Tensor(W, autograd=True)
        self.bias = Tensor(np.zeros(n_outputs), autograd=True)
        
        self.parameters.append(self.weight)
        self.parameters.append(self.bias)
    
    def forward(self, input):
        return input.mm(self.weight) + self.bias.expand(0, len(input.data))

class Sequential(Layer):
    
    def __init__(self, layers=list()):
        super().__init__()
        self.layers = layers
    
    def add(self, layer):
        self.layers.append(layer)
    
    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input
    
    def get_parameters(self):
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params
    
class MSELoss (Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, pred, target):
        return ((pred - target) * (pred - target)).sum(0)

class Tanh(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.tanh()

class Sigmoid(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.sigmoid()

class Embedding(Layer):
   
    def __init__(self, vocab_size, dim):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.dim = dim
        
        weight = (np.random.rand(vocab_size, dim) - 0.5) / dim 
        self.weight = Tensor(weight, autograd=True)
        
        self.parameters.append(self.weight)
        
    def forward(self, input):
        return self.weight.index_select(input)
    
class CrossEntropyLoss(object):
    
    def __init__(self):
        super().__init__()
    
    def forward(self, input, target):
        return input.cross_entropy(target)

class RNNCell(Layer):
    def __init__(self, n_input, n_hidden, n_output, activation='sigmoid'):
        super().__init__()
        
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        
        if activation == 'sigmoid':
            self.activation = Sigmoid()
        elif activation == 'tanh':
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")
        
        self.w_ih = Linear(n_input, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)
        
        
        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()
    
    def forward(self, input, hidden):
        from_prev_hidden = self.w_hh.forward(hidden)
        combined = self.w_ih.forward(input) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.w_ho.forward(new_hidden)
        
        return output, new_hidden
    
    def init_hidden(self, batch_size):
        return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
    
        

In [3]:
x = Tensor([1, 2, 3, 4, 5])
x

array([1, 2, 3, 4, 5])

In [4]:
y = x + x
y

array([ 2,  4,  6,  8, 10])

In [5]:
x = Tensor([1, 2, 3, 4, 5])
y = Tensor([2, 2, 2, 2, 2])

z = x + y
z.backward(Tensor(np.array([1, 1, 1, 1, 1])))

In [6]:
z

array([3, 4, 5, 6, 7])

In [7]:
print(x.grad)
print(y.grad)
print(z.creators)
print(z.creation_op)

None
None
[array([1, 2, 3, 4, 5]), array([2, 2, 2, 2, 2])]
add


Рекурсия автоматического вычисления градиента

In [8]:
a = Tensor([1, 2, 3, 4, 5])
b = Tensor([2, 2, 2, 2, 2])
c = Tensor([5, 4, 3, 2, 1])
d = Tensor([-1, -2, -3, -4, -5])

e = a + b
f = c + d
g = e + f

g.backward(Tensor(np.array([1, 1, 1, 1, 1])))

In [16]:
print(a.grad.data)

[1 1 1 1 1]


<h3>Тензоры используемые многократно<h3>

In [10]:
a = Tensor([1, 2, 3, 4, 5], autograd=True)
b = Tensor([2, 2, 2, 2, 2], autograd=True)
c = Tensor([5, 4, 3, 2, 1], autograd=True)

d = a + b
e = b + c
f = d + e
f.backward(Tensor(np.array([1, 1, 1, 1, 1])))

print(b.grad.data == np.array([2, 2, 2, 2, 2]))

[ True  True  True  True  True]


<h3>Реализация метода отрицания<h3>

In [15]:
a = Tensor([1, 2, 3, 4, 5], autograd=True)
b = Tensor([2, 2, 2, 2, 2], autograd=True)
c = Tensor([5, 4, 3, 2, 1], autograd=True)

d = a + (-b)
print(f'd: {d}')
e = (-b) + c
print(f'e: {e}')
f = d + e
print(f'f: {f}')

f.backward(Tensor(np.array([1, 1, 1, 1, 1])))

print(b.grad.data == np.array([-2, -2, -2, -2, -2]))

d: [-1  0  1  2  3]
e: [ 3  2  1  0 -1]
f: [2 2 2 2 2]
[ True  True  True  True  True]


Использование autograd в обучении

In [20]:
np.random.seed(0)

data = np.array([[0,0],
                 [0,1],
                 [1,0],
                 [1,1]])
target = np.array([[0],
                   [1],
                   [0],
                   [1]])

weights_0_1 = np.random.rand(2, 3)
weights_1_2 = np.random.rand(3, 1)


In [21]:
for i in range(10):
    
    layer_1 = data.dot(weights_0_1)
    layer_2 = layer_1.dot(weights_1_2)
    
    diff = (layer_2 - target)
    sqdiff = (diff * diff)
    loss = sqdiff.sum(0) # потеря как среднеквадратическая ошибка
    
    layer_1_grad = diff.dot(weights_1_2.transpose())
    weights_1_2_update = layer_1.transpose().dot(diff)
    weights_0_1_update = data.transpose().dot(layer_1_grad)
    
    weights_1_2 -= weights_1_2_update * 0.1
    weights_0_1 -= weights_0_1_update * 0.1
    print(loss[0])

5.066439994622395
0.4959907791902342
0.4180671892167177
0.35298133007809646
0.2972549636567377
0.24923260381633286
0.20785392075862477
0.17231260916265181
0.14193744536652994
0.11613979792168386


То же самое только с использованием фреймворка

In [32]:
data = Tensor(np.array([[0,0],
                        [0,1],
                        [1,0],
                        [1,1]]), autograd=True)

target = Tensor(np.array([[0],
                          [1],
                          [0],
                          [1]]), autograd=True)

w = list()
w.append(Tensor(np.random.rand(2,3), autograd=True))
w.append(Tensor(np.random.rand(3,1), autograd=True))

In [33]:
for i in range(10):
    pred = data.mm(w[0]).mm(w[1]) # прогноз (прямое распространение)
    
    loss = ((pred - target) * (pred - target)).sum(0) # сравнение
    
    loss.backward(Tensor(np.ones_like(loss.data))) # обучение (обратное распространение)

    for w_ in w:
        w_.data -= w_.grad.data * 0.1
        w_.grad.data *= 0
        
    print(loss)
    
    

[0.71958028]
[0.29231963]
[0.16302439]
[0.10055145]
[0.06026959]
[0.03496011]
[0.01967726]
[0.01078996]
[0.00579031]
[0.00305435]


In [9]:
np.random.seed(0)

data = Tensor(np.array([[0,0],
                        [0,1],
                        [1,0],
                        [1,1]]), autograd=True)

target = Tensor(np.array([[0],
                          [1],
                          [0],
                          [1]]), autograd=True)

w = list()
w.append(Tensor(np.random.rand(2, 3), autograd=True))
w.append(Tensor(np.random.rand(3, 1), autograd=True))

optim = SGD(parameters=w, alpha=0.1)

In [10]:
for i in range(10):
    pred = data.mm(w[0]).mm(w[1])
    
    loss = ((pred - target) * (pred - target)).sum(0)
    
    loss.backward(Tensor(np.ones_like(loss.data)))
    
    optim.step()

In [11]:
print(loss)

[0.06849361]


In [17]:
data = Tensor(np.array([[0,0],
                        [0,1],
                        [1,0],
                        [1,1]]), autograd=True)

target = Tensor(np.array([[0],
                          [1],
                          [0],
                          [1]]), autograd=True)

model = Sequential([Linear(2,3), Linear(3,1)])

optim = SGD(parameters=model.get_parameters(), alpha=0.05)

for i in range(10):
    pred = model.forward(data)
    
    loss = ((pred - target) * (pred-target)).sum(0)
    
    loss.backward(Tensor(np.ones_like(loss.data)))
    
    optim.step()
    
    print(loss)

[2.6865042]
[11.271212]
[38.09060898]
[9.34796148]
[2.28355956]
[0.93643926]
[0.61224936]
[0.43932735]
[0.32808561]
[0.24902135]


In [27]:
data = Tensor(np.array([[0,0],
                        [0,1],
                        [1,0],
                        [1,1]]), autograd=True)

target = Tensor(np.array([[0],
                          [1],
                          [0],
                          [1]]), autograd=True)

model = Sequential([Linear(2,3), Linear(3,1)])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.05)

for i in range(10):
    pred = model.forward(data)

    loss = criterion.forward(pred, target)

    loss.backward()

    optim.step()

    print(loss)

[37.32745542]
[45.43205051]
[15.89010067]
[59.70198105]
[14.62067437]
[12.71230094]
[3.10240059]
[0.91838762]
[0.33883533]
[0.21708456]


<h2>Нелинейные слои<h2>

In [5]:
np.random.seed(0)

data = Tensor(np.array([[0,0],
                        [0,1],
                        [1,0],
                        [1,1]]), autograd=True)

target = Tensor(np.array([[0],
                          [1],
                          [0],
                          [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    
    pred = model.forward(data)
    
    loss = criterion.forward(pred, target)
    
    loss.backward()
    
    optim.step()
    
    print(loss)


[1.06372865]
[0.75148144]
[0.57384259]
[0.39574294]
[0.2482279]
[0.15515294]
[0.10423398]
[0.07571169]
[0.05837623]
[0.04700013]


In [7]:
x = Tensor(np.eye(5), autograd=True)
x.index_select(Tensor([[1, 2, 3],
                       [2, 3, 4]])).backward()

x.grad

array([[0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2.],
       [2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1.]])

In [11]:
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target =Tensor(np.array([ [0], [1], [0], [1] ]), autograd=True)

embed = Embedding(5, 3)
model = Sequential([embed, Tanh(), Linear(3,1), Sigmoid()])

optim = SGD(parameters=model.get_parameters(), alpha=0.5)

for i in range(10):
    
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward()
    optim.step()
    
    print(loss)

[0.96547395]
[0.60816658]
[0.3887587]
[0.2551624]
[0.17722078]
[0.13074724]
[0.1014338]
[0.0818228]
[0.06801557]
[0.05787702]


In [14]:
np.random.seed(0)

data = Tensor(np.array([1, 2, 1, 2]), autograd=True)

target = Tensor(np.array([0, 1, 0, 1]), autograd=True)

model = Sequential([Embedding(3, 3), Tanh(), Linear(3, 4)])
criterion = CrossEntropyLoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.1)

for i in range(10):
    
    pred = model.forward(data)
    
    loss = criterion.forward(pred, target)
    
    loss.backward()
    
    optim.step()
    
    print(loss)

1.3885032434928422
0.9558181509266037
0.6823083585795604
0.509525996749312
0.39574491472895856
0.31752527285348275
0.2617222861964217
0.22061283923954228
0.18946427334830068
0.16527389263866668


<h2>Тест рекурентной нейронной сети в собственном фреймворке<h2>

In [3]:
import sys, math, random
from collections import Counter


In [5]:
f = open('tasksv11/en/qa1_single-supporting-fact_train.txt', 'r')
raw = f.readlines()
f.close()


In [16]:
tokens = list()
for line in raw[0:1000]:
    tokens.append(line.lower().replace("\n", "").split(" ")[1:])

In [17]:
new_tokens = list()
for line in tokens:
    new_tokens.append(['-'] * (6 - len(line)) + line)
tokens = new_tokens

In [18]:
vocab = set()
for sent in tokens:
    for word in sent:
        vocab.add(word)
vocab = list(vocab)

In [22]:
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i

def words2indices(sentence):
    idx = list()
    for word in sentence:
        idx.append(word2index[word])
    return idx

indices = list()
for line in tokens:
    idx = list()
    for w in line:
        idx.append(word2index[w])
    indices.append(idx)


In [31]:
data = np.array(indices)
embed = Embedding(vocab_size=len(vocab), dim=16)
model = RNNCell(n_input=16, n_hidden=16, n_output=len(vocab))

criterion = CrossEntropyLoss()
params = model.get_parameters() + embed.get_parameters()
optim = SGD(parameters=params, alpha=0.05)

In [34]:
for iter in range(1000):
    batch_size = 100
    total_loss = 0
    
    hidden = model.init_hidden(batch_size = batch_size)
    
    for t in range(5):
        input = Tensor(data[0:batch_size, t], autograd=True)
        rnn_input = embed.forward(input=input)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)
    target = Tensor(data[0:batch_size, t+1], autograd=True)
    loss = criterion.forward(output, target)
    loss.backward()
    optim.step()
    
    total_loss += loss.data
    
    if iter % 200 == 0:
        p_correct = (target.data == np.argmax(output.data,axis=1)).mean()
        print_loss = total_loss / (len(data)/batch_size)
        print("Loss: ", print_loss," % Correct: ",p_correct)

Loss:  0.4746838345350352  % Correct:  0.0
Loss:  0.1728947457457735  % Correct:  0.28
Loss:  0.14833058061980758  % Correct:  0.36
Loss:  0.13826450318517364  % Correct:  0.36
Loss:  0.1350346229846787  % Correct:  0.37


In [41]:
batch_size = 1

hidden = model.init_hidden(batch_size=batch_size)

for t in range(5):
    input = Tensor(data[0:batch_size, t], autograd=True)
    rnn_input = embed.forward(input=input)
    output, hidden = model.forward(input=rnn_input, hidden=hidden)
target = Tensor(data[0:batch_size, t+1], autograd=True)
loss = criterion.forward(output, target)

ctx = ""
for idx in data[0:batch_size][0][0: -1]:
    ctx += vocab[idx] + " "

print("Context: ", ctx)
print("Pred: ", vocab[output.data.argmax()])

Context:  - mary moved to the 
Pred:  office.
