In [1]:
import numpy as np

In [2]:
# Four sample vectors (two integer-valued, two float-valued) that the
# following cells multiply by the identity matrix.
a, b, c, d = (
    np.array(values)
    for values in ([1, 2, 3], [0.1, 0.2, 0.3], [-1, -0.5, 0], [0, 0, 0])
)

In [3]:
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
identity = np.identity(3)
identity

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [4]:
# Multiply each sample vector by the identity matrix and print the product.
for vector in (a, b, c, d):
    print(np.dot(vector, identity))

[1. 2. 3.]
[0.1 0.2 0.3]
[-1.  -0.5  0. ]
[0. 0. 0.]


In [5]:
# One 3-element integer vector per word of the phrase "this movie rocks".
this = np.array((2, 4, 6))
movie = np.array((10,) * 3)
rocks = np.array((1,) * 3)

In [6]:
# Plain element-wise sum of the three word vectors.
print(this + movie + rocks)
# The same sum, but the running result is multiplied by the identity matrix
# before each following word vector is added.
print(np.dot(np.dot(this, identity) + movie, identity) + rocks)

[13 15 17]
[13. 15. 17.]


<h3>Внедрение матрицы перехода в векторное представление предложения</h3>

In [7]:
def softmax(x_):
    """Row-wise softmax.

    Args:
        x_: array-like of scores. Inputs with fewer than two dimensions are
            promoted with ``np.atleast_2d``, so a 1-D input is treated as a
            single row.

    Returns:
        Array with the same shape as the promoted input; the entries along
        axis 1 are non-negative and sum to 1.
    """
    x = np.atleast_2d(x_)
    if x.shape[1]:
        # Subtracting the per-row maximum leaves the result mathematically
        # unchanged but keeps np.exp from overflowing to inf (which would
        # turn the division below into inf / inf = nan) for large scores.
        # Skipped when axis 1 is empty, because max over an empty axis raises.
        x = x - np.max(x, axis=1, keepdims=True)
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [8]:
# Every word starts with its own all-zero 1x3 vector; each entry is a separate
# array so that later in-place updates to one word do not touch the others.
word_vects = {
    word: np.zeros((1, 3))
    for word in ('yankees', 'bears', 'braves', 'red', 'sox',
                 'lose', 'defeat', 'beat', 'tie')
}

# Seed the generator so the random output weights are the same on every run.
np.random.seed(1)
# Output weights: one column per entry of word_vects.
sent2output = np.random.rand(3, len(word_vects))

# Transition matrix, (re)initialised to the 3x3 identity.
identity = np.eye(3)

In [9]:
# Forward pass over "red sox defeat": each step multiplies the running
# sentence vector by the transition matrix and adds the next word's vector.
# Note that layer_0 is the very same array object as word_vects['red'].
layer_0 = word_vects['red']
layer_1 = np.dot(layer_0, identity) + word_vects['sox']
layer_2 = np.dot(layer_1, identity) + word_vects['defeat']

# Turn the final sentence vector into a probability distribution.
pred = softmax(np.dot(layer_2, sent2output))
pred

array([[0.11111111, 0.11111111, 0.11111111, 0.11111111, 0.11111111,
        0.11111111, 0.11111111, 0.11111111, 0.11111111]])

In [10]:
# Learning rate and the one-hot target (first of nine positions set).
alpha = 0.01
y = np.array([1] + [0] * 8)

In [11]:
# Error at the output: predicted distribution minus the one-hot target.
pred_delta = pred - y

# Backpropagation: push the error back through the output weights and then
# through each application of the transition matrix. Multiplying by 1 makes a
# separate copy of the delta for the word vector added at that step.
layer_2_delta = np.dot(pred_delta, sent2output.T)
defeate_delta = 1 * layer_2_delta
layer_1_delta = np.dot(layer_2_delta, identity.T)
sox_delta = 1 * layer_1_delta
layer_0_delta = np.dot(layer_1_delta, identity.T)


In [12]:
# Update the transition and output matrices first, while layer_0 still holds
# the activation used in the forward pass: layer_0 is the same array object as
# word_vects['red'], so the in-place word-vector update further down would
# otherwise change layer_0 before it is used here.
identity -= np.outer(layer_0, layer_1_delta) * alpha
identity -= np.outer(layer_1, layer_2_delta) * alpha
sent2output -= np.outer(layer_2, pred_delta) * alpha

# Gradient step on the three word vectors that took part in the forward pass.
word_vects['red'] -= layer_0_delta * alpha
word_vects['sox'] -= sox_delta * alpha
word_vects['defeat'] -= defeate_delta * alpha

Обучение рекуррентной нейронной сети на наборе данных bAbI

In [75]:
import sys, random, math
from collections import Counter

In [76]:
# Read every line of the bAbI qa1 training file. The `with` block closes the
# file handle even if reading raises.
with open('tasksv11/en/qa1_single-supporting-fact_train.txt', 'r') as f:
    raw = f.readlines()

In [77]:
# Tokenise the first 1000 lines: lower-case, strip the newline, split on single
# spaces and drop the first field of every line.
tokens = [
    line.lower().replace("\n", "").split(" ")[1:]
    for line in raw[:1000]
]
tokens[0:4]

[['mary', 'moved', 'to', 'the', 'bathroom.'],
 ['john', 'went', 'to', 'the', 'hallway.'],
 ['where', 'is', 'mary', 'bathroom1'],
 ['daniel', 'went', 'back', 'to', 'the', 'hallway.']]

In [78]:
# Collect the distinct words of all sentences. The vocabulary is sorted so
# that every word gets the same index on every run: the iteration order of a
# set of strings can differ between interpreter sessions, which would make the
# seeded weight initialisation line up with different words each time.
vocab = sorted({word for sent in tokens for word in sent})

# Reverse lookup: word -> position in `vocab`.
word2index = {word: i for i, word in enumerate(vocab)}


In [79]:
def words2indices(sentence):
    """Return the list of `word2index` entries for the words in `sentence`.

    A word that is missing from `word2index` raises KeyError.
    """
    return [word2index[word] for word in sentence]

def softmax(x):
    """Softmax of `x`, normalised along axis 0.

    The global maximum of `x` is subtracted before exponentiating so the
    exponentials cannot overflow. This definition replaces the `softmax`
    defined earlier in the notebook.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=0)


In [101]:
np.random.seed(1)
embed_size = 10

# Parameters that need no random draws.
start = np.zeros(embed_size)          # sentence vector for the empty sentence
recurrent = np.identity(embed_size)   # recurrent matrix (initially the identity)
one_hot = np.identity(len(vocab))     # row i is the one-hot target for word i (used for the loss)

# Randomly initialised parameters, uniform in [-0.05, 0.05). `embed` is drawn
# before `decoder`; with a fixed seed the order of the draws fixes the values.
embed = (np.random.rand(len(vocab), embed_size) - 0.5) * 0.1      # word embeddings
decoder = (np.random.rand(embed_size, len(vocab)) - 0.5) * 0.1    # output weights: sentence vector -> word scores

In [102]:
def predict(sent):
    """Run the recurrent forward pass over one sentence.

    Args:
        sent: sequence of word indices (row indices into `embed`).

    Returns:
        A tuple ``(layers, loss)``.

        ``layers`` is a list with ``len(sent) + 1`` dicts. ``layers[0]`` holds
        only ``'hidden'`` (the `start` vector itself, not a copy). Every later
        dict holds ``'pred'``, the softmax over the vocabulary computed from
        the previous hidden state, and ``'hidden'``, the new hidden state.

        ``loss`` is the sum over the words of ``sent`` of the negative log
        probability that the corresponding ``'pred'`` assigns to that word
        (the integer 0 for an empty sentence).
    """
    layers = [{'hidden': start}]
    loss = 0

    # forward propagation
    for word_idx in sent:
        prev_hidden = layers[-1]['hidden']
        layer = {}

        # try to predict this word from the state built so far
        layer['pred'] = softmax(prev_hidden.dot(decoder))
        loss += -np.log(layer['pred'][word_idx])

        # generate the next hidden state by folding this word in
        layer['hidden'] = prev_hidden.dot(recurrent) + embed[word_idx]
        layers.append(layer)

    return layers, loss

In [103]:
# Train the recurrent model with one gradient step per sentence.
alpha = 0.001  # learning rate; constant, so it is set once outside the loop

# The loop variable is `iteration` rather than `iter` so that the built-in
# iter() is not shadowed for the rest of the notebook.
for iteration in range(3 * 10**4):
    # Cycle through the sentences; the first token of each one is dropped.
    sent = words2indices(tokens[iteration % len(tokens)][1:])
    if not sent:
        # Nothing to predict; this also avoids dividing by len(sent) == 0 below.
        continue

    # forward propagation
    layers, loss = predict(sent)

    # backpropagation, from the last layer back to the first
    for layer_idx in reversed(range(len(layers))):
        layer = layers[layer_idx]

        if layer_idx > 0:  # every layer except the first one made a prediction
            target = sent[layer_idx - 1]
            layer['output_delta'] = layer['pred'] - one_hot[target]
            new_hidden_delta = layer['output_delta'].dot(decoder.transpose())

            if layer_idx == len(layers) - 1:
                # last layer: there is no following layer to pull a delta from
                layer['hidden_delta'] = new_hidden_delta
            else:
                layer['hidden_delta'] = new_hidden_delta + layers[layer_idx+1]['hidden_delta'].dot(recurrent.transpose())

        else:
            # first layer (the start state): it has no prediction of its own,
            # so its delta comes only from the layer that follows it
            layer['hidden_delta'] = layers[layer_idx+1]['hidden_delta'].dot(recurrent.transpose())

    # weight update, averaged over the sentence length
    start -= layers[0]['hidden_delta'] * alpha / float(len(sent))
    for layer_id, layer in enumerate(layers[1:]):
        # here `layer` is layers[layer_id + 1] and layers[layer_id] is the layer before it

        decoder -= np.outer(layers[layer_id]['hidden'], layer['output_delta']) * alpha / float(len(sent))

        embed_idx = sent[layer_id]
        # NOTE(review): predict() adds embed[sent[layer_id]] into
        # layers[layer_id + 1]['hidden'], while this update reads the delta of
        # layers[layer_id] - confirm that this index is intended.
        embed[embed_idx] -= layers[layer_id]['hidden_delta'] * alpha / float(len(sent))

        recurrent -= np.outer(layers[layer_id]['hidden'], layer['hidden_delta']) * alpha / float(len(sent))

    if iteration % 1000 == 0:
        print(f'Perplexity: {np.exp(loss/len(sent))}')

Perplexity: 78.20868017216667
Perplexity: 78.10566822739345
Perplexity: 77.9845660253729
Perplexity: 77.79172985430485
Perplexity: 77.42977689503837
Perplexity: 76.68977825671534
Perplexity: 75.04031223991281
Perplexity: 70.67601114041365
Perplexity: 52.76121318315084
Perplexity: 27.6521775197623
Perplexity: 20.082749219373415
Perplexity: 18.701987121639597
Perplexity: 17.58634079446219
Perplexity: 16.106745227499466
Perplexity: 13.787581371315463
Perplexity: 10.553443254420703
Perplexity: 7.90364018447732
Perplexity: 6.61062435741281
Perplexity: 5.8044089928827205
Perplexity: 5.305997329389027
Perplexity: 4.9807269906102505
Perplexity: 4.7527022097737035
Perplexity: 4.601530599604509
Perplexity: 4.517506029586688
Perplexity: 4.469466553929598
Perplexity: 4.414889118769853
Perplexity: 4.342929109060579
Perplexity: 4.258303739268453
Perplexity: 4.168476693419766
Perplexity: 4.082663633172574


In [104]:
sent_index = 4

# Forward pass only; the loss returned by predict() is not needed here.
sent_layers = predict(words2indices(tokens[sent_index]))[0]

print(tokens[sent_index])

# sent_layers[0] is the start state (it has no 'pred') and the last layer has
# no following word to compare against, so both are skipped.
for i, each_layer in enumerate(sent_layers[1:-1]):
    # Named prev_word rather than `input` so the built-in input() is not shadowed.
    prev_word = tokens[sent_index][i]
    true_word = tokens[sent_index][i+1]
    pred_word = vocab[each_layer['pred'].argmax()]
    # Left-align in fixed-width columns (12 and 15 characters).
    print(f"Prev Input:{prev_word:<12}True:{true_word:<15}Pred:{pred_word}")

['sandra', 'moved', 'to', 'the', 'garden.']
Prev Input:sandra      True:moved          Pred:is
Prev Input:moved       True:to             Pred:to
Prev Input:to          True:the            Pred:the
Prev Input:the         True:garden.        Pred:bedroom.


In [105]:
sent_index = 1

# Forward pass only; the loss returned by predict() is not needed here.
sent_layers = predict(words2indices(tokens[sent_index]))[0]

print(tokens[sent_index])

# sent_layers[0] is the start state (it has no 'pred') and the last layer has
# no following word to compare against, so both are skipped.
for i, each_layer in enumerate(sent_layers[1:-1]):
    # Named prev_word rather than `input` so the built-in input() is not shadowed.
    prev_word = tokens[sent_index][i]
    true_word = tokens[sent_index][i+1]
    pred_word = vocab[each_layer['pred'].argmax()]
    # Left-align in fixed-width columns (12 and 15 characters).
    print(f"Prev Input:{prev_word:<12}True:{true_word:<15}Pred:{pred_word}")

['john', 'went', 'to', 'the', 'hallway.']
Prev Input:john        True:went           Pred:is
Prev Input:went        True:to             Pred:to
Prev Input:to          True:the            Pred:the
Prev Input:the         True:hallway.       Pred:bedroom.


In [106]:
sent_index = 23

# Forward pass only; the loss returned by predict() is not needed here.
sent_layers = predict(words2indices(tokens[sent_index]))[0]

print(tokens[sent_index])

# sent_layers[0] is the start state (it has no 'pred') and the last layer has
# no following word to compare against, so both are skipped.
for i, each_layer in enumerate(sent_layers[1:-1]):
    # Named prev_word rather than `input` so the built-in input() is not shadowed.
    prev_word = tokens[sent_index][i]
    true_word = tokens[sent_index][i+1]
    pred_word = vocab[each_layer['pred'].argmax()]
    # Left-align in fixed-width columns (12 and 15 characters).
    print(f"Prev Input:{prev_word:<12}True:{true_word:<15}Pred:{pred_word}")

['where', 'is', 'sandra', 'bathroom2']
Prev Input:where       True:is             Pred:is
Prev Input:is          True:sandra         Pred:to
Prev Input:sandra      True:bathroom2      Pred:the
