# LSTMCell

In [1]:
import numpy as np
from layers.utils.LSTMCell import LSTMCell
from model.Model import Model
from utils.model_loss import cross_entropy_loss_npdl

In [2]:
def create_toy_cell():
    """Build a small, deterministic LSTMCell for the forward-pass check.

    With NumPy's global RNG seeded to 0, the four input-side matrices
    (3x5 each) are drawn first and the four hidden-side matrices (5x5 each)
    second, both in gate order c, u, f, o. Every bias is a zero vector of
    length 5. The RNG is re-seeded to 0 right before the cell is constructed.

    Returns:
        The object returned by ``LSTMCell(3, 5, weights, biases)``.
    """
    gates = ('c', 'u', 'f', 'o')

    np.random.seed(0)
    # The draw order fixes the values: every W*_i matrix, then every W*_h matrix.
    weights = {'W' + gate + '_i': np.random.rand(3, 5) for gate in gates}
    weights.update({'W' + gate + '_h': np.random.rand(5, 5) for gate in gates})
    biases = {'b' + gate: np.zeros(5) for gate in gates}

    # Reset the global RNG before handing control to the constructor.
    np.random.seed(0)
    return LSTMCell(3, 5, weights, biases)

def create_toy_data():
    """Return deterministic toy inputs for the LSTMCell check.

    Seeds NumPy's global RNG with 1, then draws three uniform arrays over
    [-1, 1) in this order: one of shape (5, 3) followed by two of shape (5, 5).

    Returns:
        A 4-tuple: the three random arrays in draw order, then the fixed
        integer array ``[0, 1, 1, 4, 2]``.
    """
    np.random.seed(1)
    first = np.random.uniform(-1, 1, (5, 3))
    second = np.random.uniform(-1, 1, (5, 5))
    third = np.random.uniform(-1, 1, (5, 5))
    targets = np.array([0, 1, 1, 4, 2])
    return first, second, third, targets

In [3]:
# Instantiate the toy cell and the toy inputs used by the checks in the cells below.
cell = create_toy_cell()
x, h, c, y = create_toy_data()

In [4]:
# Reference value that h_next is compared against in the next cell.
correct_h_next = np.array([[ 0.05432073, -0.0240766, -0.08374854, 0.00076676, -0.14279187],
                           [-0.26776613, 0.31730641, -0.21700223, -0.28390231, -0.26996079],
                           [-0.13447024, -0.08660263, -0.10022437, -0.03162882, -0.05513277],
                           [-0.07544577, 0.17997456, -0.08875356, 0.28115015, 0.28349453],
                           [-0.20265419, -0.04393417, 0.15789945, -0.00593344, -0.18382524]])

# Reference value that c_next is compared against further below.
correct_c_next = np.array([[ 0.14534732, -0.12841532, -0.17270371, 0.0028584, -0.33097237],
                           [-0.40553132, 0.52289938, -0.46666314, -0.41915372, -0.52302227],
                           [-0.5563115, -0.20751267, -0.33762134, -0.14975814, -0.34766355],
                           [-0.12754703, 0.35234149, -0.1480342, 0.517628, 0.47883466],
                           [-0.30773508, -0.14781391, 0.21382212, -0.01451843, -0.31231354]])

# Run one forward step of the toy cell. The third returned value (h_up) is
# not used by the comparison cells that follow.
c_next, h_next, h_up  = cell.forward_npdl(x, h, c)

In [5]:
# Show the computed hidden state followed by the reference values,
# each block terminated by an empty line.
print('Your h_next:', h_next, '', sep='\n')
print('correct h_next:', correct_h_next, '', sep='\n')

# The difference should be quite small, in principle below 1e-7.
print('Difference between your h_next and correct h_next: ', np.abs(h_next - correct_h_next).sum())

Your h_next:
[[ 0.05432073 -0.0240766  -0.08374854  0.00076676 -0.14279187]
 [-0.26776613  0.31730641 -0.21700223 -0.28390231 -0.26996079]
 [-0.13447024 -0.08660263 -0.10022437 -0.03162882 -0.05513277]
 [-0.07544577  0.17997456 -0.08875356  0.28115015  0.28349453]
 [-0.20265419 -0.04393417  0.15789945 -0.00593344 -0.18382524]]

correct h_next:
[[ 0.05432073 -0.0240766  -0.08374854  0.00076676 -0.14279187]
 [-0.26776613  0.31730641 -0.21700223 -0.28390231 -0.26996079]
 [-0.13447024 -0.08660263 -0.10022437 -0.03162882 -0.05513277]
 [-0.07544577  0.17997456 -0.08875356  0.28115015  0.28349453]
 [-0.20265419 -0.04393417  0.15789945 -0.00593344 -0.18382524]]

Difference between your h_next and correct h_next:  5.8426519119331166e-08


In [6]:
# Show the computed cell state followed by the reference values,
# each block terminated by an empty line.
print('Your c_next:', c_next, '', sep='\n')
print('correct c_next:', correct_c_next, '', sep='\n')

# The difference should be quite small, in principle below 1e-7.
print('Difference between your c_next and correct c_next: ', np.abs(c_next - correct_c_next).sum())

Your c_next:
[[ 0.14534732 -0.12841532 -0.17270371  0.0028584  -0.33097237]
 [-0.40553132  0.52289938 -0.46666314 -0.41915372 -0.52302227]
 [-0.5563115  -0.20751267 -0.33762134 -0.14975814 -0.34766355]
 [-0.12754703  0.35234149 -0.1480342   0.517628    0.47883466]
 [-0.30773508 -0.14781391  0.21382212 -0.01451843 -0.31231354]]

correct c_next:
[[ 0.14534732 -0.12841532 -0.17270371  0.0028584  -0.33097237]
 [-0.40553132  0.52289938 -0.46666314 -0.41915372 -0.52302227]
 [-0.5563115  -0.20751267 -0.33762134 -0.14975814 -0.34766355]
 [-0.12754703  0.35234149 -0.1480342   0.517628    0.47883466]
 [-0.30773508 -0.14781391  0.21382212 -0.01451843 -0.31231354]]

Difference between your c_next and correct c_next:  6.203414870198723e-08


# TimeDistributed

In [7]:
from layers.Dense import Dense
from layers.TimeDistributed import TimeDistributed
from utils.model_loss import td_cross_entropy_loss_npdl

In [8]:
def create_toy_timedist():
    """Build a deterministic TimeDistributed wrapper around a small Dense layer.

    NumPy's global RNG is seeded with 0 before the layers are created, then a
    ``Dense(dim_input=5, dim_output=2)`` is wrapped with ``out_size=2``.

    Returns:
        The constructed TimeDistributed layer.
    """
    np.random.seed(0)
    inner_dense = Dense(dim_input=5, dim_output=2)
    return TimeDistributed(inner_dense, out_size=2)

def create_toy_sequence():
    """Return a deterministic toy batch for the TimeDistributed check.

    Seeds NumPy's global RNG with 1 and draws a (3, 4, 5) array uniformly
    over [-1, 1).

    Returns:
        A 2-tuple: the random (3, 4, 5) array and a fixed (3, 4) array of
        0/1 integer targets.
    """
    np.random.seed(1)
    sequence = np.random.uniform(-1, 1, (3, 4, 5))
    targets = np.array([
        [1, 1, 1, 0],
        [1, 1, 0, 1],
        [0, 1, 1, 1],
    ])
    return sequence, targets

In [9]:
# Instantiate the toy layer and the toy sequence.
# Note: x and y rebind the names used for the LSTMCell toy data earlier in the notebook.
layer = create_toy_timedist()
x, y = create_toy_sequence()

In [10]:
# Forward pass of the time-distributed layer on the toy sequence; the bare
# name on the last line displays the result as the cell output.
z = layer.forward_npdl(x)
z

array([[[-2.03128610e-04,  1.66784694e-04],
        [-2.83343210e-04, -1.36738569e-04],
        [-2.10377603e-05,  8.40539508e-05],
        [ 3.77615030e-06, -4.87965243e-05]],

       [[ 1.56853284e-04,  2.95488224e-04],
        [-1.84717967e-04, -2.32514097e-05],
        [ 1.62175389e-05, -1.42246891e-04],
        [-2.92006383e-07,  3.85406131e-05]],

       [[ 2.02205524e-04,  1.51920385e-04],
        [-4.81784643e-05,  1.95335290e-04],
        [-2.86652305e-04,  1.04453634e-04],
        [-2.62175136e-04,  8.02224474e-05]]])

In [11]:
# Loss on the toy scores, together with the gradient passed to the backward pass below.
# NOTE(review): the third argument (0.0) is presumably a regularization strength
# applied to the parameters given in the dict — confirm against td_cross_entropy_loss_npdl.
loss, dScores, softmax_output = td_cross_entropy_loss_npdl(z, y, 0.0, {'l1': layer.get_params()})

In [12]:
# Backward pass through the layer starting from dScores; the bare name on the
# last line displays the result as the cell output.
dX = layer.backward_npdl(dScores)
dX

array([[[ 2.27273813e-05, -2.10320295e-05,  4.74051616e-05,
          1.83540318e-05, -8.56203866e-06],
        [ 2.27299193e-05, -2.10343783e-05,  4.74104556e-05,
          1.83560815e-05, -8.56299483e-06],
        [ 2.27303912e-05, -2.10348149e-05,  4.74114398e-05,
          1.83564625e-05, -8.56317258e-06],
        [-2.27309881e-05,  2.10353673e-05, -4.74126848e-05,
         -1.83569445e-05,  8.56339746e-06]],

       [[ 2.27300099e-05, -2.10344621e-05,  4.74106446e-05,
          1.83561546e-05, -8.56302895e-06],
        [ 2.27297504e-05, -2.10342220e-05,  4.74101033e-05,
          1.83559450e-05, -8.56293119e-06],
        [-2.27297846e-05,  2.10342535e-05, -4.74101745e-05,
         -1.83559726e-05,  8.56294405e-06],
        [ 2.27311443e-05, -2.10355118e-05,  4.74130106e-05,
          1.83570707e-05, -8.56345629e-06]],

       [[-2.27310141e-05,  2.10353914e-05, -4.74127391e-05,
         -1.83569655e-05,  8.56340725e-06],
        [ 2.27288179e-05, -2.10333590e-05,  4.74081582e-05,


# Sentiment analysis

In [13]:
import numpy as np
import pandas as pd
from bpemb import BPEmb

In [14]:
# Dataset: Stanford Sentiment Treebank V1.0

# Phrase text / phrase id pairs; the file is read without a header row, so the
# positional columns 0 and 1 are given explicit names.
dictionary = (
    pd.read_csv('datasets/stanfordSentimentTreebank/dictionary.txt', header=None, sep='|')
    .rename(columns={0: 'phrase', 1: 'phrase_id'})
)

# Split label of every sentence index.
dataset_split = pd.read_csv('datasets/stanfordSentimentTreebank/datasetSplit.txt', sep=',')

# Sentence index and sentence text (tab-separated).
dataset_sentences = pd.read_csv('datasets/stanfordSentimentTreebank/datasetSentences.txt', sep='\t')

# Sentiment value per phrase id, with the header names normalised to identifiers.
dataset_labels = (
    pd.read_csv('datasets/stanfordSentimentTreebank/sentiment_labels.txt', sep='|')
    .rename(columns={'phrase ids': 'phrase_id', 'sentiment values': 'sentiment'})
)

In [15]:
# Attach a phrase_id to every sentence by matching its text against the phrase dictionary.
sentences_merged = (
    dataset_sentences
    .merge(dictionary, left_on='sentence', right_on='phrase', how='left')
    .drop(columns=['phrase'])
)

# Keep only the sentences for which a phrase_id was found.
sentences_clean = sentences_merged[sentences_merged['phrase_id'].notna()]

In [16]:
# Look up the sentiment value of each sentence; phrase_id is dropped once it has served as the join key.
sentences_with_labels = (
    sentences_clean
    .merge(dataset_labels, on='phrase_id', how='left')
    .drop(columns=['phrase_id'])
)

In [17]:
# Attach the train / valid / test split label to each sentence via its sentence_index.
sentences_split = pd.merge(sentences_with_labels, dataset_split, on='sentence_index')

# Embeddings

In [18]:
# English BPEmb embedder producing 25-dimensional vectors.
# NOTE(review): `vs` is presumably the subword vocabulary size — confirm against the BPEmb documentation.
bpemb_en = BPEmb(lang="en", dim=25, vs=100000)



In [19]:
def call_embed(value, embedder, max_length):
    """Embed ``value`` and zero-pad the result along axis 0 up to ``max_length`` rows.

    Args:
        value: Input passed unchanged to ``embedder.embed``.
        embedder: Object exposing ``embed(value)`` that returns a 2-D array.
        max_length: Row count of the returned array. It must not be smaller
            than the number of rows produced by the embedder, because
            ``np.pad`` rejects a negative pad width.

    Returns:
        The embedding with zero-filled rows appended after the original rows.
    """
    vectors = embedder.embed(value)
    missing_rows = max_length - vectors.shape[0]
    # Pad only after the last row; the second axis is left untouched.
    return np.pad(vectors, ((0, missing_rows), (0, 0)), mode='constant', constant_values=0)

def get_longest(value, embedder):
    """Return the number of rows (size of axis 0) of ``embedder.embed(value)``."""
    return embedder.embed(value).shape[0]

def convert_sentiment(value):
    """Bucket a sentiment score into one of three integer classes.

    Returns 0 when ``value <= 0.4``, 1 when ``0.4 < value <= 0.6`` and 2 in
    every other case (including values for which both comparisons are false).
    """
    upper_bounds = (0.4, 0.6)
    # The first bound that the value does not exceed determines the class.
    for label, bound in enumerate(upper_bounds):
        if value <= bound:
            return label
    return len(upper_bounds)
    

# Number of embedding rows produced for each sentence.
sentences_split['len'] = sentences_split.apply(
    lambda row: get_longest(row['sentence'], bpemb_en), axis=1
)

# Largest row count over all sentences; every embedding is padded to this length.
max_len = sentences_split['len'].max()
print(max_len)

# Zero-padded embedding and integer sentiment class for each sentence.
sentences_split['embedding'] = sentences_split.apply(
    lambda row: call_embed(row['sentence'], bpemb_en, max_len), axis=1
)
sentences_split['sentiment_label'] = sentences_split.apply(
    lambda row: convert_sentiment(row['sentiment']), axis=1
)

58


In [20]:
# Partition the rows on splitset_label (1, 2, 3) and drop that column from each part.
# NOTE(review): the Stanford Sentiment Treebank README lists the labels as
# 1 = train, 2 = test, 3 = dev, which would make `valid` the official test
# split and `test` the official dev split — confirm the naming is intended.
train, valid, test = (
    sentences_split.loc[sentences_split['splitset_label'] == split_id].drop(columns=['splitset_label'])
    for split_id in (1, 2, 3)
)

In [21]:
# Turn the per-sentence embeddings of each split into one NumPy array per split.
train_data, valid_data, test_data = (
    np.array(part.embedding.tolist()) for part in (train, valid, test)
)

# Integer sentiment classes, one per sentence, for each split.
train_labels, valid_labels, test_labels = (
    np.array(part.sentiment_label.tolist()) for part in (train, valid, test)
)

# NOTE(review): the `test` split is appended to the training set here, so it is
# no longer held out from training — confirm this is intended.
train_data = np.concatenate((train_data, test_data), axis=0)
train_labels = np.concatenate((train_labels, test_labels), axis=0)

# Modèle LSTM

In [22]:
from model.Model import Model
from layers.LSTM import LSTM
from layers.Dense import Dense
from layers.Flatten import Flatten
from utils.model_loss import cross_entropy_loss_npdl

def create_lstm_network(seq_len=58, embedding_dim=25, hidden_dim=50, n_classes=3):
    """Build the LSTM -> Flatten -> Dense classifier used for sentiment analysis.

    The layer sizes used to be hard-coded; they are now parameters whose
    defaults reproduce the original network, so ``create_lstm_network()``
    behaves exactly as before.

    Args:
        seq_len: First positional argument of ``LSTM``. The default 58 matches
            the padded sentence length (``max_len``) printed earlier in this
            notebook. NOTE(review): presumably the sequence length — confirm
            against ``layers.LSTM``.
        embedding_dim: Second positional argument of ``LSTM``. The default 25
            matches the ``dim`` passed to the BPEmb embedder.
        hidden_dim: Third positional argument of ``LSTM`` and input size of
            the final ``Dense`` layer; the two must agree.
        n_classes: Output size of the final ``Dense`` layer. The default 3
            matches the labels 0, 1, 2 produced by ``convert_sentiment``.

    Returns:
        A ``Model`` with the LSTM, Flatten and Dense layers added in that
        order and ``cross_entropy_loss_npdl`` registered as its loss.
    """
    model = Model()

    # Layers are constructed in the original order (LSTM, Dense, Flatten) so
    # that any random initialisation consumes the RNG in the same sequence.
    lstm1 = LSTM(seq_len, embedding_dim, hidden_dim, weight_scale=None)
    dense1 = Dense(hidden_dim, n_classes, weight_scale=None)

    flatten = Flatten()

    model.add(lstm1)
    model.add(flatten)
    model.add(dense1)
    model.add_loss(cross_entropy_loss_npdl)
    return model


In [23]:
# Smoke test: predictions of a freshly created network (no training step has
# been run on it) for the first 32 samples of test_data.
model = create_lstm_network()
predictions = model.predict(test_data[:32])
print(predictions)

[0 1 2 0 0 0 1 2 2 2 2 2 1 2 1 0 2 2 0 0 0 0 2 2 2 2 2 1 1 2 2 1]


In [24]:
# Forward pass and loss on the first 32 training samples.
# NOTE(review): the third argument (0.0) is presumably the regularization
# strength — confirm against Model.calculate_loss.
scores = model.forward_npdl(train_data[:32])
loss, dScores, softmax_output = model.calculate_loss(scores, train_labels[:32], 0.0)
print(loss)

1.078290409631384


In [25]:
from model.Solver import epoch_solver_npdl, Adam, SGD

# Start from a newly created network rather than the instance used in the cells above.
model = create_lstm_network()
optimizer = Adam(1e-3, model)

# Train for 10 epochs with batches of 16 and keep the per-step histories.
# NOTE(review): the fifth positional argument (2e-4) is presumably the
# regularization strength — confirm against epoch_solver_npdl.
loss_history, train_accuracy_history, val_accuracy_history = epoch_solver_npdl(
    train_data,
    train_labels,
    valid_data,
    valid_labels,
    2e-4,
    optimizer,
    lr_decay=0.95,
    batch_size=16,
    epochs=10,
)

(batch 20 / 573) loss: 0.974579
(batch 40 / 573) loss: 1.152193
(batch 60 / 573) loss: 1.014861
(batch 80 / 573) loss: 1.117595
(batch 100 / 573) loss: 1.080216
(batch 120 / 573) loss: 1.094187
(batch 140 / 573) loss: 0.971715
(batch 160 / 573) loss: 0.985145
(batch 180 / 573) loss: 1.049306
(batch 200 / 573) loss: 1.103454
(batch 220 / 573) loss: 1.116618
(batch 240 / 573) loss: 1.242435
(batch 260 / 573) loss: 1.239236
(batch 280 / 573) loss: 1.271869
(batch 300 / 573) loss: 1.034407
(batch 320 / 573) loss: 1.084623
(batch 340 / 573) loss: 1.165341
(batch 360 / 573) loss: 0.925095
(batch 380 / 573) loss: 1.015803
(batch 400 / 573) loss: 0.995433
(batch 420 / 573) loss: 1.047951
(batch 440 / 573) loss: 0.919563
(batch 460 / 573) loss: 1.009250
(batch 480 / 573) loss: 0.905819
(batch 500 / 573) loss: 0.985285
(batch 520 / 573) loss: 0.976965
(batch 540 / 573) loss: 1.161027
(batch 560 / 573) loss: 1.078440
(epoch 1 / 10) loss: 1.039511, train_acc: 0.478660, val_acc: 0.505412
(batch 20 