# Long Short-Term Memory Networks

![texto alternativo](http://colah.github.io/posts/2015-08-Understanding-LSTMs/img/RNN-shorttermdepdencies.png)

# **Setup Example**

Create a simple word-prediction LSTM. It reads a fairy tale (or any other story) from a text file and treats its words as sequences, in order to predict the next word from the previous ones. The main purpose of the model is to take a few initial words and generate a story from the words learned during the training stage.

In [2]:
'''
A Long Short-Term Memory (LSTM) network implementation using TensorFlow.
It predicts the next word from the n_input preceding words, learned from a
text file. A story is generated automatically when some initial words are
provided as input to the model.
'''


from google.colab import drive
drive.mount('/content/drive/',force_remount=True)
import os
os.chdir('/content/drive/My Drive/')
print(os.listdir())

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.nn import rnn_cell
import random
import collections
import time




Mounted at /content/drive/
['Plantilla TEcnologicas', '~$planillas.xlsx', 'Control Digital, Teoría y Práctica 2Ed- Luis Eduardo García Jaimes.pdf', 'ingenieria-de-control-moderna-ogata-5ed (1).pdf', 'Shahed University-DiscreteTimeControlSystems_Ogata-2nd ed.pdf', 'Yernin', 'Semillero.xlsx', 'planillas.xlsx', 'planilla Semilleros.xlsx', 'Yomin', 'Documento Maestro MEB', 'Micros', 'Cuentas', 'Cuentas Construcción', 'Notas Analoga.xlsx', 'Notas Auto2.xlsx', 'Notas Auto 3.xlsx', 'Agenda Paceintes Estudio Bioimpedancia.xlsx', 'ProyectoMetilaciónCienciasForenses_LuisaRestrepov4.docx', 'Articulo EEG.pdf', 'PCA-CLAIB', 'Tesis Maestria', 'proyecto UdeA', 'Pupila', 'evidencias plan de trabajo', 'Certificadocatedra.pdf', 'Copia de Base de datos Eventos CiBi_2017-2018-2.xlsx', 'Jose-Daniel Ojo', 'Correcciones Tesis.zip', '20171103_234148.avi', 'Valoración del curso.gform', 'Valoración de la clase.gform', 'Test de autoevaluación en blanco.gform', 'IB.gform', 'MEB.gform', 'Base de datos Even

In [5]:
start_time = time.time()

# Define a log file to sum up our model
# Conveniently, the log will be stored in our data path 
#data_path = "data/My Drive/Colab Notebooks/Datasets/LSTM_words/"
#writer = tf.summary.FileWriter(data_path)

# Text file containing words for training
training_file = 'Normas.txt'

# Reading text file
def read_data(fname):
    """Load a text file and return its whitespace-separated words.

    Args:
        fname: Path of the text file to read.

    Returns:
        A NumPy array holding every word of the file, in reading order.
        Punctuation attached to a word is kept as part of that word.
    """
    # Decode explicitly as UTF-8 so accented text does not depend on the
    # platform's default locale encoding.
    with open(fname, encoding="utf-8") as f:
        # split() without arguments splits on any run of whitespace
        # (newlines included) and never yields empty strings, which is
        # exactly what stripping and splitting line by line produced.
        words = f.read().split()
    return np.array(words)

training_data = read_data(training_file)
print("Training data loaded...")

Training data loaded...


In [6]:
def build_dataset(words):
    """Build word <-> integer id lookup tables for a sequence of words.

    Ids are assigned in descending order of word frequency, so the most
    common word gets id 0.

    Args:
        words: Iterable of hashable word tokens.

    Returns:
        A ``(dictionary, reverse_dictionary)`` tuple: the first maps each
        word to its id, the second maps each id back to its word.
    """
    ranked = collections.Counter(words).most_common()
    dictionary = {token: idx for idx, (token, _) in enumerate(ranked)}
    reverse_dictionary = {idx: token for token, idx in dictionary.items()}
    return dictionary, reverse_dictionary

dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)
print(dictionary.values())

dict_values([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 21

In [0]:

# Define parameters
# Step size handed to the RMSProp optimizer.
learning_rate = 0.001
# Number of consecutive words fed to the network to predict the next word.
n_input = 3
# Number of training iterations; each iteration trains on a single word window.
num_epochs = 50000
# NOTE(review): not referenced anywhere in the visible code - confirm before relying on it.
num_classes = 2
# Averaged loss/accuracy are printed every `echo_step` training iterations.
echo_step = 500
#echo_step = 1000
# NOTE(review): only used for the shape of the `init_state` placeholder;
# the training loop feeds one sample per iteration.
batch_size = 5
# Number of words generated after the words typed by the user.
words_to_predict = 10


# number of units in RNN cell
n_hidden = 512


In [0]:
# Start from an empty default graph so re-running this cell does not stack ops.
tf.reset_default_graph()
# --- Create placeholders
# Input word windows: one word id (fed as float) per time step,
# shape [batch, n_input, 1].
batchX_placeholder = tf.placeholder(tf.float32, [None, n_input , 1])
# One-hot encoded target word, shape [batch, vocab_size].
batchY_placeholder = tf.placeholder(tf.float32, [None, vocab_size])

# NOTE(review): this placeholder is never fed or referenced in the visible
# code - confirm whether it can be removed.
init_state = tf.placeholder(tf.float32, [batch_size, vocab_size])

# --- Weights, Bias initialization
# Output projection from the LSTM output (n_hidden) to one score per
# vocabulary word. W starts from uniform random values in [0, 1), b from zeros.
W = tf.Variable(np.random.rand(n_hidden, vocab_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, vocab_size)), dtype=tf.float32)


Training considering previous states.

![texto alternativo](http://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-chain.png)

![alt text](https://deeplearning4j.org/img/greff_lstm_diagram.png)

In [9]:

def model(input_placeholder, weights, biases):
    """Build the LSTM graph and return the next-word logits.

    Args:
        input_placeholder: Tensor of word ids; it is flattened to rows of
            ``n_input`` values, one row per sample.
        weights: Matrix that projects the last LSTM output onto the
            output classes.
        biases: Bias added after the projection.

    Returns:
        The tensor ``matmul(last_output, weights) + biases`` (unnormalized
        scores, no softmax applied here).
    """
    # Flatten to [-1, n_input]: one row per sample, one column per time step.
    flat_inputs = tf.reshape(input_placeholder, [-1, n_input])

    # Split into a list of n_input per-step tensors
    # (eg. [had] [a] [general] -> [20] [6] [33])
    step_inputs = tf.split(flat_inputs, n_input, 1)

    # Single-layer LSTM with n_hidden units.
    # Author's note: average accuracy 90.60% at 50k iterations, versus
    # 95.20% for the 2-layer alternative
    # rnn.MultiRNNCell([rnn_cell.LSTMCell(n_hidden), rnn_cell.LSTMCell(n_hidden)])
    lstm_cell = rnn_cell.LSTMCell(n_hidden, reuse=tf.AUTO_REUSE)

    # Unroll the cell over the n_input time steps.
    step_outputs, _final_state = rnn.static_rnn(
        lstm_cell, step_inputs, dtype=tf.float32)

    # Only the output of the last time step is used for the prediction.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights) + biases

predictions = model(batchX_placeholder, W, b)



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [10]:

# Loss and optimizer

# Mean softmax cross-entropy between the model logits and the one-hot labels.
total_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = predictions , labels= batchY_placeholder ))
# Running this op performs one RMSProp update that minimizes total_loss.
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(total_loss)

# Model evaluation
# Introduce the accuracy estimation based on predictions
# A sample counts as correct when the highest-scoring class equals the
# class marked in the one-hot label; accuracy is the mean over the batch.
correct_predictions = tf.equal(tf.argmax(predictions ,1), tf.argmax(batchY_placeholder ,1))
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


To make predictions, we take the scores the model outputs over the vocabulary for a given sequence of words and pick the most likely next word. The goal is to predict words that are consistent with the initial story.


![texto alternativo](https://cdn-images-1.medium.com/max/800/1*XAJdt_EbedqDlrTT9eqWvQ.png)

In [11]:

# Launch the graph: train the model, then generate text interactively.
#
# Training walks over `training_data` in windows of n_input words and uses
# the word that follows each window as the label. After training, the user
# types n_input words and the model appends `words_to_predict` words to them.
# Typing a sentence whose first word is '1' leaves the interactive loop.

with tf.Session() as session:
    session.run(init)
    # Random starting position so windows are not always aligned the same way.
    offset = random.randint(0,n_input+1)
    # A training sample needs n_input words plus the label word.
    end_offset = n_input + 1
    # Running sums, reset after every progress report.
    acc_total = 0
    loss_list = 0

    #writer.add_graph(session.graph)

    for epoch_idx in range(num_epochs):

        # Restart from a fresh random position once the next window (plus
        # its label) would run past the end of the training text.
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        # Input window and the word that follows it.
        symbols_in = list(training_data[offset:offset + n_input])
        symbols_out = training_data[offset + n_input]

        # Encode the input words as ids, shaped [1, n_input, 1].
        symbols_in_keys = [[dictionary[str(word)]] for word in symbols_in]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # One-hot encode the label word, shaped [1, vocab_size].
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(symbols_out)]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        # Run one optimization step and fetch the metrics and raw scores.
        _, acc, loss, onehot_pred = session.run(
            [optimizer, accuracy, total_loss, predictions],
            feed_dict={batchX_placeholder: symbols_in_keys,
                       batchY_placeholder: symbols_out_onehot})
        loss_list += loss
        acc_total += acc

        if (epoch_idx+1) % echo_step == 0:
            print("Step = {}, Loss = {:.6f}, Accuracy= {:.2f}%".format(
                epoch_idx+1, loss_list/echo_step, 100 * acc_total / echo_step))
            acc_total = 0
            loss_list = 0
            # onehot_pred is already a NumPy array, so take the argmax with
            # NumPy: calling tf.argmax(...).eval() here would add a new op
            # to the graph on every report.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, axis=1)[0])]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        offset += (n_input+1)

    print("Optimization Finished!")


    while True:
        prompt = "Write %s words: " % n_input
        sentence = input(prompt)
        sentence = sentence.strip()
        # split() without arguments tolerates repeated spaces between words.
        words = sentence.split()

        # A leading '1' is the exit command.
        if words and words[0] == '1':
            break

        if len(words) != n_input:
            print ("Wrong num of words")
            continue

        # Only the vocabulary lookup can fail on user input; catching just
        # KeyError keeps Ctrl+C and genuine errors visible.
        try:
            symbols_in_keys = [dictionary[word] for word in words]
        except KeyError:
            print("Word not in dictionary")
            continue

        for _ in range(words_to_predict):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(predictions, feed_dict={batchX_placeholder: keys})
            # NumPy argmax: no new graph op is created per generated word.
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest word, append the prediction.
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)

Step = 500, Loss = 7.816436, Accuracy= 5.60%
['I', 'nivel', 'de'] - [atención,] vs [atención]
Step = 1000, Loss = 6.653513, Accuracy= 9.60%
['•', 'Diagnóstico', 'de'] - [ingreso.] vs [hora]
Step = 1500, Loss = 6.497573, Accuracy= 7.00%
['de', 'cama', 'y'] - [servicio.] vs [examen]
Step = 2000, Loss = 6.501848, Accuracy= 8.60%
['la', 'información.', '•'] - [El] vs [la]
Step = 2500, Loss = 6.345580, Accuracy= 14.80%
['sector', 'público', 'y'] - [privado,] vs [la]
Step = 3000, Loss = 6.367979, Accuracy= 14.20%
['población', 'asignada,', 'las'] - [carpetas] vs [el]
Step = 3500, Loss = 6.212954, Accuracy= 11.00%
['atención.', '•', 'Enfermedad'] - [actual:] vs [de]
Step = 4000, Loss = 6.106410, Accuracy= 11.40%
['grafica', 'la', 'evolución'] - [del] vs [de]
Step = 4500, Loss = 6.208098, Accuracy= 10.60%
['Historias', 'Clínicas', 'se'] - [archivan] vs [de]
Step = 5000, Loss = 6.444111, Accuracy= 11.00%
['uso', 'de', 'la'] - [Historia] vs [Historia]
Step = 5500, Loss = 6.553307, Accuracy= 11.6

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 730, in _input_request
    ident, reply = self.session.recv(self.stdin_socket, 0)
  File "/usr/local/lib/python3.6/dist-packages/jupyter_client/session.py", line 803, in recv
    msg_list = socket.recv_multipart(mode, copy=copy)
  File "/usr/local/lib/python3.6/dist-packages/zmq/sugar/socket.py", line 466, in recv_multipart
    parts = [self.recv(flags, copy=copy, track=track)]
  File "zmq/backend/cython/socket.pyx", line 790, in zmq.backend.cython.socket.Socket.recv
  File "zmq/backend/cython/socket.pyx", line 826, in zmq.backend.cython.socket.Socket.recv
  File "zmq/backend/cython/socket.pyx", line 188, in zmq.backend.cython.socket._recv_copy
  File "zmq/backend/cython/checkrc.pxd", line 12, in zmq.backend.cython.checkrc._check_rc
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/li

KeyboardInterrupt: ignored