In [66]:
import re
import numpy as np
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
import math
import random
import sys
import matplotlib.pyplot as plt
from keras.utils import plot_model
import pandas as pd
import ezodf

In [67]:


def read_ods(filename, sheet_no=0, header=0):
    tab = ezodf.opendoc(filename=filename).sheets[sheet_no]
    return pd.DataFrame({col[header].value:[x.value for x in col[header+1:]]
                         for col in tab.columns()})
x =  read_ods(filename = "sentenceGenerator.ods")
sentences = x["act"]+" "+ x["sentence"]
print(x)
modSentence = ""
for eachSentence in sentences:
    modSentence += eachSentence + " "
print(modSentence)

              act                                           sentence slots
0           greet                 Hey there! How can I help you EOS.  None
1           greet       Hi is there anything I can help you with.EOS  None
2           greet                         Hey how may I Help you EOS  None
3           greet  Hello. What is your query? How may I help you EOS  None
4             bye                     Good bye, have a nice day. EOS  None
5             bye                 Thank you. Take care good bye. EOS  None
6   requestNumber                can I have your request number. EOS  None
7   requestNumber  Can you please provide me your request number....  None
8   requestNumber  I need your request number to go ahead with yo...  None
9           greet                 Hey there! How can I help you EOS.  None
10          greet       Hi is there anything I can help you with.EOS  None
11          greet                         Hey how may I Help you EOS  None
12          greet  Hello.

### Load File and Build Vocabulary

In [81]:
data_path = "test_data_long.txt"
#raw_text = open(data_path).read().lower()
raw_text = modSentence.lower()
import re
# generate list of unique characters, but only include words and some punctuation marks
pattern = re.compile('[a-z]+|\!|\n|\.|,|;')
all_words = re.findall(pattern, raw_text)
#print(all_words)

unique_words = sorted(set(all_words))

word_to_int = dict((c, i) for i, c in enumerate(unique_words))

# print(unique_words)
print(word_to_int)

# later used to make outputs more readable by converting ints back to characters
int_to_word = dict((i, c) for i, c in enumerate(unique_words))

total_num_words = len(all_words)
len_vocab = len(unique_words)

print("Total number of words:\t" + str(total_num_words))
print("Length of vocabulary:\t" + str(len_vocab))

{'!': 0, ',': 1, '.': 2, 'a': 3, 'ahead': 4, 'anything': 5, 'bye': 6, 'can': 7, 'care': 8, 'day': 9, 'eos': 10, 'go': 11, 'good': 12, 'greet': 13, 'have': 14, 'hello': 15, 'help': 16, 'hey': 17, 'hi': 18, 'how': 19, 'i': 20, 'is': 21, 'may': 22, 'me': 23, 'need': 24, 'nice': 25, 'number': 26, 'please': 27, 'provide': 28, 'query': 29, 'request': 30, 'requestnumber': 31, 'take': 32, 'thank': 33, 'there': 34, 'to': 35, 'what': 36, 'with': 37, 'you': 38, 'your': 39}
Total number of words:	291
Length of vocabulary:	40


### Create Training Data from Text File

In [69]:
def eosStuffing(ip):
    while(len(ip) < 5):
        ip.append("eos")
    return ip


In [70]:
#MODIFYING PREPROCESSING DATA
sequence_length = 4 # max number of words to consider at a time.
                    # this means that each trainig set (training pattern) will be comprised of 20 time steps
step_window = 1

# set up x and y
# convert the words into integers
x_data = [] # list of lists
y_data = []
sents = []
for each in sentences:
    sents.append(each.lower())
for j in range(len(sents)):
    pattern = re.compile('[a-z]+|\!|\n|\.|,|;')
    all_words = re.findall(pattern, sents[j])
    print(all_words)
    for i in range(0, len(all_words) - sequence_length, step_window):

        # extract the first n words (length sequence_length): our "x"
        sequence_in = all_words[i : i+sequence_length]
        print("in", sequence_in)

        # extract last word for this window: our "y" (target)
        word_out = all_words[i+sequence_length]
        print("out", word_out)

        # print('\nx: ' + str(sequence_in) + '\n' + 'y: ' + word_out)
        # print()

        # store corresponding integer for each character in the input sequence
        x_data.append(sequence_in)
        y_data.append(word_out)

num_train_patters = len(x_data)
print('Total patterns:\t' + str(num_train_patters))

['greet', 'hey', 'there', '!', 'how', 'can', 'i', 'help', 'you', 'eos', '.']
in ['greet', 'hey', 'there', '!']
out how
in ['hey', 'there', '!', 'how']
out can
in ['there', '!', 'how', 'can']
out i
in ['!', 'how', 'can', 'i']
out help
in ['how', 'can', 'i', 'help']
out you
in ['can', 'i', 'help', 'you']
out eos
in ['i', 'help', 'you', 'eos']
out .
['greet', 'hi', 'is', 'there', 'anything', 'i', 'can', 'help', 'you', 'with', '.', 'eos']
in ['greet', 'hi', 'is', 'there']
out anything
in ['hi', 'is', 'there', 'anything']
out i
in ['is', 'there', 'anything', 'i']
out can
in ['there', 'anything', 'i', 'can']
out help
in ['anything', 'i', 'can', 'help']
out you
in ['i', 'can', 'help', 'you']
out with
in ['can', 'help', 'you', 'with']
out .
in ['help', 'you', 'with', '.']
out eos
['greet', 'hey', 'how', 'may', 'i', 'help', 'you', 'eos']
in ['greet', 'hey', 'how', 'may']
out i
in ['hey', 'how', 'may', 'i']
out help
in ['how', 'may', 'i', 'help']
out you
in ['may', 'i', 'help', 'you']
out eos
['

### Prepare Training and Testing Data

In [71]:
x = np.zeros((num_train_patters, sequence_length, len_vocab))
y = np.zeros((num_train_patters, len_vocab))

# encode all data into one-hot vectors
for i, sentence in enumerate(x_data):
    for t, word in enumerate(sentence):
        x[i, t, word_to_int[word]] = 1
    y[i, word_to_int[y_data[i]]] = 1
    
print(y[0])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


### Define Model

In [72]:
learning_rate = 0.01
optimizer = RMSprop(lr=learning_rate)
num_memory_units = 256

model = Sequential()

# model.add(LSTM(num_memory_units, return_sequences=True, input_shape=(sequence_length, len_vocab)))
model.add(LSTM(num_memory_units, input_shape=(sequence_length, len_vocab)))
model.add(Dropout(0.2))
# model.add(LSTM(num_memory_units))
# model.add(Dropout(0.2))
model.add(Dense(len_vocab))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [73]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 256)               304128    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 40)                10280     
_________________________________________________________________
activation_6 (Activation)    (None, 40)                0         
Total params: 314,408
Trainable params: 314,408
Non-trainable params: 0
_________________________________________________________________


### Train Model

In [74]:
def add_temperature(predictions, temperature=1.0):

    predictions = np.asarray(predictions).astype('float64')
    predictions = np.log(predictions) / temperature
    exp_predictions = np.exp(predictions)
    predictions = exp_predictions / np.sum(exp_predictions)
    
    probabilities = np.random.multinomial(1, predictions, 1)
    return np.argmax(probabilities)

In [75]:
num_iterations = 50
batch_size = 128
words_to_generate = 300

prev_loss = math.inf
loss_history = []
accuracy_history = []

val_loss_history = []
val_accuracy_history = []

# train the model, output generated text after each iteration
for i in range(num_iterations):
    
    print('\n' + '-'*10 + ' epoch ' + str(i+1) + '/' + str(num_iterations) + ' ' + '-'*10)
        
    history = model.fit(x, y, batch_size=batch_size, epochs=1)
    
    curr_loss = history.history['loss'][0]
    loss_history.append(curr_loss)
    
    # save weights if loss improves
    if (curr_loss < prev_loss):
        print("Loss improved from " + str(prev_loss) + " to " + str(curr_loss) + ". Saving weights.")
        model.save_weights('weights_epoch-{}_loss-{}.hdf5'.format(i, curr_loss))
        prev_loss = curr_loss
    
    sentence_index = random.randint(0, len(sents)- 1)
    # start_index = 0
    pattern = re.compile('[a-z]+|\!|\n|\.|,|;')
    all_words = re.findall(pattern, sents[sentence_index])
    print(all_words)
    start_index = random.randint(0, len(all_words) - sequence_length - 1)
    seed_sentence = all_words[start_index : start_index + sequence_length]
    print('\n-> seed: "' + ' '.join(seed_sentence) + '" ...\n')

    for i in range(words_to_generate):
        
        x_input = np.zeros((1, sequence_length, len_vocab))
        for t, word in enumerate(seed_sentence):
            x_input[0, t, word_to_int[word]] = 1.

        predictions = model.predict(x_input, verbose=0)[0]
        
        if i == num_iterations-1:
            final_predicted = predictions
        
        # predicted_word_index = add_temperature(predictions, 0.5)
        predicted_word_index = np.argmax(predictions)
        predicted_word = int_to_word[predicted_word_index]

        seed_sentence = seed_sentence[1:] + list([predicted_word])

        if re.match('[a-z]', predicted_word):
            sys.stdout.write(" " + predicted_word)
        else:
            sys.stdout.write(predicted_word)
    
        sys.stdout.flush()
        
    print()


---------- epoch 1/50 ----------
Epoch 1/1
Loss improved from inf to 3.5954136131891135. Saving weights.
['bye', 'thank', 'you', '.', 'take', 'care', 'good', 'bye', '.', 'eos']

-> seed: ". take care good" ...

 eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos

['requestnumber', 'can', 'i', 'have', 'your', 'request', 'number', '.', 'eos']

-> seed: "i have your request" ...

 number number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request number go ahead ahead with request eos go go ahead request request numbe

 eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take bye eos eos. have eos good bye. eos a take

 take care good bye. eos a take bye eos.. take good bye bye. a have nice bye.. eos take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. take take bye bye.. ta

. eos ahead with bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query i help nice help. eos eos eos bye eos eos eos have eos... eos query bye query

 number. eos ahead with bye eos eos eos have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number.. eos ahead bye bye eos have have nice day.. eos good bye bye eos a have have eos... good query bye bye a i have have eos number..

 eos. eos good eos bye eos a eos have eos eos.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos bye bye bye have have nice day.. eos good bye bye eos a have have eos... 

 i help you eos. eos good eos bye eos a eos have eos eos.. eos good bye bye eos a have have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye bye a have have nice... eos query bye bye i help have eos... good bye bye 

Loss improved from 0.00040880061082969435 to 0.00038432076021830444. Saving weights.
['greet', 'hi', 'is', 'there', 'anything', 'i', 'can', 'help', 'you', 'with', '.', 'eos']

-> seed: "help you with ." ...

 eos eos eos bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have nice... eos query bye query i i nice help number. eos eos with bye eos eos eos have eos... good bye bye bye a have have ni

 eos a nice bye eos eos have nice eos day. eos eos good bye eos eos a nice eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos eos e

In [76]:
loss = model.evaluate(x, y, batch_size=batch_size, verbose=1)
print("loss: ", loss)

loss:  1.3806826977130493


In [77]:
print('loss history:')
print(loss_history)

# plt.figure(figsize=(15,8))
# plt.rc('font', size=20)
# plt.plot(loss_history, lw=3, c='orange')
# plt.title('Cross Entropy Loss of LSTM Model over Epoch Iterations', fontsize=25)
# plt.ylabel('Loss')
# plt.xlabel('Epochs')
# plt.savefig("loss.png")
# plt.grid()
# plt.show()

loss history:
[3.5954136131891135, 3.1268426473023463, 2.1005616148964306, 1.4585823374367803, 0.991880620437893, 1.1835916504833868, 0.7692906752310164, 0.4737190281758543, 0.21310019167394587, 0.17192594861723687, 0.10441568697410855, 0.24911979264248915, 0.3636281568495954, 0.08949943078183085, 0.049691579849179324, 0.028322378163292108, 0.06638879764601181, 0.08761002404279396, 0.023385998940532976, 0.021306303874309596, 0.01900423945136409, 0.016121353282303106, 0.013350839054853212, 0.008651193246611805, 0.016091076606284074, 0.006074053192093887, 0.004824016942983605, 0.004430288252995802, 0.0033697739543769854, 0.005366130630215986, 0.005503204985791461, 0.0025303244458440223, 0.002515479364391681, 0.0011961996912208126, 0.001105685597489196, 0.001424571113722002, 0.002130011564166453, 0.0008145180677796608, 0.0005615403277092089, 0.0005906567688715832, 0.00047660617239244, 0.00040880061082969435, 0.0004199346824220886, 0.00038432076021830444, 0.0002729057818155003, 0.000247546

In [78]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 256)               304128    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 40)                10280     
_________________________________________________________________
activation_6 (Activation)    (None, 40)                0         
Total params: 314,408
Trainable params: 314,408
Non-trainable params: 0
_________________________________________________________________
None


In [79]:
plot_model(model, to_file='model_plot.png')

OSError: `pydot` failed to call GraphViz.Please install GraphViz (https://www.graphviz.org/) and ensure that its executables are in the $PATH.

In [94]:
seed_sentence = ["thank", "you", ".", "take"]
for i in range(4):
    x_input = np.zeros((1, sequence_length, len_vocab))
    for t, word in enumerate(seed_sentence):
        x_input[0, t, word_to_int[word]] = 1.

    predictions = model.predict(x_input, verbose=0)[0]

    if i == num_iterations-1:
        final_predicted = predictions

    # predicted_word_index = add_temperature(predictions, 0.5)
    predicted_word_index = np.argmax(predictions)
    predicted_word = int_to_word[predicted_word_index]

    seed_sentence = seed_sentence[1:] + list([predicted_word])

    if re.match('[a-z]', predicted_word):
        sys.stdout.write(" " + predicted_word)
    else:
        sys.stdout.write(predicted_word)

    sys.stdout.flush()


 care good bye.