In [1]:
import numpy as np
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical

In [2]:
# Number of consecutive characters in one model input window: it sets the LSTM
# input shape in buildmodel() and the slice width used when cutting training windows.
SEQ_LENGTH = 100 

In [3]:
def buildmodel(VOCABULARY):
    """Assemble and compile the stacked-LSTM next-character classifier.

    The network takes windows shaped ``(SEQ_LENGTH, 1)`` and ends in a softmax
    over ``VOCABULARY`` classes.

    Args:
        VOCABULARY: number of output classes (width of the final Dense layer).

    Returns:
        The compiled ``Sequential`` model.
    """
    # Layers are created in the same order the network applies them.
    recurrent_in = LSTM(512, input_shape = (SEQ_LENGTH, 1), return_sequences = True)
    stack = [
        recurrent_in,
        Dropout(0.2),
        LSTM(512),
        Dropout(0.2),
        Dense(VOCABULARY, activation = 'softmax'),
    ]

    model = Sequential(stack)
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')
    return model

In [4]:
# Load the training corpus and lowercase it.
# The path is a raw string so the Windows backslashes are never read as escape
# sequences, and the context manager closes the handle once the text is read.
# NOTE (from the original author): you need to read further characters as well.
with open(r'F:\Projects\Intelligent keyboard\samp_dataset.txt', encoding = 'utf8') as file:
    raw_text = file.read()
raw_text = raw_text.lower()

In [5]:
# Vocabulary before cleaning, printed for comparison with the cleaned one below.
chars = sorted(set(raw_text))
print(chars)

# Characters stripped from the corpus, each listed once.
# The space character is deliberately NOT in this list: it is the word separator
# that the rest of the notebook relies on.
bad_chars = [
    '#', ':', ';', '!', '?', '"', "'", ',', '.', '-', '_', '=', '*', '@',
    '(', ')', '[', ']',
    '\n', '\ufeff',
    'ä', 'æ', 'é', 'ë', 'ã',
    '†', '¤', '¦', '©', '«',
]
# Delete every unwanted character in a single pass over the text.
raw_text = raw_text.translate(str.maketrans('', '', ''.join(bad_chars)))

# Vocabulary after cleaning.
chars = sorted(set(raw_text))
print(chars)

['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']
[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [6]:
# Corpus size and vocabulary size; VOCABULARY is the class count used by the model.
text_length, char_length = len(raw_text), len(chars)
VOCABULARY = char_length
print("Text length = ", text_length, sep = "")
print("No. of characters = ", char_length, sep = "")

Text length = 6153
No. of characters = 27


In [7]:
# Map every vocabulary character to its index in the sorted `chars` list.
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

# One training example per start offset: SEQ_LENGTH encoded characters as the
# input, and the encoded character that immediately follows them as the target.
window_starts = range(len(raw_text) - SEQ_LENGTH)
input_strings = [
    [char_to_int[symbol] for symbol in raw_text[start: start + SEQ_LENGTH]]
    for start in window_starts
]
output_strings = [char_to_int[raw_text[start + SEQ_LENGTH]] for start in window_starts]

In [8]:
# Turn the encoded windows into model-ready arrays.
# NOTE: this cell overwrites input_strings/output_strings in place, so running it
# twice rescales the inputs a second time; re-run the window-building cell first.
length = len(input_strings)
input_strings = np.array(input_strings)
# Add a trailing feature axis: (samples, SEQ_LENGTH) -> (samples, SEQ_LENGTH, 1).
input_strings = np.reshape(input_strings, (input_strings.shape[0], input_strings.shape[1], 1))
# Scale the integer codes by the vocabulary size.
input_strings = input_strings/float(VOCABULARY)

output_strings = np.array(output_strings)
# Pin the one-hot width to VOCABULARY so it always matches the model's softmax
# layer, even when the highest-indexed character never appears as a target.
output_strings = to_categorical(output_strings, num_classes = VOCABULARY)
print(input_strings.shape)
print(output_strings.shape)

(6053, 100, 1)
(6053, 27)


In [None]:
# Train the model, writing a checkpoint whenever the training loss improves.
model = buildmodel(VOCABULARY)
# Raw string: the backslashes are path separators, not escape sequences.
# {epoch} and {loss} are filled in by ModelCheckpoint when it saves.
filepath = r"F:\Projects\Intelligent keyboard\ori_dataset\weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
# The monitored quantity is a loss, so "best" means lowest: mode must be 'min'.
# With mode='max' only epochs whose loss got WORSE would be saved.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

history = model.fit(input_strings, output_strings, epochs = 200, batch_size = 512, callbacks = callbacks_list)

In [10]:
# Rebuild the network and restore previously trained weights for inference.
# Raw string: the backslashes are path separators, not escape sequences.
filename = r'F:\Projects\Intelligent keyboard\samp_model\weights-improvement-199-0.0006.hdf5'
# buildmodel() already returns a compiled model, so no second compile is needed.
model = buildmodel(VOCABULARY)
model.load_weights(filename)

In [19]:
import ipywidgets as widgets
from IPython.display import display
# State shared with the submit handler.
# original_text holds the widget text as of the last submit. It is a string
# (the handler assigns text.value to it), so it starts as an empty string.
original_text = ''
# One predicted word is appended per submit.
predicted_text = []
# Inverse of char_to_int: class index -> character.
int_to_char = dict(enumerate(chars))
text = widgets.Text()
display(text)

def handle_submit(sender):
    """Predict the word that follows the text typed so far and print it.

    Everything in ``text.value`` beyond the previously submitted text is treated
    as newly typed. The last ``SEQ_LENGTH`` characters (left-padded with spaces)
    are fed to ``model``, which is then sampled greedily one character at a time
    until it emits a space.

    Args:
        sender: the object passed by the widget callback (unused).

    Side effects:
        Rebinds the globals ``original_text`` (to the current widget text) and
        ``predicted_text`` (with the new word appended), and prints the word.
    """
    global predicted_text
    global original_text

    typed = text.value
    seen = len(original_text)
    context, fresh = typed[:seen], typed[seen:] + ' '
    original_text = typed

    def encode(fragment):
        # Mirror the training preprocessing: the corpus was lowercased and had
        # every character outside the vocabulary removed, so do the same here
        # instead of failing with KeyError on e.g. capitals or punctuation.
        return [char_to_int[c] for c in fragment.lower() if c in char_to_int]

    space = char_to_int[' ']

    # Previously submitted text: keep the last SEQ_LENGTH codes, left-pad with spaces.
    window = encode(context)
    window = window[max(0, len(window) - SEQ_LENGTH):]
    window = [space] * (SEQ_LENGTH - len(window)) + window

    # Slide the newly typed characters (plus the trailing space) into the window.
    # No prediction is needed while doing so; only the final window matters.
    window = window + encode(fresh)
    window = window[len(window) - SEQ_LENGTH:]

    # Greedy decoding until the model emits a space. The loop is capped at
    # SEQ_LENGTH characters so a model that never predicts a space cannot hang
    # the kernel.
    next_word = []
    for _ in range(SEQ_LENGTH):
        X = np.reshape(window, (1, SEQ_LENGTH, 1))
        probabilities = model.predict(X/float(VOCABULARY))
        index = int(np.argmax(probabilities))
        next_char = int_to_char[index]
        next_word.append(next_char)
        window = window[1:] + [index]
        if next_char == ' ':
            break

    word = ''.join(next_word)
    predicted_text = predicted_text + [word]
    print(" " + word, end='|')
    
# Register handle_submit as the callback for the text widget's submit event.
# NOTE(review): newer ipywidgets releases mark on_submit as deprecated; confirm
# against the installed version before relying on it.
text.on_submit(handle_submit)

Text(value='')

 ti |  | int | much | a | loot | high | and | her | face | lightedup | at | the | thought | that | she | was | now | the | right | size | to | go | through | thesmall | door | and | get | out | to | that | lovely | gardenillustrationpoor | llcps |

In [21]:
from tabulate import tabulate

# Line up each typed word with the word that was predicted for its position.
# NOTE: this cell rewrites original_text and predicted_text in place, so it is
# meant to be run once after typing has finished.
original_text = original_text.split()
# Shift predictions one slot to the right: the first typed word has no
# prediction, and the final prediction has no typed word to compare against.
predicted_text.insert(0, "")
predicted_text.pop()

table = [
    [original_text[row], predicted_text[row]]
    for row in range(len(original_text))
]
print(tabulate(table, headers = ['Actual Word', 'Predicted Word']))

Actual Word    Predicted Word
-------------  ----------------------
she
was            ti
now
not            int
quite          much
a              a
foot           loot
high           high
and            and
her            her
face           face
lightedup      lightedup
at             at
the            the
thought        thought
that           that
she            she
was            was
now            now
the            the
right          right
size           size
to             to
go             go
through        through
thesmall       thesmall
door           door
and            and
get            get
out            out
to             to
that           that
lovely         lovely
gardern        gardenillustrationpoor
