In [1]:
# Imports

from keras import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation, Reshape
from keras.losses import categorical_crossentropy
from sklearn import preprocessing
import numpy as np


In [2]:
# Sort
# Load the training corpus and derive its character vocabulary.

# Read inside a context manager so the file handle is closed as soon as the
# text has been loaded, instead of being left open for the garbage collector.
with open("data/goldfish.txt", "r") as corpus_file:
    text = corpus_file.read()

# Sorted list of the distinct characters in the corpus. A character's position
# in this list is the index used when one-hot encoding it.
chardict = sorted(set(text))

total = len(text)
chars = len(chardict)

print("Total Charaters        :", total)
print("Total Unique Charaters :", chars)

Total Charaters        : 314
Total Unique Charaters : 9


In [3]:
# Format
# Slice the corpus into fixed-length windows, each paired with the character
# that immediately follows it.

chunklength = 5  # characters per input window
step = 1         # offset between the starts of consecutive windows

# Every start position that still leaves one character after the window.
window_starts = range(0, len(text) - chunklength, step)

sentences = [text[start : start + chunklength] for start in window_starts]
characters = [text[start + chunklength] for start in window_starts]

chunks = len(sentences)
print("Total Chunks :", chunks)

Total Chunks : 309


In [4]:
# Sample
# Show one window and its target character as a quick sanity check.

sample_index = 7  # arbitrary window to inspect

print("Sample Chunk     : ", sentences[sample_index])
print("Sample Character : ", characters[sample_index])

Sample Chunk     :  h
gol
Sample Character :  d


In [5]:
# Format
# One-hot encode the windows (x) and their next-character targets (y).

# Allocate with the final shape directly. The builtin `bool` is used because
# the `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((chunks, chunklength, chars), dtype=bool)
y = np.zeros((chunks, 1, chars), dtype=bool)

# Build the character -> index mapping once instead of calling
# chardict.index() (a linear scan) for every single character.
char_to_index = {ch: idx for idx, ch in enumerate(chardict)}

for i, v in enumerate(sentences):
    for a, b in enumerate(v):
        x[i, a, char_to_index[b]] = True

for i, v in enumerate(characters):
    y[i, 0, char_to_index[v]] = True

print("Total Data Values  : ", chunks * chunklength * chars)
print("Total Label Values : ", chunks * 1 * chars)
print("X Shape :", x.shape)
print("Y Shape :", y.shape)

Total Data Values  :  13905
Total Label Values :  2781
X Shape : (309, 5, 9)
Y Shape : (309, 1, 9)


In [6]:
# Model
# LSTM over the one-hot window, followed by dense layers that collapse the
# per-timestep outputs into a single softmax distribution over the vocabulary.

model = Sequential()
model.add(LSTM(2 * chars, return_sequences=True, input_shape=(chunklength, chars)))
model.add(Dense(chars))
# Merge the (chunklength, chars) per-timestep outputs into one row so the final
# output has shape (1, chars), the same per-sample shape as the label array y.
model.add(Reshape((1, chunklength * chars)))
# A layer width must be an integer: `/` would produce a float here (22.5 with
# the values recorded in this notebook), so use floor division instead.
model.add(Dense((chunklength * chars) // 2))
model.add(Dense(chars))
model.add(Activation("softmax"))

model.summary()

model.compile(optimizer="rmsprop", loss=categorical_crossentropy)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 5, 18)             2016      
_________________________________________________________________
dense (Dense)                (None, 5, 9)              171       
_________________________________________________________________
reshape (Reshape)            (None, 1, 45)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1, 22)             1012      
_________________________________________________________________
dense_2 (Dense)              (None, 1, 9)              207       
_________________________________________________________________
activation (Activation)      (None, 1, 9)              0         
Total params: 3,406
Trainable params: 3,406
Non-trainable params: 0
______________________________________________________

In [7]:
# Train
# Fit the network on the one-hot windows and their next-character labels.

model.fit(
    x=x,
    y=y,
    batch_size=chunklength,  # mini-batch size is set to the window length
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x22cf294abe0>

In [8]:
# User Input
# Build the seed window for generation and one-hot encode it.

# The seed must be exactly `chunklength` characters, all present in `chardict`.
# Here the first vocabulary character is repeated to fill one window; assign
# `sentences[0]` instead to seed from the start of the corpus.
userinput = chardict[0] * chunklength

# Builtin `bool` is used because the `np.bool` alias was removed in NumPy 1.24.
userdata = np.zeros((1, chunklength, chars), dtype=bool)

for i, v in enumerate(userinput):
    userdata[0, i, chardict.index(v)] = True

In [9]:
# Prediction
# Run the model on the seed window.

# A training window can be substituted here instead,
# e.g. x[3].reshape(1, chunklength, chars).
inputdata = userdata

batch_prediction = model.predict(inputdata)
prediction = batch_prediction[0]  # keep only the first (and only) sample's output

print("Input shape : ", inputdata.shape)

Input shape :  (1, 5, 9)


In [104]:
# Clean
# Decode the model output and the one-hot input back into characters.
#
# NOTE(review): the output saved with this cell does not match the seed built
# in the "User Input" cell, and the execution counter jumps from 9 to 104, so
# it appears to come from an out-of-order run. Re-run the notebook top to
# bottom before trusting it.

# For each output row pick the most probable character. np.argmax returns the
# first maximal index, which is what a strict `>` scan selects, and it does not
# depend on an arbitrary starting index.
cleanprediction = [chardict[int(np.argmax(a))] for a in prediction]

# For each sample, emit the character of every set cell. np.nonzero yields the
# coordinates in row-major order, i.e. timestep by timestep.
cleaninput = []
for a in inputdata:
    _, columns = np.nonzero(a)
    cleaninput.append("".join(chardict[i] for i in columns))

print(cleaninput)
print(cleanprediction)

['dfish']
['\n']


In [105]:
# New Prediction
# Slide the window one character forward: drop the oldest character, append the
# character that was just predicted, then run the model on the new window.
newinput = cleaninput[0][1:] + cleanprediction[0]

# Builtin `bool` is used because the `np.bool` alias was removed in NumPy 1.24.
userdata = np.zeros((1, chunklength, chars), dtype=bool)

for i, v in enumerate(newinput):
    userdata[0, i, chardict.index(v)] = True
inputdata = userdata

prediction = model.predict(inputdata)[0]