In [1]:
# Imports

from keras import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation, Reshape
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy, mean_absolute_error
from sklearn import preprocessing
import numpy as np


In [2]:
# Sort
# Load the training corpus and derive the sorted character vocabulary.

# The context manager closes the file handle even if read() raises; the bare
# open(...).read() left that to the garbage collector.
with open("data/goldfish.txt", "r") as corpus_file:
    text = corpus_file.read()

# Unique characters in sorted order. A character's position in this list is
# the index used for its one-hot slot when the data is encoded.
chardict = sorted(set(text))

total = len(text)
chars = len(chardict)

print("Total Charaters        :", total)
print("Total Unique Charaters :", chars)

Total Charaters        : 44
Total Unique Charaters : 9


In [41]:
# Format
# Slide a fixed-size window across the text: every window becomes one input
# chunk, and the character directly after the window is its target.

chunklength = 5
step = 1

# Start offsets of every window that still has a following character.
window_starts = range(0, len(text) - chunklength, step)

sentences = [text[start : start + chunklength] for start in window_starts]
characters = [text[start + chunklength] for start in window_starts]

chunks = len(sentences)
print("Total Chunks :", chunks)

Total Chunks : 39
Dictionary :  ['d', 'f', 'g', 'h', 'i', 'l', 'o', 's', '±']


In [42]:
# Sample
# Display one input chunk next to the character that follows it.

sample_index = 4
print("Sample Chunk     : ", sentences[sample_index])
print("Sample Character : ", characters[sample_index])

Sample Chunk     :  fish±
Sample Character :  g


In [43]:
# Format
# One-hot encode the input chunks (x) and their next-character targets (y).

# Builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in 1.20
# and removed in 1.24 (AttributeError on current releases). The arrays are
# allocated in their final shape instead of flat-then-reshape.
x = np.zeros((chunks, chunklength, chars), dtype=bool)
y = np.zeros((chunks, 1, chars), dtype=bool)

# Character -> one-hot position, built once rather than scanning chardict
# with .index() for every character.
char_to_index = {character: position for position, character in enumerate(chardict)}

for i, sentence in enumerate(sentences):
    for a, character in enumerate(sentence):
        x[i, a, char_to_index[character]] = True

# y keeps a length-1 middle axis: one target character per chunk.
for i, character in enumerate(characters):
    y[i, 0, char_to_index[character]] = True

print("Total Data Values  : ", chunks * chunklength * chars)
print("Total Label Values : ", chunks * 1 * chars)
print("X Shape :", x.shape)
print("Y Shape :", y.shape)

Total Data Values  :  1755
Total Label Values :  351
X Shape : (39, 5, 9)
Y Shape : (39, 1, 9)


In [46]:
# Model
# LSTM over the one-hot chunk, then dense layers that reduce the flattened
# per-timestep outputs to a softmax over the character vocabulary.

model = Sequential()
model.add(LSTM(2 * chars, return_sequences=True, input_shape=(chunklength, chars)))
model.add(Dense(chars))
# Fold the timestep axis into the feature axis so the remaining Dense layers
# see every timestep at once; the length-1 axis lines up with y's middle axis.
model.add(Reshape((1, chunklength * chars)))
# Floor division keeps the unit count an int. True division produced a float
# (22.5 when chunklength * chars is 45), but `units` must be an integer.
model.add(Dense((chunklength * chars) // 2))
model.add(Dense(chars))
model.add(Activation("softmax"))

model.summary()

model.compile(optimizer="rmsprop", loss=categorical_crossentropy)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 5, 18)             2016      
_________________________________________________________________
dense_7 (Dense)              (None, 5, 9)              171       
_________________________________________________________________
reshape_2 (Reshape)          (None, 1, 45)             0         
_________________________________________________________________
dense_8 (Dense)              (None, 1, 22)             1012      
_________________________________________________________________
dense_9 (Dense)              (None, 1, 9)              207       
_________________________________________________________________
activation_3 (Activation)    (None, 1, 9)              0         
Total params: 3,406
Trainable params: 3,406
Non-trainable params: 0
____________________________________________________

In [50]:
# Train

# Uses `chunklength`: the previous spelling `chucklength` is not defined in
# any visible cell, so the call raised NameError on a fresh kernel.
model.fit(x=x, y=y, batch_size=chunklength, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2944e3d62b0>

In [57]:
# Prediction
# Run the trained model on a single encoded chunk.

# Add a leading batch axis of size 1. Uses `chunklength`: the previous
# spelling `chucklength` is not defined in any visible cell (NameError on a
# fresh kernel).
inputdata = x[3].reshape(1, chunklength, chars)

# Drop the batch axis from the model output.
prediction = model.predict(inputdata)[0]

print("Input shape : ", inputdata.shape)

Input shape :  (1, 5, 9)


In [59]:
# Merge
# Sum the rows of `prediction` element-wise into a single score vector.

# The accumulator is created once, before the loop. Resetting it inside the
# loop (as before) discarded every row but the last and made the `+=` branch
# unreachable. For a single-row prediction the result is unchanged.
summed = []

for row in prediction:
    for i, v in enumerate(row):
        if i < len(summed):
            summed[i] += v
        else:
            summed.append(v)

# Wrapped in a list so the result can be iterated as a batch of one.
mergedprediction = [summed]

In [60]:
# Clean
# Turn score vectors and one-hot inputs back into readable characters.

cleanprediction = []

for scores in mergedprediction:
    # Track the first position holding the highest score seen so far.
    best_index, best_score = 1, -1
    for index, score in enumerate(scores):
        if score > best_score:
            best_index, best_score = index, score
    cleanprediction.append(chardict[best_index])

# For every sample, emit the character of each set one-hot slot, in order.
cleaninput = [
    "".join(
        chardict[index]
        for onehot in sample
        for index, flag in enumerate(onehot)
        if flag
    )
    for sample in inputdata
]

print(cleaninput)
print(cleanprediction)

['dfish']
['±']
