In [47]:
# Imports

from keras import Sequential
from keras import models
from keras.layers import LSTM, Dense, Dropout, Flatten, Activation, Reshape
from keras.losses import categorical_crossentropy
from sklearn import preprocessing
import numpy as np

In [48]:
# Sort: load the corpus and build the sorted character vocabulary.

# Forward slashes are accepted by Python on Windows as well as POSIX systems,
# whereas the previous "data\\..." literal only resolved on Windows.
# The context manager closes the file handle as soon as the text is read.
with open("data/romeoandjuliet.txt", "r") as corpus_file:
    text = corpus_file.read()

# Every distinct character in the corpus, in sorted order; a character's
# position in this list is its one-hot column index in later cells.
chardict = sorted(set(text))

total = len(text)
chars = len(chardict)

print("Total Charaters        :", total)
print("Total Unique Charaters :", chars)

Total Charaters        : 52577
Total Unique Charaters : 66


In [49]:
# Format: cut the corpus into fixed-length windows plus the character that follows each.

chunklength = 75
step = 1

# Start offsets of every window that still has a following character in `text`.
window_starts = range(0, len(text) - chunklength, step)

# sentences[k] is a chunklength-character slice; characters[k] is the single
# character immediately after that slice (the prediction target).
sentences = [text[start : start + chunklength] for start in window_starts]
characters = [text[start + chunklength] for start in window_starts]

chunks = len(sentences)
print("Total Chunks :", chunks)

Total Chunks : 26264


In [50]:
# Format: one-hot encode the windows (x) and their next-character labels (y).

# Character -> column lookup built once; replaces a linear chardict.index()
# scan for every single character of every window.
char_to_index = {ch: idx for idx, ch in enumerate(chardict)}

# The np.bool alias was deprecated in NumPy 1.20 and removed in 1.24 (it raises
# AttributeError there); the builtin bool selects the same boolean dtype.
# Allocating with the final shape also avoids the flat-allocate-then-reshape step.
x = np.zeros((chunks, chunklength, chars), dtype=bool)
y = np.zeros((chunks, chars), dtype=bool)

# x[i, a, c] is True when position a of window i holds character chardict[c].
for i, sentence in enumerate(sentences):
    for a, ch in enumerate(sentence):
        x[i, a, char_to_index[ch]] = True

# y[i, c] is True when the character following window i is chardict[c].
for i, ch in enumerate(characters):
    y[i, char_to_index[ch]] = True

print("Total Data Values  : ", chunks * chunklength * chars)
print("Total Label Values : ", chunks * chars)
print("X Shape :", x.shape)
print("Y Shape :", y.shape)

Total Data Values  :  86671200
Total Label Values :  1733424
X Shape : (26264, 50, 66)
Y Shape : (26264, 66)


In [51]:
# Model: LSTM over the one-hot window, then dense layers down to a softmax
# over the character vocabulary.

layer_stack = [
    # Recurrent layer with 2 * chars units; return_sequences=True keeps the
    # output for every timestep rather than only the last one.
    LSTM(2 * chars, return_sequences=True, input_shape=(chunklength, chars)),
    # Dense projection to chars features, applied to the per-timestep outputs.
    Dense(chars),
    # Collapse the (timestep, feature) axes into a single vector per sample.
    Flatten(),
    # Final projection to one score per character, computed in float64.
    Dense(chars, dtype="float64"),
    # Turn the scores into a probability distribution over characters.
    Activation("softmax", dtype="float64"),
]
model = Sequential(layer_stack)

model.summary()

model.compile(loss=categorical_crossentropy, optimizer="rmsprop")

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 50, 66)            35112     
_________________________________________________________________
dense_8 (Dense)              (None, 50, 66)            4422      
_________________________________________________________________
flatten_4 (Flatten)          (None, 3300)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 66)                217866    
_________________________________________________________________
activation_4 (Activation)    (None, 66)                0         
Total params: 257,400
Trainable params: 257,400
Non-trainable params: 0
_________________________________________________________________


In [52]:
# Train: fit the model on the one-hot windows (x) and next-character labels (y).

model.fit(
    x,
    y,
    epochs=10,
    # The batch size is deliberately tied to the window length here.
    batch_size=chunklength,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x23f395c8250>

In [53]:
# User Input: one-hot encode a hand-written seed string into a single window.

userinput = "Hello there, I am"

# The np.bool alias was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin bool selects the same boolean dtype. Shape is (1, chunklength, chars).
userdata = np.zeros((1, chunklength, chars), dtype=bool)

# Fill positions 0..len(userinput)-1; any remaining positions stay all-False.
# chardict.index raises ValueError for a character absent from the corpus, and
# a seed longer than chunklength raises IndexError.
for i, v in enumerate(userinput):
    userdata[0, i, chardict.index(v)] = True

In [61]:
# Input Data: take training window 2 as the seed for text generation.

# Copy before reshaping: x[2].reshape(...) returns a view into x, so any later
# in-place update of inputdata (the sampling cell shifts the window in place)
# would silently overwrite row 2 of the training data.
inputdata = x[2].copy().reshape(1, chunklength, chars)

print("Input shape : ", inputdata.shape)

Input shape :  (1, 50, 66)


In [62]:
# Sample: generate chunklength * 5 characters by repeatedly predicting the
# next character and sliding the input window forward by one position.

# Work on a private copy so re-running this cell restarts from the same seed
# window, and the in-place shifting below cannot write through to the array
# that inputdata was derived from.
window = inputdata.copy()
sampled_chars = []

for _ in range(chunklength * 5):
    # Predicted distribution over the vocabulary for the single window in the batch.
    probs = model.predict(window)[0].astype("float64")
    # Renormalise: rounding in the softmax output can leave the sum slightly
    # above 1.0, which np.random.multinomial rejects with a ValueError.
    probs /= probs.sum()

    # One draw from the distribution, returned as a one-hot count vector.
    onehot = np.random.multinomial(1, probs)
    sampled_chars.append(chardict[int(np.argmax(onehot))])

    # Drop the oldest timestep and append the sampled character as the newest.
    window[0] = np.vstack((window[0][1:], onehot))

totalclean = "".join(sampled_chars)

In [63]:
# Display the text accumulated in totalclean by the sampling cell.
print(totalclean)

EbE TaD

er vouin fir
Roleo,
Then she is hup an an:
The ke his him himer

Is leed savand wergon--
uni, hhall she mort roigh.

Romeo.
Whoc that horselantsh more here whith, in hy, mascre's douned and fto lepen
Tyou dighhre wat naty frahey sid not she with
A off-rise tie eye:
moree, preve to the wich a morner bond my practien groang hus hath,

Romeo, my consthai yourt ll shis on thy beet 
Bunvorio sould sake a frolt the my carths igo
dstientunt'd my whit,
stce lonishi a crown to poy anger for of a


In [57]:
# Persist the model to disk under models/novelbot2-3.
model.save("models/novelbot2-3")

INFO:tensorflow:Assets written to: models/novelbot2-2\assets
