In [3]:
# Imports

from keras import Sequential
from keras import models
from keras.layers import LSTM, Dense, Dropout, Flatten, Activation, Reshape
from keras.losses import categorical_crossentropy
from sklearn import preprocessing
import numpy as np

In [1]:
# Sort: load the corpus and build the sorted character vocabulary

# Context manager so the file handle is closed as soon as the text is read
with open("data/other.txt", "r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()

# Every distinct character in the corpus, in sorted order; a character's
# position in this list is its index in the one-hot encodings built later
chardict = sorted(set(text))

total = len(text)
chars = len(chardict)

print("Total Charaters        :", total)
print("Total Unique Charaters :", chars)

Total Charaters        : 250807
Total Unique Charaters : 84


In [19]:
# Format: cut the corpus into fixed-length windows plus the character after each

chunklength = 50  # characters per input window
step = 2          # distance between consecutive window starts

# Stopping at len(text) - chunklength keeps text[start + chunklength] in range
window_starts = range(0, len(text) - chunklength, step)

# sentences[k] is a window; characters[k] is the character that follows it
sentences = [text[start : start + chunklength] for start in window_starts]
characters = [text[start + chunklength] for start in window_starts]

chunks = len(sentences)
print("Total Chunks :", chunks)

Total Chunks : 125379


In [20]:
# Format: one-hot encode the windows (x) and their following characters (y)

# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
# 1.20 and removed in 1.24. Arrays are allocated directly in their final shape.
x = np.zeros((chunks, chunklength, chars), dtype=bool)
y = np.zeros((chunks, chars), dtype=bool)

# Character -> vocabulary index. Gives the same index as chardict.index(c)
# (chardict holds unique characters) without a linear scan per character.
charindex = {c: n for n, c in enumerate(chardict)}

# x[i, a, k] is True when position a of window i holds vocabulary character k
for i, v in enumerate(sentences):
    for a, b in enumerate(v):
        x[i, a, charindex[b]] = True

# y[i, k] is True when the character following window i is vocabulary character k
for i, v in enumerate(characters):
    y[i, charindex[v]] = True

print("Total Data Values  : ", chunks * chunklength * chars)
print("Total Label Values : ", chunks * chars)
print("X Shape :", x.shape)
print("Y Shape :", y.shape)

Total Data Values  :  526591800
Total Label Values :  10531836
X Shape : (125379, 50, 84)
Y Shape : (125379, 84)


In [68]:
# Model: LSTM over one-hot windows -> per-step Dense -> Flatten -> Dense -> softmax

model = Sequential(
    [
        # return_sequences=True keeps one output vector per window position
        LSTM(2 * chars, return_sequences=True, input_shape=(chunklength, chars)),
        Dense(chars),
        # Collapse the (position, feature) axes into a single vector per sample
        Flatten(),
        # Final projection and softmax are explicitly computed in float64
        Dense(chars, dtype="float64"),
        Activation("softmax", dtype="float64"),
    ]
)

model.summary()

model.compile(optimizer="rmsprop", loss=categorical_crossentropy)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 75, 132)           105072    
_________________________________________________________________
dense_10 (Dense)             (None, 75, 66)            8778      
_________________________________________________________________
flatten_5 (Flatten)          (None, 4950)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 66)                326766    
_________________________________________________________________
activation_5 (Activation)    (None, 66)                0         
Total params: 440,616
Trainable params: 440,616
Non-trainable params: 0
_________________________________________________________________


In [69]:
# Train

# The batch size reuses chunklength, so it changes whenever the window length does.
# The call is left as the cell's last expression so the History object is displayed.
model.fit(x, y, batch_size=chunklength, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x23f3ea634f0>

In [28]:
# Load a saved model from disk; this rebinds `model`, replacing whatever the
# Model/Train cells left in it.
checkpoint_path = "models/novelbot2-4b"
model = models.load_model(checkpoint_path)

In [29]:
# User Input: one-hot encode a seed prompt into a single model-sized window

userinput = "Hello there, I am"

# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
# 1.20 and removed in 1.24.
userdata = np.zeros((1, chunklength, chars), dtype=bool)

# The window holds chunklength characters; a longer prompt is clipped to its
# last chunklength characters instead of raising IndexError. The prompt fills
# the window from position 0; positions after it stay all-False.
# chardict.index raises ValueError for a character that is not in the corpus.
for i, v in enumerate(userinput[-chunklength:]):
    userdata[0, i, chardict.index(v)] = True

In [30]:
# Input Data: choose the seed window fed to the sampling loop

# Take a copy rather than an alias: the sampling cell overwrites inputdata[0]
# in place, and with a plain assignment that would also overwrite userdata, so
# re-running this cell could not restore the original prompt.
# Alternative seed taken from the training data: x[2].reshape(1, chunklength, chars)
inputdata = userdata.copy()

print("Input shape : ", inputdata.shape)

Input shape :  (1, 50, 84)


In [31]:
# Sample: generate chunklength * 20 characters, one at a time, by drawing from
# the model's predicted distribution and sliding the input window forward

generated = []

for _ in range(chunklength * 20):
    # Distribution over the vocabulary for the next character.
    # verbose=0 keeps per-call progress output out of the notebook.
    probs = model.predict(inputdata, verbose=0)[0].astype("float64")

    # Renormalise after the cast: np.random.multinomial raises ValueError when
    # the probabilities sum to more than 1, which rounding error can cause.
    probs /= probs.sum()

    # One draw -> a one-hot vector with a single 1 at the sampled character
    draw = np.random.multinomial(1, probs, 1)[0]
    generated.append(chardict[int(np.argmax(draw))])

    # Slide the window in place: drop the oldest position, append the new one-hot
    inputdata[0] = np.concatenate((inputdata[0][1:], draw[np.newaxis, :]))

# Join once at the end instead of growing a string inside the loop
totalclean = "".join(generated)

In [9]:
# Show the generated text. This cell's execution count (9) is lower than the
# sampling cell's (31), so the output kept below presumably comes from an
# earlier run - TODO confirm which model produced it.
print(totalclean) # Version A

 the with a rasuned of the realled for a the dongter do men worls were as for the
west gad agind to dew revinitior ous blooge of her pined; and then were harn the wobwher bout excominting to the rover a praintis in lows looken whe I naw, rest neaty," said be fiicucy on the with a conla goo tee
in this celfy
momans age"intent grear im."
"Tors, so jeds, that have yoo
flend in you modMing be have the ition maven remincation wom."
h usy reselveny to devily and whan poorople all asing; I sarn out the ave thebser
and the comencess anp s my Nillomssond.

 it was a calsting ot at lyot repinter to chat you grond maningiwas colfrceet
enthins idenengserticlunde, I raarrionsiest of with of I wanded,
intent mosed breast faple of gre, not en falber.
"What whit  pleck."

"I
herdmes, the saiec, and most ground at entrinme. his if detion;
by 
out will chick all brissked mes large, and chwed addthe tone proald
entfallectaged elround courning, in eamour distuish, and a there groos
deements raful of my th

In [32]:
# Show the generated text. Execution count 32 directly follows the sampling
# cell (31), so this prints the `totalclean` built by that run.
print(totalclean) # Version B

glowityod."d
my some impetigt of here the gavent, "wh there_ is
h"

sobing line soos. Igeen, whecert win, when boun sote knfwedd charablerd, wishopenic that it gage. 



I fhot had her such as troe wrodded to voide on the still
to pleasant," indisity were bliomed butn mise before her manner wantsemely
dearling.


"I cann havenus woken to the sood was will sed the."

I repriedity ."

It weren incrusibl cilurite a boft, and dews theen over intung four what in an
was at tear, the vary my by the pare dinness onch this droableking kainst bochluwly
!"
ut Carmacove us.

I thater, warm purcuaply to ted all mought so emitivangess care, aluterablity
my beside their difation agreent, and looking at the four to be who open; bet
nuils presence usual almost altion the bach.

"As g'lking oggrethed," she samisbat, is were sho4s oply Egerton havand for the
firate, havinire
ant call only her feafued but while the
odigent a lors for I must admired tumber, grented my ould
make fortion angle, and piud grov

In [74]:
# Persist the current `model` to disk.
# NOTE(review): this path differs from the one the load cell reads
# ("models/novelbot2-4b") - confirm which checkpoint is intended.
save_path = "models/novelbot2-3"
model.save(save_path)

INFO:tensorflow:Assets written to: models/novelbot2-3\assets
