In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
import pickle
import numpy as np
import os

In [14]:
# Read the raw book text into a list of lines. The context manager makes sure
# the file handle is closed once reading finishes (the bare open() call left
# it open for the lifetime of the kernel).
with open("metamorphosis.txt", 'r', encoding="latin1") as files:
    lines = list(files)

In [15]:
# Sanity check: show the first line that was read from the file.
lines[0]

'The Project Gutenberg eBook of Metamorphosis\n'

In [20]:
# Collapse the list of lines into one space-separated string. A single join
# is sufficient; rebuilding the identical string once per line was redundant
# quadratic work.
data = " ".join(lines)
# Strip line-break characters and a byte-order mark if one is present.
# The previous pattern r'\uteff' was a literal six-character string that can
# never occur in the text; presumably the BOM '\ufeff' was intended.
data = data.replace('\n', '').replace('\r', '').replace('\ufeff', '')

In [21]:
# Preview the first 400 characters of the joined text.
data[:400]

'The Project Gutenberg eBook of Metamorphosis      This ebook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this ebook or online at www.gutenberg.org. If you are not located in the United States, y'

In [22]:
import string

# Translation table that sends every ASCII punctuation code point to a space.
translater = {ord(mark): ord(' ') for mark in string.punctuation}
# Punctuation-free copy of the joined text.
new_data = data.translate(translater)

In [24]:
# Preview the punctuation-stripped text. The translation table was already
# applied when new_data was created, and applying it a second time changes
# nothing, so the repeated self-reassignment was removed.
new_data[:400]

'The Project Gutenberg eBook of Metamorphosis      This ebook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever  You may copy it  give it away or re use it under the terms of the Project Gutenberg License included with this ebook or online at www gutenberg org  If you are not located in the United States  y'

In [25]:
# Keep the first occurrence of every whitespace-separated token, preserving
# order. dict.fromkeys yields the same list as the membership-test loop but
# in linear instead of quadratic time.
# NOTE(review): this reads `data`, not the punctuation-stripped `new_data`,
# and de-duplicating tokens removes the repeated word pairs a next-word model
# would normally learn from — confirm both are intended.
new = list(dict.fromkeys(data.split()))
data = " ".join(new)

In [31]:
# Preview a slice only; echoing the whole text floods the notebook output.
data[:400]



In [32]:
# Build the word-level vocabulary from the prepared text. fit_on_texts takes
# a list of texts, hence the one-element list.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

In [33]:
# Persist the fitted tokenizer to tokenizer.pkl. The context manager closes
# the file handle, which the inline open() call never did.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [36]:
# Encode the text as a flat list of integer word indices; [0] unwraps the
# result for the single input text.
sequence_data = tokenizer.texts_to_sequences([data])[0]

In [37]:
# First ten word indices of the encoded text.
sequence_data[:10]

[57, 329, 5, 17, 30, 330, 8, 17, 58, 31]

In [38]:
# One more than the number of distinct words. Keras Tokenizer indices start
# at 1 (0 is never assigned to a word), so the embedding and the softmax
# layer need len(word_index) + 1 slots.
vocab_size = len(tokenizer.word_index)+1

In [39]:
# Show the vocabulary size used for the embedding and output layers.
vocab_size

3246

In [41]:
# Slide a window of width two over the encoded text: every entry is
# [current word index, next word index].
sequences = [
    sequence_data[pos - 1:pos + 1]
    for pos in range(1, len(sequence_data))
]

In [42]:
# Number of (current word, next word) training pairs.
len(sequences)

4773

In [44]:
# Convert the list of pairs into a NumPy array with one row per pair.
sequences = np.array(sequences)

In [45]:
# Inspect the second-to-last pair.
sequences[-2]

array([ 860, 3245])

In [46]:
# Split every [current, next] pair into the model input (first element)
# and the prediction target (second element).
x = np.array([pair[0] for pair in sequences])
y = np.array([pair[1] for pair in sequences])

In [48]:
# One-hot encode the target word indices: one row per sample, vocab_size
# columns. The targets are taken from the second column of `sequences`
# instead of from `y` itself, so re-running this cell gives the same result
# rather than one-hot encoding an already encoded matrix.
y = to_categorical(np.asarray(sequences)[:, 1], num_classes = vocab_size)
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [50]:
# Start from an empty Sequential container; the layers are added in the next cell.
model = Sequential()

In [52]:
# Stack: 10-dimensional embedding of a single word index -> two 1024-unit
# LSTMs -> 1024-unit ReLU layer -> softmax over the vocabulary.
# Note: model.add appends, so running this cell again without re-creating
# `model` adds a second copy of the stack.
for layer in (
    Embedding(vocab_size, 10, input_length = 1),
    LSTM(units=1024, return_sequences=True),
    LSTM(units=1024),
    Dense(1024, activation='relu'),
    Dense(vocab_size, activation = 'softmax'),
):
    model.add(layer)

In [53]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1, 10)             32460     
                                                                 
 lstm (LSTM)                 (None, 1, 1024)           4239360   
                                                                 
 lstm_1 (LSTM)               (None, 1024)              8392704   
                                                                 
 dense (Dense)               (None, 1024)              1049600   
                                                                 
 dense_1 (Dense)             (None, 3246)              3327150   
                                                                 
Total params: 17,041,274
Trainable params: 17,041,274
Non-trainable params: 0
_________________________________________________________________


In [56]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard

# Directory that receives the TensorBoard event files.
logdir = 'lognw'

# Write the model to model_nwp.h5 whenever the training loss improves.
checkpoints = ModelCheckpoint(
    'model_nwp.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Multiply the learning rate by 0.2 after 3 epochs without a loss
# improvement, never going below 1e-4.
reduces = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=3,
    min_lr=0.0001,
    verbose=1,
)

tensorboard_vis = TensorBoard(log_dir=logdir)

In [58]:
from tensorflow.keras.optimizers import Adam
# Adam with a starting learning rate of 0.001; categorical cross-entropy
# matches the one-hot targets and the softmax output layer.
model.compile(optimizer = Adam(learning_rate = 0.001), loss= 'categorical_crossentropy')

In [61]:
# Train for 75 epochs; the callbacks checkpoint the best-loss model, lower
# the learning rate on plateaus and write TensorBoard logs.
model.fit(x,y, epochs=75, callbacks = [checkpoints, reduces, tensorboard_vis])

Epoch 1/75
Epoch 1: loss improved from inf to 8.09738, saving model to model_nwp.h5
Epoch 2/75
Epoch 2: loss improved from 8.09738 to 8.06194, saving model to model_nwp.h5
Epoch 3/75
Epoch 3: loss improved from 8.06194 to 7.93485, saving model to model_nwp.h5
Epoch 4/75
Epoch 4: loss improved from 7.93485 to 7.58805, saving model to model_nwp.h5
Epoch 5/75
Epoch 5: loss improved from 7.58805 to 7.19480, saving model to model_nwp.h5
Epoch 6/75
Epoch 6: loss improved from 7.19480 to 6.85440, saving model to model_nwp.h5
Epoch 7/75
Epoch 7: loss improved from 6.85440 to 6.53449, saving model to model_nwp.h5
Epoch 8/75
Epoch 8: loss improved from 6.53449 to 6.18016, saving model to model_nwp.h5
Epoch 9/75
Epoch 9: loss improved from 6.18016 to 5.79546, saving model to model_nwp.h5
Epoch 10/75
Epoch 10: loss improved from 5.79546 to 5.45279, saving model to model_nwp.h5
Epoch 11/75
Epoch 11: loss improved from 5.45279 to 5.13975, saving model to model_nwp.h5
Epoch 12/75
Epoch 12: loss impro

Epoch 35/75
Epoch 35: loss improved from 2.08987 to 2.02084, saving model to model_nwp.h5
Epoch 36/75
Epoch 36: loss improved from 2.02084 to 1.97530, saving model to model_nwp.h5
Epoch 37/75
Epoch 37: loss improved from 1.97530 to 1.90802, saving model to model_nwp.h5
Epoch 38/75
Epoch 38: loss improved from 1.90802 to 1.84705, saving model to model_nwp.h5
Epoch 39/75
Epoch 39: loss improved from 1.84705 to 1.81137, saving model to model_nwp.h5
Epoch 40/75
Epoch 40: loss improved from 1.81137 to 1.76467, saving model to model_nwp.h5
Epoch 41/75
Epoch 41: loss improved from 1.76467 to 1.70815, saving model to model_nwp.h5
Epoch 42/75
Epoch 42: loss improved from 1.70815 to 1.66261, saving model to model_nwp.h5
Epoch 43/75
Epoch 43: loss improved from 1.66261 to 1.65543, saving model to model_nwp.h5
Epoch 44/75
Epoch 44: loss improved from 1.65543 to 1.59553, saving model to model_nwp.h5
Epoch 45/75
Epoch 45: loss improved from 1.59553 to 1.58199, saving model to model_nwp.h5
Epoch 46/7


Epoch 69: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 70/75
Epoch 70: loss improved from 1.10528 to 0.84522, saving model to model_nwp.h5
Epoch 71/75
Epoch 71: loss improved from 0.84522 to 0.74542, saving model to model_nwp.h5
Epoch 72/75
Epoch 72: loss improved from 0.74542 to 0.71649, saving model to model_nwp.h5
Epoch 73/75
Epoch 73: loss improved from 0.71649 to 0.70226, saving model to model_nwp.h5
Epoch 74/75
Epoch 74: loss improved from 0.70226 to 0.69605, saving model to model_nwp.h5
Epoch 75/75
Epoch 75: loss improved from 0.69605 to 0.69445, saving model to model_nwp.h5


<keras.callbacks.History at 0x198e0f73280>

In [62]:
%load_ext tensorboard
%tensorboard --logdir= './logsnw'

ERROR: Failed to launch TensorBoard (exited with 2).
Contents of stderr:
usage: tensorboard [-h] [--helpfull] [--logdir PATH] [--logdir_spec PATH_SPEC]
                   [--host ADDR] [--bind_all] [--port PORT]
                   [--reuse_port BOOL] [--load_fast {false,auto,true}]
                   [--extra_data_server_flags EXTRA_DATA_SERVER_FLAGS]
                   [--grpc_creds_type {local,ssl,ssl_dev}]
                   [--grpc_data_provider PORT] [--purge_orphaned_data BOOL]
                   [--db URI] [--db_import] [--inspect] [--version_tb]
                   [--tag TAG] [--event_file PATH] [--path_prefix PATH]
                   [--window_title TEXT] [--max_reload_threads COUNT]
                   [--reload_interval SECONDS] [--reload_task TYPE]
                   [--reload_multifile BOOL]
                   [--reload_multifile_inactive_secs SECONDS]
                   [--generic_data TYPE]
                   [--samples_per_plugin SAMPLES_PER_PLUGIN]
                   [-

In [66]:
def predictive_text_next(Text):
    """Predict the word most likely to follow the given text.

    Only the last in-vocabulary word of ``Text`` is fed to the model, because
    the network takes a single word index per sample.

    Args:
        Text: Input string; its last word known to the tokenizer is used.

    Returns:
        The predicted next word as a string.

    Raises:
        ValueError: If ``Text`` contains no word known to the tokenizer.
    """
    # Use the argument itself; the earlier body read the notebook-global
    # `text` and silently ignored whatever was passed in.
    sequence = tokenizer.texts_to_sequences([Text])[0]
    if not sequence:
        raise ValueError(
            "no word in the input is known to the tokenizer: %r" % (Text,)
        )
    # Shape (1, 1): one sample holding one word index.
    batch = np.array(sequence[-1:]).reshape(1, 1)
    preds = model.predict(batch)
    # Take the argmax within the single sample's distribution. A flat argmax
    # over several rows could yield an index beyond the vocabulary.
    p_class = int(np.argmax(preds[0]))
    p_word = tokenizer.index_word[p_class]
    return p_word

In [69]:
# The model conditions on one word, so only the last word typed is used.
text = input("enter text:")
# split() without arguments ignores leading/trailing/repeated whitespace,
# so a trailing space no longer produces an empty last word.
words = text.split()
# `text` is rebound to the last word (empty string when nothing was typed),
# as before; the former ''.join(text) on a string was a no-op and is gone.
text = words[-1] if words else ""
predictive_text_next(text)

enter text:re-use it under the terms of the


'project'