In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [3]:
# Read the text file
with open('./sherlock-holm.es_stories_plain-text_advs.txt', 'r', encoding='utf-8') as file:
    text = file.read()

In [4]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

In [5]:
input_sequences = []
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [6]:
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

In [7]:
X = input_sequences[:, :-1]
y = input_sequences[:, -1]

In [8]:
y = np.array(tf.keras.utils.to_categorical(y, num_classes=total_words))

In [9]:
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))
print(model.summary())



None


In [10]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=10, verbose=1)

Epoch 1/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 15ms/step - accuracy: 0.0613 - loss: 6.5608
Epoch 2/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 16ms/step - accuracy: 0.1167 - loss: 5.5776
Epoch 3/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 16ms/step - accuracy: 0.1435 - loss: 5.1606
Epoch 4/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 15ms/step - accuracy: 0.1623 - loss: 4.8130
Epoch 5/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 15ms/step - accuracy: 0.1820 - loss: 4.4925
Epoch 6/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 16ms/step - accuracy: 0.2013 - loss: 4.1992
Epoch 7/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 16ms/step - accuracy: 0.2254 - loss: 3.9216
Epoch 8/10
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 16ms/step - accuracy: 0.2625 - loss: 3.6384
Epoch 9/

<keras.src.callbacks.history.History at 0x27af5613590>

In [14]:
seed_text = "I will leave if they"
next_words = 5

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    predicted = np.argmax(model.predict(token_list), axis=-1)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break
    seed_text += " " + output_word

print(seed_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
I will leave if they are not very much to


In [15]:
model.state_dict()

AttributeError: 'Sequential' object has no attribute 'state_dict'

In [17]:
!pip install pyyaml h5py  



In [18]:
import os

import tensorflow as tf
from tensorflow import keras

print(tf.version.VERSION)

2.16.1


In [19]:
model.summary()

In [23]:
checkpoint_path = "./training_1/cp.keras"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 verbose=1)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=20, verbose=1, callbacks=[cp_callback])

Epoch 1/20
[1m3009/3010[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.3151 - loss: 3.4842
Epoch 1: saving model to ./training_1/cp.keras
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 17ms/step - accuracy: 0.3150 - loss: 3.4844
Epoch 2/20
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.3543 - loss: 3.1191
Epoch 2: saving model to ./training_1/cp.keras
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 17ms/step - accuracy: 0.3543 - loss: 3.1192
Epoch 3/20
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4069 - loss: 2.7933
Epoch 3: saving model to ./training_1/cp.keras
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 17ms/step - accuracy: 0.4068 - loss: 2.7933
Epoch 4/20
[1m3010/3010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4485 - loss: 2.5587
Epoch 4: saving model to ./t

<keras.src.callbacks.history.History at 0x27b0baa0190>

In [25]:
model.save('./predictor.keras')

In [27]:
import pickle
# saving the tokenizer for predict function.
pickle.dump(tokenizer, open('tokenizer1.pkl', 'wb'))

In [28]:
max_sequence_len

18