In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense

# Load the corpus and lower-case it so the vocabulary is case-insensitive.
# The encoding is pinned so decoding does not depend on the platform's locale
# default (assumes poetry.txt is stored as UTF-8).
with open('poetry.txt', 'r', encoding='utf-8') as file:
    text = file.read().lower()

# Fit the tokenizer on the whole corpus. Word indices start at 1 (index 0 is
# reserved and is also the value used for padding below), hence the +1.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1
input_sequences = []

# For every line, emit each prefix of length >= 2 as a training sequence:
# the last token of a prefix is the label, the tokens before it the context.
# Lines with fewer than two known tokens contribute nothing.
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Fail with an actionable message instead of the opaque
# "max() arg is an empty sequence" when the corpus yields no n-grams.
if not input_sequences:
    raise ValueError(
        "poetry.txt produced no training sequences; "
        "it needs at least one line with two or more words"
    )

# Left-pad every sequence to a common length, then split off the final token
# of each row as the label and one-hot encode it over the vocabulary.
# pad_sequences already returns a NumPy array, so no extra conversion is needed.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
X = input_sequences[:, :-1]
y = input_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)

# Define an LSTM model
def create_lstm_model():
    """Build and compile the next-word prediction network.

    The stack is an embedding of each token id into 64 dimensions, a single
    128-unit LSTM layer, and a softmax output over ``total_words`` classes.
    The vocabulary size and input length come from the module-level
    ``total_words`` and ``max_sequence_len``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    layers = [
        # Inputs are the padded sequences minus their final (label) token.
        Embedding(total_words, 64, input_length=max_sequence_len - 1),
        LSTM(128),
        Dense(total_words, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Instantiate the LSTM network and fit it on the n-gram predictors/labels
# for 250 epochs, showing a progress bar for each epoch.
lstm_model = create_lstm_model()
lstm_model.fit(x=X, y=y, epochs=250, verbose=1)

# Text generation function with temperature sampling
def generate_text(seed_text, next_words, model, max_sequence_len, temperature=0.7):
    """Extend ``seed_text`` by sampling ``next_words`` words from ``model``.

    Each step tokenizes the text generated so far with the module-level
    ``tokenizer``, left-pads it to ``max_sequence_len - 1`` tokens, asks the
    model for a next-word distribution, reshapes that distribution with
    ``temperature`` and samples one word from it.

    Args:
        seed_text: Starting text; sampled words are appended to it.
        next_words: Number of words to sample.
        model: Object whose ``predict`` returns one row of next-word
            probabilities for a batch containing one padded sequence.
        max_sequence_len: Sequence length used at training time (including
            the label token), so the model input is one token shorter.
        temperature: Positive sampling temperature. Values below 1 sharpen
            the distribution, values above 1 flatten it.

    Returns:
        ``seed_text`` followed by the sampled words, separated by spaces.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be a positive number")

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

        # Single-row batch in, single row of probabilities out.
        predictions = model.predict(token_list, verbose=0)
        probabilities = np.asarray(predictions, dtype="float64")[0]

        # Temperature scaling in log space; the epsilon keeps log() finite.
        logits = np.log(probabilities + 1e-10) / temperature
        # Index 0 is the padding value, not a word: sampling it would append
        # an empty token and silently produce fewer words than requested.
        if logits.size > 1:
            logits[0] = -np.inf
        # Shift by the maximum before exponentiating so that small
        # temperatures cannot underflow every term to zero (0/0 -> NaN).
        logits -= np.max(logits)
        weights = np.exp(logits)
        weights /= np.sum(weights)

        # Sample over the model's own output size rather than a global, and
        # convert to a plain int for the index -> word lookup.
        predicted = int(np.random.choice(len(weights), p=weights))

        output_word = tokenizer.index_word.get(predicted, "")
        if output_word:
            seed_text += " " + output_word
    return seed_text

# Sample ten more words after the seed phrase at temperature 0.8 using the
# trained LSTM, then print the resulting text.
seed_text = "The woods are lovely"
output_text = generate_text(
    seed_text,
    10,
    lstm_model,
    max_sequence_len,
    temperature=0.8,
)
print("Generated text:", output_text)


Epoch 1/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.0073 - loss: 4.8979    
Epoch 2/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0503 - loss: 4.8838 
Epoch 3/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0689 - loss: 4.8545 
Epoch 4/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0672 - loss: 4.7626 
Epoch 5/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0604 - loss: 4.6042 
Epoch 6/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0488 - loss: 4.5904
Epoch 7/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0575 - loss: 4.5732     
Epoch 8/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0703 - loss: 4.5208 
Epoch 9/250
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━