#### **Dataset Preparation**

In [1]:
!pip install tensorflow



In [2]:
import re
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback

In [3]:
# Loading the dataset
poetry_df = pd.read_csv('PoetryFoundationData.csv', nrows=1000)

In [4]:
poetry_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Poem,Poet,Tags
0,0,\r\r\n Objects Used to Prop...,"\r\r\nDog bone, stapler,\r\r\ncribbage board, ...",Michelle Menting,
1,1,\r\r\n The New Church\r\r\n...,"\r\r\nThe old cupola glinted above the clouds,...",Lucia Cherciu,
2,2,\r\r\n Look for Me\r\r\n ...,\r\r\nLook for me under the hood\r\r\nof that ...,Ted Kooser,
3,3,\r\r\n Wild Life\r\r\n ...,"\r\r\nBehind the silo, the Mother Rabbit\r\r\n...",Grace Cavalieri,
4,4,\r\r\n Umbrella\r\r\n ...,\r\r\nWhen I push your button\r\r\nyou fly off...,Connie Wanek,


In [5]:
poetry_df.shape

(1000, 5)

In [6]:
# Cleaning data
poetry_df['Poem'] = poetry_df['Poem'].str.replace(r'\s+', ' ', regex=True)
poetry_df['Title'] = poetry_df['Title'].str.replace(r'\s+', ' ', regex=True)
poetry_df['input'] = poetry_df['Title'] + ' *** ' + poetry_df['Poem']

In [7]:
# Exploring the columns
print(poetry_df.columns)

Index(['Unnamed: 0', 'Title', 'Poem', 'Poet', 'Tags', 'input'], dtype='object')


In [8]:
input_data = poetry_df['input'].values.tolist()

# Printing a portion of the corpus to verify
print(input_data[:3])

[" Objects Used to Prop Open a Window  ***  Dog bone, stapler, cribbage board, garlic press because this window is loose—lacks suction, lacks grip. Bungee cord, bootstrap, dog leash, leather belt because this window had sash cords. They frayed. They broke. Feather duster, thatch of straw, empty bottle of Elmer's glue because this window is loud—its hinges clack open, clack shut. Stuffed bear, baby blanket, single crib newel because this window is split. It's dividing in two. Velvet moss, sagebrush, willow branch, robin's wing because this window, it's pane-less. It's only a frame of air. ", ' The New Church  ***  The old cupola glinted above the clouds, shone among fir trees, but it took him an hour for the half mile all the way up the hill. As he trailed, the village passed him by, greeted him, asked about his health, but everybody hurried to catch the mass, left him leaning against fences, measuring the road with the walking stick he sculpted. He yearned for the day when the new chur

#### **Data Preprocessing**

In [9]:
# Tokenizing the text (conversion of each word to a unique integer)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(input_data)
total_words = len(tokenizer.word_index) + 1

print(total_words)

30216


In [10]:
# Creating input sequences using sequences of words
input_sequences = []
for line in input_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, min(len(token_list), 50)):  # Cap sequence length to 50
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Calculating max_sequence_len
max_sequence_len = max(len(seq) for seq in input_sequences)

# Padding sequences and creating predictors and labels
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
label = to_categorical(label, num_classes=total_words)

In [11]:
# Defining the batch size
batch_size = 32

# Generator function to yield batches of data
def data_generator(predictors, labels):
    dataset_size = len(predictors)
    indices = np.arange(dataset_size)
    np.random.shuffle(indices)
    for idx in indices:
        yield predictors[idx], labels[idx]

In [12]:
# Creating a TensorFlow Dataset from the generator function
dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(predictors, label),
    output_signature=(
        tf.TensorSpec(shape=(predictors.shape[1],), dtype=tf.int32),
        tf.TensorSpec(shape=(label.shape[1],), dtype=tf.float32)
    )
)

In [13]:
# Shuffling and batching the dataset
dataset = dataset.shuffle(buffer_size=10000).batch(batch_size).repeat()

# Splitting the dataset into training and validation sets
train_size = 100000
val_size = 20000

train_dataset = dataset.take(train_size // batch_size)
val_dataset = dataset.skip(train_size // batch_size).take(val_size // batch_size)

In [14]:
# Defining the ModelCheckpoint callback
checkpoint_path = "model_checkpoint.keras"
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path,
                                      monitor='val_loss',
                                      save_best_only=True,
                                      mode='min',
                                      verbose=1)

# Defining EarlyStopping callback
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

#### **LSTM Model Development**

In [15]:
# Building the model
def create_model():
    model = Sequential()
    model.add(Embedding(total_words, 50))
    model.add(LSTM(100))
    model.add(Dropout(0.2))
    model.add(Dense(total_words, activation='softmax'))
    return model

model = create_model()
model.build(input_shape=(None, max_sequence_len))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

#### **Training the Model**

In [16]:
# Training the model with batching
history = model.fit(train_dataset,
                    epochs=30,
                    verbose=1,
                    validation_data=val_dataset,
                    callbacks=[early_stopping_callback, checkpoint_callback])

Epoch 1/30
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - accuracy: 0.0623 - loss: 7.7097
Epoch 1: val_loss improved from inf to 6.77108, saving model to model_checkpoint.keras
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m416s[0m 131ms/step - accuracy: 0.0623 - loss: 7.7096 - val_accuracy: 0.0767 - val_loss: 6.7711
Epoch 2/30
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - accuracy: 0.0772 - loss: 6.7470
Epoch 2: val_loss improved from 6.77108 to 6.16529, saving model to model_checkpoint.keras
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m411s[0m 130ms/step - accuracy: 0.0772 - loss: 6.7470 - val_accuracy: 0.0988 - val_loss: 6.1653
Epoch 3/30
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - accuracy: 0.0960 - loss: 6.2414
Epoch 3: val_loss improved from 6.16529 to 5.56188, saving model to model_checkpoint.keras
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [17]:
# Saving the model
model.save("trained_model.h5")

# Loading the trained model
model = tf.keras.models.load_model('trained_model.h5')



#### **Text Generation**

In [18]:
def generate_poetry(seed_text, next_words, model, max_sequence_len, tokenizer):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

In [20]:
seed_texts = ["The moon", "Love's embrace", "Autumn leaves"]
for seed_text in seed_texts:
    generated_poetry = generate_poetry(seed_text, 20, model, max_sequence_len, tokenizer)
    print(f"Generated poetry with seed '{seed_text}':\n{generated_poetry}\n")

Generated poetry with seed 'The moon':
The moon at my mother san brother broke up into me when you find a good to all because the sky is

Generated poetry with seed 'Love's embrace':
Love's embrace heart over the roof top i have imagine which i am thee she says sometimes in them the only knows

Generated poetry with seed 'Autumn leaves':
Autumn leaves for you i chyll if that ye wyll a whyle be styll of a comely gyll that dwelt on a

