<a href="https://colab.research.google.com/github/syedfasihzaidi480/Poetry-generator/blob/main/poetry_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm  # For progress bar

In [None]:
# Load dataset
# Read the poetry CSV, then overwrite its header with fixed column names.
# The assignment below only succeeds when the file has exactly three columns.
csv_path = "/kaggle/input/dataset/Roman-Urdu-Poetry.csv"
df = pd.read_csv(
    csv_path,
    sep=",",
    quotechar='"',
    encoding="utf-8",
)
df.columns = ["ID", "Poet", "Poetry"]


In [None]:
# Preprocessing
# Drop rows whose "Poetry" cell is missing and keep the rest as a plain list.
poetry_column = df["Poetry"]
poetry_lines = poetry_column.dropna().tolist()

In [None]:
# Tokenization
# Learn the word -> integer-id mapping from every poem in the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(poetry_lines)

# Vocabulary size: one slot per known word plus one extra, because the Keras
# Tokenizer never assigns id 0 to a word.
vocabulary = tokenizer.word_index
total_words = 1 + len(vocabulary)


In [None]:
# Create sequences
# For every tokenised poem emit each prefix of length >= 2; the last id of a
# prefix later serves as the prediction target for the ids before it.
input_sequences = []
for token_ids in tokenizer.texts_to_sequences(poetry_lines):
    input_sequences.extend(
        token_ids[:prefix_end] for prefix_end in range(2, len(token_ids) + 1)
    )

In [None]:
# Pad sequences
# Left-pad every prefix to the length of the longest one so they stack into a
# single 2-D array with the target id always in the final column.
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)


In [None]:
# Split input & output
# All columns except the last feed the model; the last column is the word id
# to predict, expanded to a one-hot row of width total_words.
X = input_sequences[:, :-1]
target_ids = input_sequences[:, -1]
y = to_categorical(target_ids, num_classes=total_words)

In [None]:
# Build LSTM Model
# The input length is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which is deprecated in Keras 3. Declaring
# the shape up front also means the model is built (and `model.summary()`
# works) before the first call to `fit`.
model = Sequential([
    tf.keras.Input(shape=(max_sequence_length - 1,)),
    Embedding(total_words, 128),       # word id -> 128-d dense vector
    LSTM(256, return_sequences=True),  # keeps the full sequence for the next LSTM
    Dropout(0.3),
    LSTM(128),                         # collapses the sequence to one vector
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(total_words, activation='softmax')  # distribution over the vocabulary
])

# Targets are one-hot rows, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])




In [None]:
# Training
epochs = 10
batch_size = 64

# One fit() call for all epochs instead of ten separate single-epoch calls:
# Keras then numbers the epochs 1..epochs, does its per-call setup only once,
# and returns a History object holding the loss/accuracy of every epoch
# (available afterwards as `history.history`). Keras prints its own progress
# bar, so no outer tqdm loop is needed.
history = model.fit(X, y, batch_size=batch_size, epochs=epochs, verbose=1)


Training Progress:   0%|          | 0/10 [00:00<?, ?epoch/s]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 82ms/step - accuracy: 0.0593 - loss: 6.7966


Training Progress:  10%|█         | 1/10 [04:29<40:21, 269.11s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 84ms/step - accuracy: 0.0808 - loss: 6.3041


Training Progress:  20%|██        | 2/10 [08:51<35:22, 265.27s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 84ms/step - accuracy: 0.0948 - loss: 6.1381


Training Progress:  30%|███       | 3/10 [13:14<30:47, 263.93s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 83ms/step - accuracy: 0.1023 - loss: 6.0201


Training Progress:  40%|████      | 4/10 [17:35<26:17, 262.97s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 83ms/step - accuracy: 0.1105 - loss: 5.9065


Training Progress:  50%|█████     | 5/10 [21:56<21:50, 262.18s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 83ms/step - accuracy: 0.1188 - loss: 5.8009


Training Progress:  60%|██████    | 6/10 [26:17<17:27, 261.98s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 83ms/step - accuracy: 0.1230 - loss: 5.6988


Training Progress:  70%|███████   | 7/10 [30:39<13:05, 261.88s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 83ms/step - accuracy: 0.1287 - loss: 5.6255


Training Progress:  80%|████████  | 8/10 [35:00<08:43, 261.56s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 83ms/step - accuracy: 0.1302 - loss: 5.5518


Training Progress:  90%|█████████ | 9/10 [39:20<04:21, 261.19s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 84ms/step - accuracy: 0.1337 - loss: 5.4714


Training Progress: 100%|██████████| 10/10 [43:43<00:00, 262.31s/epoch]


In [None]:
# Save the model
# Writes the trained network to the working directory. Only the model is
# stored here; the fitted tokenizer and max_sequence_length are not persisted.
model_path = "roman_urdu_poetry_model.h5"
model.save(model_path)


In [None]:
# Poetry generation function with temperature scaling
def generate_poetry(seed_text, next_words=150, temperature=1.0):
    """
    Extend ``seed_text`` one word at a time by sampling from the LSTM model.

    Uses the notebook-level ``tokenizer``, ``model`` and
    ``max_sequence_length`` objects.

    Args:
        seed_text: Opening words of the poem. Words unknown to the tokenizer
            are simply not part of the model input.
        next_words: Number of sampling steps to perform.
        temperature: Strictly positive scaling factor applied to the
            log-probabilities before sampling. Values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        ``seed_text`` followed by the sampled words, each preceded by a
        single space.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be a positive number")

    context_length = max_sequence_length - 1

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        # pad_sequences left-pads short inputs and, by default, drops the
        # oldest tokens of inputs longer than the model's context.
        token_list = pad_sequences([token_list], maxlen=context_length, padding='pre')

        predictions = model.predict(token_list, verbose=0)[0]

        # Temperature-scaled softmax, computed in float64 and shifted by the
        # maximum so that small temperatures cannot underflow every term to
        # zero (which would turn the normalisation into 0/0).
        logits = np.log(np.asarray(predictions, dtype=np.float64) + 1e-10) / temperature
        if logits.size > 1:
            # Index 0 is the padding id and maps to no word; never sample it.
            logits[0] = -np.inf
        logits -= np.max(logits)
        exp_preds = np.exp(logits)
        probabilities = exp_preds / np.sum(exp_preds)

        predicted_index = int(np.random.choice(len(probabilities), p=probabilities))
        predicted_word = tokenizer.index_word.get(predicted_index, '')

        # Skip ids without a word instead of appending a stray space.
        if predicted_word:
            seed_text += " " + predicted_word
    return seed_text

In [None]:
# Example poetry generation
# Sample 130 words after the seed phrase at a slightly conservative temperature.
seed = "teri ankhon kay siwa is duniya mein  "
generated_poem = generate_poetry(seed, next_words=130, temperature=0.8)
print(generated_poem)

teri ankhon kay siwa is duniya mein   ki ham nazar se bahut mujh ko khel hoga ki kya karte hamen tak khvar hi nahin aata tu nazar hain ki aaj ye jo thiin ud ke bhi ai zindagi shahr men vahi shahr men in shatranj safar dil sar e dil to ja har shakhs ho to ho na haivan ki chal zara har ik mujh par vo log apne liye hi ab bhi kahiye ham to aur hai umr dil ki kam bhi nikle aaj hote par is ki kuchh na jaata hai raah zaban men kiya gar bhi ye din e koh e visal nahin ki hai nahin jahan hai gar to ham vo bach kar karte hue kuchh aur ham ne tire jahan men sar kiye hain ye siine men dil men vo ham har
