In [1]:
!pip install tensorflow



In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm  # For progress bar

In [3]:
# Load dataset
df = pd.read_csv("/kaggle/input/dataset-poetry/Roman-Urdu-Poetry.csv", delimiter=",", quotechar='"', encoding="utf-8")
df.columns = ["ID", "Poet", "Poetry"]

In [4]:
# Preprocessing
poetry_lines = df['Poetry'].dropna().tolist()

In [5]:
# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(poetry_lines)
total_words = len(tokenizer.word_index) + 1  # Vocabulary size

In [6]:
# Create sequences
input_sequences = []
for line in poetry_lines:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

In [7]:
# Pad sequences
max_sequence_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

In [8]:
print(max_sequence_length)

566


In [9]:
import pickle

In [10]:
# Save the tokenizer
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

In [11]:
# Create and save character to index mapping
text = ' '.join(poetry_lines)
chars = sorted(list(set(text)))
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for idx, char in enumerate(chars)}

In [12]:
# Save the mappings
with open('char_to_idx.pkl', 'wb') as f:
    pickle.dump(char_to_idx, f)
    
with open('idx_to_char.pkl', 'wb') as f:
    pickle.dump(idx_to_char, f)

In [13]:
# Split input & output
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)

In [14]:
# Build LSTM Model
model = Sequential([
    Embedding(total_words, 128, input_length=max_sequence_length - 1),
    LSTM(256, return_sequences=True),
    Dropout(0.3),
    LSTM(128),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])



In [15]:
# Training with TQDM
epochs = 10
batch_size = 64

for epoch in tqdm(range(epochs), desc="Training Progress", unit="epoch"):
    model.fit(X, y, batch_size=batch_size, epochs=1, verbose=1)

Training Progress:   0%|          | 0/10 [00:00<?, ?epoch/s]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 91ms/step - accuracy: 0.0594 - loss: 6.8019


Training Progress:  10%|█         | 1/10 [04:54<44:09, 294.43s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 91ms/step - accuracy: 0.0805 - loss: 6.3230


Training Progress:  20%|██        | 2/10 [09:37<38:23, 287.89s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 91ms/step - accuracy: 0.0908 - loss: 6.1619


Training Progress:  30%|███       | 3/10 [14:20<33:18, 285.56s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 91ms/step - accuracy: 0.1004 - loss: 6.0335


Training Progress:  40%|████      | 4/10 [19:03<28:27, 284.58s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 91ms/step - accuracy: 0.1113 - loss: 5.9127


Training Progress:  50%|█████     | 5/10 [23:46<23:39, 283.94s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 91ms/step - accuracy: 0.1186 - loss: 5.8079


Training Progress:  60%|██████    | 6/10 [28:29<18:54, 283.71s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 91ms/step - accuracy: 0.1236 - loss: 5.7207


Training Progress:  70%|███████   | 7/10 [33:11<14:09, 283.12s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 91ms/step - accuracy: 0.1249 - loss: 5.6447


Training Progress:  80%|████████  | 8/10 [37:53<09:25, 282.68s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 90ms/step - accuracy: 0.1313 - loss: 5.5768


Training Progress:  90%|█████████ | 9/10 [42:34<04:42, 282.34s/epoch]

[1m2888/2888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 91ms/step - accuracy: 0.1322 - loss: 5.5096


Training Progress: 100%|██████████| 10/10 [47:17<00:00, 283.78s/epoch]


In [16]:
# Save the model
model.save("roman_urdu_poetry_model.h5")

In [17]:
# Poetry generation function with temperature scaling
def generate_poetry(seed_text, next_words=150, temperature=1.0):
    """
    Generates poetry given a seed text using LSTM model.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
        
        predictions = model.predict(token_list, verbose=0)[0]
        predictions = np.log(predictions + 1e-10) / temperature
        exp_preds = np.exp(predictions)
        probabilities = exp_preds / np.sum(exp_preds)
        
        predicted_index = np.random.choice(len(probabilities), p=probabilities)
        predicted_word = tokenizer.index_word.get(predicted_index, '')

        seed_text += " " + predicted_word
    return seed_text

In [19]:
# Example poetry generation
print(generate_poetry("teri ankhon kay siwa is duniya mein", next_words=130, temperature=0.8))

teri ankhon kay siwa is duniya mein men lete hain duur kiije kariye zindagi ham men vo to baat ki agar jagta jo aur pahle to sitaron ke liye kyuun use na i ham ne 'dagh' jo kya hai jin ki aur aate hain to aah se ham ke badan ke ru koi kis ki kab gardish e la ina e khuda hai vo jo samjha hai jis ko main ne bhi puchho hai jo vo mahmil ka ki tujh se main dil tak dil valon ki ustukhvan aage ham ko tum mujhe kyuun nikalte huun le nahin sakta khushbu matam hote jaa e na saval kya khayal e piri ki na barpa mujh par hi kar lena kya kya hai kuchh khula hosh ke phiri na tha sham e khudi men kya rah e rah aa
