In [None]:
#step 1 load data 
#step 2 choose an architecture of LSTM (Bidirectional Stacked)
#step 3 Train 
#step 4 test 

In [None]:
import pandas as pd

# Quick look at the raw CSV before any modelling.
# The original call passed an empty path, which makes pd.read_csv raise on a
# fresh run; this is the same file the training cell further down reads.
df=pd.read_csv("/kaggle/input/roman-poetry/Roman-Urdu-Poetry.csv")
df.head()

In [None]:
# Pull the 'Poetry' column out of the frame and peek at its first entry.
x = df['Poetry']
x[0]

In [None]:
# Look up key 1 in `x` (the 'Poetry' column) to compare with x[0] above.
x[1]

In [None]:
# List the distinct values found in the 'Poet' column.
df['Poet'].unique()

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader

# Hyperparameters
EMBEDDING_DIM = 128  # passed to PoetryLSTM; used for both the word and the poet embedding
HIDDEN_DIM = 256  # hidden size handed to the LSTM
NUM_LAYERS = 2  # number of stacked LSTM layers
BATCH_SIZE = 32  # samples per DataLoader batch
LEARNING_RATE = 0.001  # learning rate given to Adam
EPOCHS = 10  # number of passes train_model makes over the dataloader
SEQ_LENGTH = 10  # default window length (in words) for PoetryDataset samples

# Load dataset
df = pd.read_csv("/kaggle/input/roman-poetry/Roman-Urdu-Poetry.csv")  # Update with the correct file path
poems = df["Poetry"].tolist()
poets = df["Poet"].tolist()

# Tokenize and build vocabulary
# A "word" is any whitespace-separated token of a poem.
words = set()
for poem in poems:
    words.update(poem.split())

# Number the entries in sorted order. Iterating a set of strings directly gives
# an order that can change from one interpreter start to the next (string hash
# randomisation), which would make saved weights disagree with the vocabulary
# after a kernel restart. Sorting makes the index assignment reproducible.
word_to_idx = {word: i for i, word in enumerate(sorted(words))}
idx_to_word = {i: word for word, i in word_to_idx.items()}
# key=str keeps the sort well-defined even if the column holds a non-string value.
poet_to_idx = {poet: i for i, poet in enumerate(sorted(set(poets), key=str))}

# Poetry Dataset
class PoetryDataset(Dataset):
    """Sliding-window next-word samples drawn from a list of poems.

    Every poem is split on whitespace and mapped to word indices (words
    missing from ``word_to_idx`` are dropped). Each window of ``seq_length``
    indices becomes one sample, paired with the same window shifted one
    position to the right and with the index of the poem's poet.
    """

    def __init__(self, poems, poets, word_to_idx, poet_to_idx, seq_length=SEQ_LENGTH):
        self.poems = poems
        self.poets = poets
        self.word_to_idx = word_to_idx
        self.poet_to_idx = poet_to_idx
        self.seq_length = seq_length

        # Each entry is (input_ids, target_ids, poet_id), kept as plain Python values.
        self.data = []
        for text, author in zip(poems, poets):
            token_ids = [word_to_idx[token] for token in text.split() if token in word_to_idx]
            author_id = poet_to_idx[author]
            # A poem with at most seq_length known words yields no windows.
            window_count = len(token_ids) - seq_length
            self.data.extend(
                (
                    token_ids[start:start + seq_length],
                    token_ids[start + 1:start + seq_length + 1],
                    author_id,
                )
                for start in range(window_count)
            )

    def __len__(self):
        """Number of (input, target, poet) windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as three tensors: inputs, targets, poet."""
        inputs, targets, author_id = self.data[idx]
        return torch.tensor(inputs), torch.tensor(targets), torch.tensor(author_id)

# Define LSTM Model
class PoetryLSTM(nn.Module):
    """Next-word model conditioned on the poet.

    Every token embedding is concatenated with the embedding of the poet,
    the result is fed through a stacked LSTM (no ``bidirectional`` flag is
    set), and a linear layer turns each hidden state into one score per
    vocabulary entry.
    """

    def __init__(self, vocab_size, poet_count, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.poet_embedding = nn.Embedding(poet_count, embedding_dim)
        # Word and poet vectors are concatenated, hence twice the embedding width.
        lstm_input_dim = embedding_dim * 2
        self.lstm = nn.LSTM(lstm_input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, poet):
        """Score the vocabulary at every position of ``x``.

        ``x`` holds word indices as (batch, seq_len); ``poet`` holds one poet
        index per batch row. Returns a (batch, seq_len, vocab_size) tensor.
        """
        steps = x.size(1)
        token_vecs = self.embedding(x)  # (batch, seq_len, embed_dim)
        # Copy the single poet vector onto every time step.
        author_vecs = self.poet_embedding(poet).unsqueeze(1)
        author_vecs = author_vecs.repeat(1, steps, 1)  # (batch, seq_len, embed_dim)
        features = torch.cat((token_vecs, author_vecs), dim=2)
        hidden_states, _ = self.lstm(features)
        return self.fc(hidden_states)

# Build the sliding-window dataset and a shuffled loader over it
dataset = PoetryDataset(poems, poets, word_to_idx, poet_to_idx)
dataloader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE)

# Size the model from the vocabularies, then move it to the GPU when available
vocab_size = len(word_to_idx)
poet_count = len(poet_to_idx)
model = PoetryLSTM(
    vocab_size=vocab_size,
    poet_count=poet_count,
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
)
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Adam over all model parameters; cross-entropy over the vocabulary scores
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training Function
def train_model(model, dataloader, optimizer, criterion, epochs):
    """Train ``model`` on ``dataloader`` and print the mean loss per epoch.

    Args:
        model: module called as ``model(x, poet)`` that returns one score
            vector per position (last dimension = number of classes).
        dataloader: iterable yielding ``(x, y, poet)`` tensor batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss called as ``criterion(scores_2d, targets_1d)``.
        epochs: number of passes over ``dataloader``.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Follow the device the model actually lives on instead of re-deriving it
    # from CUDA availability for every tensor of every batch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        batch_count = 0
        for x, y, poet in dataloader:
            x, y, poet = x.to(device), y.to(device), poet.to(device)
            optimizer.zero_grad()
            output = model(x, poet)
            # Take the class count from the output itself rather than from a
            # notebook-level global, so the function works for any model.
            loss = criterion(output.reshape(-1, output.size(-1)), y.reshape(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            batch_count += 1
        # An empty loader would otherwise divide by zero.
        mean_loss = total_loss / batch_count if batch_count else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss}")

# Train Model
# Makes EPOCHS passes over `dataloader`; train_model prints the mean batch loss after each pass.
train_model(model, dataloader, optimizer, criterion, EPOCHS)


Epoch 1/10, Loss: 5.805823723793454
Epoch 2/10, Loss: 3.703525646347229
Epoch 3/10, Loss: 2.4856539940271687
Epoch 4/10, Loss: 1.7547306208065154
Epoch 5/10, Loss: 1.3084892377118085
Epoch 6/10, Loss: 1.038669454677401
Epoch 7/10, Loss: 0.8765590877521288
Epoch 8/10, Loss: 0.7753250928497017
Epoch 9/10, Loss: 0.7080525423043242
Epoch 10/10, Loss: 0.6620812296442949


In [4]:
def generate_poetry(model, start_words, poet_name, max_length=50):
    """Greedily extend ``start_words`` in the style of ``poet_name``.

    Reads the notebook-level ``word_to_idx``, ``idx_to_word`` and
    ``poet_to_idx`` mappings.

    Args:
        model: trained network called as ``model(word_ids, poet_ids)``.
        start_words: whitespace-separated seed text. Words missing from the
            vocabulary are left out of the model input but kept in the
            returned text.
        poet_name: key of ``poet_to_idx`` selecting the poet to condition on.
        max_length: maximum number of words appended to the seed.

    Returns:
        The seed followed by the generated words, joined by single spaces.
        An empty string (after printing a message) when the poet is unknown
        or none of the seed words is in the vocabulary.
    """
    model.eval()
    # Build tensors on the device the model actually lives on; guessing from
    # CUDA availability breaks when a CPU model is used on a GPU machine.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Convert poet name to index
    if poet_name not in poet_to_idx:
        print("Poet not found in dataset.")
        return ""
    poet_idx = torch.tensor([poet_to_idx[poet_name]], device=device)

    # Convert start words to indices
    generated_words = start_words.split()
    input_indices = [word_to_idx[word] for word in generated_words if word in word_to_idx]
    if not input_indices:
        print("No valid words found in vocabulary.")
        return ""

    input_tensor = torch.tensor(input_indices, dtype=torch.long, device=device).unsqueeze(0)  # Add batch dim

    # One no_grad scope for the whole loop instead of re-entering it per step.
    with torch.no_grad():
        for _ in range(max_length):
            output = model(input_tensor, poet_idx)  # Forward pass over the full context
            # Greedy decoding: take the highest-scoring word at the last position.
            next_word_idx = torch.argmax(output[:, -1, :], dim=1).item()
            next_word = idx_to_word.get(next_word_idx)
            if next_word is None:
                break  # Stop if the index has no word attached
            generated_words.append(next_word)
            next_step = torch.tensor([[next_word_idx]], device=device)
            input_tensor = torch.cat((input_tensor, next_step), dim=1)

    return " ".join(generated_words)


In [5]:
# Sample generation conditioned on Ahmad Faraz, seeded with a three-word opening.
poet_name, start_words = "ahmad-faraz", "aañkh se duur"
generated_poem = generate_poetry(model, start_words, poet_name)
print("Generated Poem:\n", generated_poem)


Generated Poem:
 aañkh se duur bhī hai jaise ham apne hī sāmne aa.e aañkh ko be-sabab kyuuñ nahīñ dete rahzan ho to hāzir hai matā-e-dil-o-jāñ bhī rahbar ho to manzil kā pata kyuuñ nahīñ dete kyā biit ga.ī ab ke 'farāz' ahl-e-chaman par yārān-e-qafas mujh ko sadā kyuuñ chāhiye ḳhudā ho jaa.eñ ham bhī majbūriyoñ


In [43]:
# Sample generation conditioned on Allama Iqbal, seeded with a single word.
poet_name, start_words = "allama-iqbal", "pyaar"
generated_poem = generate_poetry(model, start_words, poet_name)
print("Generated Poem:\n", generated_poem)


Generated Poem:
 pyaar thā kisī darmāñda rah-rau kī sadā-e-dardnāk jis ko āvāz-e-rahīl-e-kārvāñ samjhā thā maiñ kah ga.iiñ rāz-e-mohabbat parda-dārī-hā-e-shauq thī fuġhāñ vo bhī jise zabt-e-fuġhāñ samjhā thā maiñ thī kisī darmāñda rah-rau kī sadā-e-dardnāk jis ko āvāz-e-rahīl-e-kārvāñ samjhā thā maiñ kah ga.iiñ rāz-e-mohabbat parda-dārī-hā-e-shauq thī fuġhāñ vo bhī jise zabt-e-fuġhāñ samjhā thā maiñ


In [6]:
# Same seed word as the Iqbal sample, now conditioned on Faiz Ahmad Faiz.
poet_name, start_words = "faiz-ahmad-faiz", "pyaar"
generated_poem = generate_poetry(model, start_words, poet_name)
print("Generated Poem:\n", generated_poem)


Generated Poem:
 pyaar to ghar jā.eñge kis qadar hogā yahāñ mehr-o-vafā kā mātam ham tirī yaad se jis roz utar jā.eñge jauharī band kiye jaate haiñ bāzār-e-suḳhan ham kise bechne almās-o-guhar jā.eñge nemat-e-zīst kā ye qarz chukegā kaise laakh ghabrā ke ye kahte raheñ mar jā.eñge shāyad apnā bhī koī bait hudī-ḳhvāñ ban


In [7]:
import json

# Persist the trained weights.
torch.save(model.state_dict(), "poetry_lstm.pth")
print("Model saved as poetry_lstm.pth")

# The weights are only usable together with the exact word/poet index
# assignment they were trained with, so store those mappings next to them.
# ensure_ascii=False keeps the diacritics in the words readable in the file.
with open("poetry_vocab.json", "w", encoding="utf-8") as vocab_file:
    json.dump(
        {"word_to_idx": word_to_idx, "poet_to_idx": poet_to_idx},
        vocab_file,
        ensure_ascii=False,
    )


Model saved as poetry_lstm.pth


In [10]:
import gradio as gr
import torch

# Pick the GPU when PyTorch can see one, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Poetry Generation Function (same as your previous implementation)
def generate_poetry(start_words, poet_name, max_length=50):
    """Greedily extend ``start_words`` in the style of ``poet_name``.

    Uses the notebook-level ``model`` together with the ``word_to_idx``,
    ``idx_to_word`` and ``poet_to_idx`` mappings.

    NOTE(review): this redefinition replaces the earlier
    ``generate_poetry(model, start_words, poet_name)`` from a previous cell.
    Cells written for that four-argument form no longer work once this cell
    has run; consider giving one of the two a distinct name.

    Args:
        start_words: whitespace-separated seed text. Words missing from the
            vocabulary are left out of the model input but kept in the
            returned text.
        poet_name: key of ``poet_to_idx`` selecting the poet to condition on.
        max_length: maximum number of words appended to the seed.

    Returns:
        The seed followed by the generated words, joined by single spaces,
        or an explanatory message when the poet is unknown or none of the
        seed words is in the vocabulary.
    """
    model.eval()
    # Build tensors on the device the model actually lives on rather than
    # assuming it matches CUDA availability.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Convert poet name to index
    if poet_name not in poet_to_idx:
        return "Poet not found in dataset."
    poet_idx = torch.tensor([poet_to_idx[poet_name]], device=model_device)

    # Convert start words to indices
    generated_words = start_words.split()
    input_indices = [word_to_idx[word] for word in generated_words if word in word_to_idx]
    if not input_indices:
        return "No valid words found in vocabulary."

    input_tensor = torch.tensor(input_indices, dtype=torch.long, device=model_device).unsqueeze(0)

    # One no_grad scope for the whole loop instead of re-entering it per step.
    with torch.no_grad():
        for _ in range(max_length):
            output = model(input_tensor, poet_idx)
            # Greedy decoding: take the highest-scoring word at the last position.
            next_word_idx = torch.argmax(output[:, -1, :], dim=1).item()
            next_word = idx_to_word.get(next_word_idx)
            if next_word is None:
                break
            generated_words.append(next_word)
            next_step = torch.tensor([[next_word_idx]], device=model_device)
            input_tensor = torch.cat((input_tensor, next_step), dim=1)

    return " ".join(generated_words)

# Define Gradio Interface
def poetry_app(start_words, poet_name):
    """Click handler for the UI: forward both inputs to ``generate_poetry``."""
    poem = generate_poetry(start_words, poet_name)
    return poem

# Gradio UI
# Components are laid out in the order they are created inside the Blocks context.
theme = gr.themes.Soft(primary_hue="purple", secondary_hue="blue")
with gr.Blocks(theme=theme) as app:
    # Page heading.
    gr.Markdown("""
    # ✨ Futuristic Roman Urdu Poetry Generator ✨
    **Generate poetry from legendary poets using AI**
    """)
    
    # Inputs side by side: poet picker (filled from poet_to_idx) and seed text.
    # NOTE(review): `poet_name` and `start_words` are rebound to widgets here,
    # replacing the plain strings assigned by the sample-generation cells above.
    with gr.Row():
        poet_name = gr.Dropdown(choices=list(poet_to_idx.keys()), label="Select Poet", interactive=True)
        start_words = gr.Textbox(label="Enter Starting Words", placeholder="aañkh se duur")
    
    btn_generate = gr.Button("Generate Poetry ✍️")
    output = gr.Textbox(label="Generated Poetry", interactive=False, lines=6)
    
    # The input order matches poetry_app's (start_words, poet_name) parameters.
    btn_generate.click(poetry_app, inputs=[start_words, poet_name], outputs=output)
    
    # Static feature list shown under the controls.
    gr.Markdown(""" 
    ### 🌌 Features
    - Choose from famous poets like **Allama Iqbal**, **Ahmad Faraz**, etc.
    - Start poetry with a custom phrase
    - Futuristic UI with dark mode aesthetics 🌙
    """)

# Launch App
# share=True also requests a public gradio.live URL (visible in this cell's output),
# so anyone holding the link can reach the app while it is running.
app.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://fee90f8f85411968c3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.1