In [None]:
# Import required libraries
import streamlit as st  # For creating web interface
import numpy as np  # For numerical operations
import pandas as pd  # For data manipulation
import tensorflow as tf  # Deep learning framework
from tensorflow.keras.models import Sequential  # For creating sequential neural network
from tensorflow.keras.layers import Embedding, GRU, Dense  # Neural network layers
from tensorflow.keras.optimizers import Adam  # Optimizer for training
from keras.preprocessing.sequence import pad_sequences  # For padding input sequences
from sklearn.preprocessing import LabelEncoder  # For encoding words to numbers
import pickle  # For saving and loading Python objects

# Load the dataset and flatten it into one list of words.
# Only the first 700 rows of the CSV are kept to reduce computation.
full_table = pd.read_csv("roman urdu poetry.csv")
poetry_data = full_table.iloc[:700]
non_null_poems = poetry_data["Poetry"].dropna()  # Discard rows with no poem text
poetry_lines = non_null_poems.tolist()

# Join every poem with a single space, then break the result on whitespace
combined_text = " ".join(poetry_lines)
all_words = combined_text.split()

# Build the vocabulary: the fitted encoder's `classes_` holds each distinct word once
encoder = LabelEncoder()
encoder.fit(all_words)

# Lookup tables in both directions, numbered by position in `encoder.classes_`
idx_to_word = dict(enumerate(encoder.classes_))  # index -> word
word_to_idx = {token: position for position, token in idx_to_word.items()}  # word -> index

# Build training windows of 6 consecutive word indices
# (the first 5 are the model input, the 6th is the word to predict)
encoded_corpus = [word_to_idx[token] for token in all_words]
sequence_data = np.array(
    [encoded_corpus[start:start + 6] for start in range(len(all_words) - 5)]
)

# Separate each window into its 5 context indices (X) and its target index (y)
X = sequence_data[:, :5]
y = sequence_data[:, 5]

# Build, train and persist the model exactly once per Streamlit server process.
# Streamlit re-executes this script from the top on every widget interaction, so
# without caching each click on the UI would retrain the network for 30 epochs.
@st.cache_resource
def _train_and_save_model(_features, _targets, _encoder, vocab_size):
    """Train the next-word GRU model and write the model and encoder to disk.

    Parameters whose names start with an underscore are excluded from
    Streamlit's cache key, so the cached model is reused until the vocabulary
    size changes or the server restarts.

    Args:
        _features: Array of input word-index sequences (one row per sample).
        _targets: Array with the target word index for each row of `_features`.
        _encoder: Fitted word encoder, pickled to "word_encoder.pkl".
        vocab_size: Number of distinct words; sizes the embedding and output layers.

    Returns:
        The trained Keras model.
    """
    network = Sequential([
        # `input_length` is omitted: Keras 3 deprecates it and the layer
        # takes the sequence length from the data it is fitted on.
        Embedding(input_dim=vocab_size, output_dim=50),  # Word embedding layer
        GRU(100, return_sequences=False),  # Keeps only the final GRU state
        Dense(vocab_size, activation='softmax'),  # One probability per vocabulary word
    ])

    # Targets are integer word indices, hence the sparse categorical loss
    network.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train for 30 epochs, printing one log line per epoch
    network.fit(_features, _targets, epochs=30, verbose=2)

    # Save the trained model and word encoder
    network.save("poetry_gru_model.h5")
    with open("word_encoder.pkl", "wb") as f:
        pickle.dump(_encoder, f)

    return network


model = _train_and_save_model(X, y, encoder, len(word_to_idx))

# Create Streamlit web interface
st.title("Roman Urdu Nazam Generator")
user_input = st.text_input("Start your Nazam:")  # Seed words typed by the user

# Generate poetry when user clicks the button
if st.button("Generate"):
    input_words = user_input.split()  # Split input into words

    # Words missing from the training vocabulary are encoded as index 0 below,
    # and index 0 is also a real vocabulary entry. Report them to the user
    # rather than substituting another word without any notice.
    unknown_words = sorted({word for word in input_words if word not in word_to_idx})
    if unknown_words:
        st.warning("Words not in the training vocabulary: " + ", ".join(unknown_words))

    for _ in range(20):  # Generate 20 additional words
        # Encode the 5 most recent words (fewer when the prompt is shorter)
        input_seq = [word_to_idx.get(word, 0) for word in input_words[-5:]]
        # Bring the context to the fixed length of 5 used for training;
        # pad_sequences fills missing positions with 0 by default
        input_seq_padded = pad_sequences([input_seq], maxlen=5)
        # verbose=0 stops Keras from printing a progress bar for every generated word
        next_word_probs = model.predict(input_seq_padded, verbose=0)
        # Greedy decoding: take the most probable word; cast to a plain int for the dict lookup
        predicted_idx = int(np.argmax(next_word_probs, axis=-1)[0])
        next_word = idx_to_word[predicted_idx]  # Convert predicted index to word
        input_words.append(next_word)  # The new word becomes part of the next context

    st.write(" ".join(input_words))  # Display generated poetry



Epoch 1/30
2842/2842 - 23s - 8ms/step - accuracy: 0.0436 - loss: 6.9751
Epoch 2/30
2842/2842 - 22s - 8ms/step - accuracy: 0.0623 - loss: 6.4712
Epoch 3/30
2842/2842 - 22s - 8ms/step - accuracy: 0.0948 - loss: 6.1006
Epoch 4/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1187 - loss: 5.7179
Epoch 5/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1414 - loss: 5.3380
Epoch 6/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1640 - loss: 4.9707
Epoch 7/30
2842/2842 - 21s - 7ms/step - accuracy: 0.1959 - loss: 4.6172
Epoch 8/30
2842/2842 - 21s - 7ms/step - accuracy: 0.2330 - loss: 4.2805
Epoch 9/30
2842/2842 - 22s - 8ms/step - accuracy: 0.2779 - loss: 3.9607
Epoch 10/30
2842/2842 - 22s - 8ms/step - accuracy: 0.3232 - loss: 3.6605
Epoch 11/30
2842/2842 - 22s - 8ms/step - accuracy: 0.3655 - loss: 3.3806
Epoch 12/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4056 - loss: 3.1263
Epoch 13/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4455 - loss: 2.8984
Epoch 14/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4796 - 

2025-02-08 22:17:14.463 
  command:

    streamlit run c:\Users\shaiiikh\AppData\Local\Programs\Python\Python312\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-02-08 22:17:14.467 Session state does not function when running a script without `streamlit run`
