# Libraries

In [None]:
import streamlit as st
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import pickle

# Corpus: only the first 700 rows of the CSV are used; rows with a missing
# "Poetry" value are discarded.
poetry_data = pd.read_csv("roman urdu poetry.csv").iloc[:700]
poetry_lines = poetry_data["Poetry"].dropna().tolist()

# Flatten all poems into a single whitespace-delimited stream of words.
combined_text = " ".join(poetry_lines)
all_words = combined_text.split()

# Vocabulary: the fitted encoder's classes_ array holds the distinct words,
# and a word's position in that array is its integer id.
encoder = LabelEncoder().fit(all_words)

idx_to_word = dict(enumerate(encoder.classes_))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

# Training windows: every run of 6 consecutive words becomes one row.
# The corpus is encoded once up front, then sliced into overlapping windows.
encoded_words = [word_to_idx[word] for word in all_words]
sequence_data = np.array(
    [encoded_words[start:start + 6] for start in range(len(encoded_words) - 5)]
)

# The first 5 ids of each window are the model input, the 6th is the target.
X = sequence_data[:, :-1]
y = sequence_data[:, -1]

# Model Building, Compilation, Training and Saving
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction (including each click on "Generate"). Without caching, the
# 30-epoch training run below would be repeated before every generation.
# st.cache_resource keeps the trained model in memory so training happens
# once per server process.
@st.cache_resource
def _train_poetry_model(vocab_size, _features, _targets):
    """Build, train and save the next-word GRU model.

    Args:
        vocab_size: Number of distinct words; used for both the embedding
            input dimension and the size of the softmax output layer.
        _features: Array of input word-id windows (one row per sample).
        _targets: Array with the id of the word following each window.

    Returns:
        The trained Keras model. It is also written to
        "poetry_gru_model.h5" as a side effect.

    Note:
        Parameters whose names start with an underscore are excluded from
        Streamlit's cache key, so the cached model is keyed on
        ``vocab_size`` only.
    """
    network = Sequential([
        Embedding(input_dim=vocab_size, output_dim=50, input_length=_features.shape[1]),
        GRU(100, return_sequences=False),
        Dense(vocab_size, activation='softmax')
    ])

    # Targets are integer class ids, hence the sparse cross-entropy loss.
    network.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    network.fit(_features, _targets, epochs=30, verbose=2)

    network.save("poetry_gru_model.h5")
    return network


model = _train_poetry_model(len(word_to_idx), X, y)

# Persist the fitted encoder alongside the model so the word <-> id mapping
# can be restored later.
with open("word_encoder.pkl", "wb") as f:
    pickle.dump(encoder, f)

# Streamlit Interface
# The user types an opening phrase; on "Generate" the model greedily appends
# 20 words, each predicted from the 5 most recent words.
st.title("Roman Urdu Nazam Generator")
user_input = st.text_input("Start your Nazam:")

if st.button("Generate"):
    input_words = user_input.split()

    if not input_words:
        # An empty prompt would feed the model a window made only of padding
        # zeros, so ask for a seed instead of generating from nothing.
        st.warning("Please enter at least one word to start your Nazam.")
    else:
        # Words the model never saw fall back to id 0 below; tell the user
        # rather than substituting silently. dict.fromkeys de-duplicates
        # while keeping the order in which the words were typed.
        unknown_words = list(dict.fromkeys(
            word for word in input_words if word not in word_to_idx
        ))
        if unknown_words:
            st.warning(
                "Words not in the training vocabulary: " + ", ".join(unknown_words)
            )

        for _ in range(20):  # Generate 20 additional words
            input_seq = [word_to_idx.get(word, 0) for word in input_words[-5:]]
            # Prompts shorter than 5 words are left-padded with zeros.
            input_seq_padded = pad_sequences([input_seq], maxlen=5)
            # verbose=0 suppresses the progress bar Keras would otherwise
            # print for each of the 20 single-sample predictions.
            probabilities = model.predict(input_seq_padded, verbose=0)
            # Cast the NumPy integer to a plain int before the dict lookup.
            predicted_idx = int(np.argmax(probabilities, axis=-1)[0])
            next_word = idx_to_word[predicted_idx]
            input_words.append(next_word)

        st.write(" ".join(input_words))




Epoch 1/30
2842/2842 - 23s - 8ms/step - accuracy: 0.0436 - loss: 6.9751
Epoch 2/30
2842/2842 - 22s - 8ms/step - accuracy: 0.0623 - loss: 6.4712
Epoch 3/30
2842/2842 - 22s - 8ms/step - accuracy: 0.0948 - loss: 6.1006
Epoch 4/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1187 - loss: 5.7179
Epoch 5/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1414 - loss: 5.3380
Epoch 6/30
2842/2842 - 22s - 8ms/step - accuracy: 0.1640 - loss: 4.9707
Epoch 7/30
2842/2842 - 21s - 7ms/step - accuracy: 0.1959 - loss: 4.6172
Epoch 8/30
2842/2842 - 21s - 7ms/step - accuracy: 0.2330 - loss: 4.2805
Epoch 9/30
2842/2842 - 22s - 8ms/step - accuracy: 0.2779 - loss: 3.9607
Epoch 10/30
2842/2842 - 22s - 8ms/step - accuracy: 0.3232 - loss: 3.6605
Epoch 11/30
2842/2842 - 22s - 8ms/step - accuracy: 0.3655 - loss: 3.3806
Epoch 12/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4056 - loss: 3.1263
Epoch 13/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4455 - loss: 2.8984
Epoch 14/30
2842/2842 - 22s - 8ms/step - accuracy: 0.4796 - 

2025-02-08 22:17:14.463 
  command:

    streamlit run c:\Users\shaiiikh\AppData\Local\Programs\Python\Python312\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-02-08 22:17:14.467 Session state does not function when running a script without `streamlit run`
