**Import necessary libraries**

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import time
from tensorflow import keras
import re

In [None]:
# Fetch the corpus CSV from GitHub and save it as shahname.csv in the working
# directory (-O sets the output filename); the next cell reads it with pandas.
!wget https://raw.githubusercontent.com/meytiii/Deep-Learning/main/datasets/shahname.csv -O shahname.csv

In [None]:
# Read the corpus and flatten the 'Text' column into a single string,
# one row per line.
df = pd.read_csv('shahname.csv')
verse_column = df['Text']
text = '\n'.join(verse_column)

# Quick sanity check: show the opening 250 characters
preview = text[:250]
print(preview)

**Preprocess the text: tokenization into words**

In [None]:
# Collapse every run of whitespace (including the newlines inserted when the
# rows were joined) into one space, and trim both ends so that splitting never
# produces empty-string tokens.
text = re.sub(r'\s+', ' ', text).strip()

# str.split() with no separator ignores leading/trailing whitespace and returns
# [] for empty text, so `words` contains real words only.
words = text.split()
print(f'Total words: {len(words)}')


**Create a vocabulary of unique words**

In [None]:
# De-duplicate the corpus words and sort them so the vocabulary order is stable
vocab = sorted(set(words))
print(f'{len(vocab)} unique words')

# Preview: first 100 vocabulary entries on one line, each followed by a space
print(''.join(f'{word} ' for word in vocab[:100]), end='')

# Lookup layers built from the same vocabulary:
#   ids_to_words   (invert=True)  maps integer IDs back to word strings
#   ids_from_words (invert=False) maps word strings to integer IDs
lookup_options = dict(vocabulary=vocab, mask_token=None)
ids_to_words = keras.layers.StringLookup(invert=True, **lookup_options)
ids_from_words = keras.layers.StringLookup(invert=False, **lookup_options)

In [None]:
# Show the first 100 entries of the lookup layer's own vocabulary
# (each entry followed by a space, all on one line)
lookup_vocab_head = ids_from_words.get_vocabulary()[:100]
print(''.join(f'{entry} ' for entry in lookup_vocab_head), end='')

**Convert text to sequence of IDs**

In [None]:
# Split the corpus on whitespace, map every word to its integer ID, and drop
# any size-1 dimensions so the result is a flat sequence of IDs.
word_tokens = tf.strings.split(text)
all_ids = tf.squeeze(ids_from_words(word_tokens))

# Peek at the first 20 IDs
print(all_ids[:20])

**Define sequence length and batch size**

In [None]:
# Tokens are whole words rather than characters, so a short window suffices
SEQUENCE_LENGTH = 20
# Number of sequences per training batch
BATCH_SIZE = 64
# Sentinel that lets tf.data pick parallelism / prefetch depth on its own
AUTOTUNE = tf.data.AUTOTUNE

**Create a dataset of sequences**

In [None]:
# One extra token per window: each window is later split into an input and a
# target that is the input shifted by one position.
window_size = SEQUENCE_LENGTH + 1

# Stream of individual word IDs, grouped into fixed-size windows; an incomplete
# final window is dropped.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
sequences = ids_dataset.batch(
    window_size,
    drop_remainder=True,
    num_parallel_calls=AUTOTUNE,
)

**Split input and target texts**

In [None]:
def split_input_target(sequence):
    """Turn one window into an (input, target) pair offset by one position.

    Args:
        sequence: Any sliceable sequence of tokens.

    Returns:
        A 2-tuple: `sequence` without its last element, and `sequence`
        without its first element.
    """
    return sequence[:-1], sequence[1:]

# Turn every (SEQUENCE_LENGTH + 1)-token window into an (input, target) pair
dataset = sequences.map(split_input_target, num_parallel_calls=AUTOTUNE)

# Create training batches.
# Neighbouring windows come from neighbouring text, so without shuffling every
# batch would hold one contiguous slice of the corpus, in the same order each
# epoch. Shuffle the windows before batching to decorrelate the batches.
SHUFFLE_BUFFER_SIZE = 10000
dataset = (
    dataset
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE, num_parallel_calls=AUTOTUNE, drop_remainder=True)
    .prefetch(AUTOTUNE)
)

**Creating, compiling, and training our model**

In [None]:
# Model hyper-parameters
# Size of the lookup layer's vocabulary, used for both the embedding input
# dimension and the output layer width
VOCAB_SIZE = ids_from_words.vocabulary_size()
# Length of each word embedding vector
EMBEDDING_DIM = 256
# Number of units in the GRU layer
RNN_UNITS = 1024

# Define the model
class MyModel(keras.Model):
    """Word-level language model: Embedding -> GRU -> Dense.

    Args:
        vocabulary_size: Number of distinct token IDs; used as the embedding
            input dimension and as the width of the output layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the GRU layer.
    """

    def __init__(self, vocabulary_size, embedding_dim, rnn_units):
        super().__init__()
        self.embedding = keras.layers.Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)
        # return_sequences: one output per time step;
        # return_state: also hand back the final state so it can be fed in again.
        self.gru = keras.layers.GRU(units=rnn_units, return_sequences=True, return_state=True)
        # No activation: the layer outputs one raw score (logit) per vocabulary entry.
        self.dense = keras.layers.Dense(vocabulary_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of token-ID sequences.

        Args:
            inputs: Token IDs passed to the embedding layer.
            states: Optional GRU state to start from. When None the GRU
                starts from its default initial state.
            return_state: When True, also return the GRU state after the
                last time step.
            training: Forwarded to every layer.

        Returns:
            The Dense layer output for every time step, or a tuple
            `(output, states)` when `return_state` is True.
        """
        x = self.embedding(inputs, training=training)
        if states is None:
            # Let the GRU build its own (zero) initial state instead of calling
            # get_initial_state() explicitly; this avoids depending on that
            # method's signature, which is not the same across Keras versions.
            x, states = self.gru(x, training=training)
        else:
            x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        return x

# Instantiate the network with the sizes chosen above
model = MyModel(
    vocabulary_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
)

# The loss is configured for raw logits (from_logits=True) and integer targets
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn)

# Checkpointing: weights only, with the epoch number in the file name.
# NOTE(review): Keras 3 reportedly requires weights-only checkpoint paths to
# end in '.weights.h5' - confirm against the installed Keras version.
checkpoints_dir = './temp/chpts/'
checkpoint_prefix = os.path.join(checkpoints_dir, 'chpt_{epoch}')
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

# Fit on the batched dataset, checkpointing through the callback
EPOCHS = 30
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

**Generating text and testing our model**

In [None]:
# Define a class for generating text
class OneStep(tf.keras.Model):
    """Samples one word at a time from a trained word-level language model.

    Args:
        model: Trained model. It is called as
            `model(inputs, states=..., return_state=True)` and must return
            `(logits, states)`.
        ids_to_words: Lookup layer mapping integer IDs to word strings.
        ids_from_words: Lookup layer mapping word strings to integer IDs.
        temperature: Value the logits are divided by before sampling.
    """

    def __init__(self, model, ids_to_words, ids_from_words, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.ids_to_words = ids_to_words
        self.ids_from_words = ids_from_words
        self.vocab_size = ids_from_words.vocabulary_size()

        # Additive logit mask that stops the out-of-vocabulary token from being
        # sampled: -inf at the ID that "[UNK]" resolves to, 0 everywhere else.
        skip_ids = self.ids_from_words(tf.constant(['[UNK]']))[:, None]
        sparse_mask = tf.SparseTensor(
            values=[-float('inf')] * len(skip_ids),
            indices=skip_ids,
            dense_shape=[self.vocab_size],
        )
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next word given the current input IDs and model state.

        Args:
            inputs: 1-D tensor of word IDs; a batch dimension of size 1 is
                added before the model is called.
            states: Model state returned by the previous call, or None to
                start fresh.

        Returns:
            A tuple `(predicted_words, states)`: the sampled word as a string
            tensor and the model state to pass to the next call.
        """
        # Expand dims to match the expected shape: (batch_size, sequence_length)
        inputs = tf.expand_dims(inputs, 0)  # from (sequence_length,) to (1, sequence_length)

        predicted_logits, states = self.model(inputs, states=states, return_state=True)
        # Only the prediction for the last time step is used
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits / self.temperature
        # Give the out-of-vocabulary token zero probability
        predicted_logits = predicted_logits + self.prediction_mask

        # Draw one ID from the resulting distribution
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        predicted_words = self.ids_to_words(predicted_ids)
        return predicted_words, states

# Generate text
# The sampler divides the logits by `temperature` before drawing each word
one_step_model = OneStep(model, ids_to_words, ids_from_words, temperature=1.2)

# Seed text: split on whitespace and mapped to IDs to prime the model
seed_text = "به نام خداوند جان و خرد"
seed_ids = ids_from_words(tf.strings.split(seed_text))

# Poem structure parameters
line_length = 5  # Number of words per line
num_lines = 5    # Number of lines per stanza
num_stanzas = 4  # Number of stanzas

# Number of words to generate, derived from the layout so the two cannot drift apart
num_generate = line_length * num_lines * num_stanzas

# Sample the words one at a time
states = None
next_input = seed_ids  # first step sees the whole seed, later steps one word
generated_words = []

for _ in range(num_generate):
    next_word, states = one_step_model.generate_one_step(next_input, states=states)
    next_word = next_word.numpy()[0].decode('utf-8')  # Convert to string
    generated_words.append(next_word)

    # Feed the sampled word back in as a length-1 sequence
    next_input = tf.expand_dims(ids_from_words(tf.constant(next_word)), 0)

# Lay the words out: `line_length` words per line, `num_lines` lines per stanza,
# stanzas separated by a blank line. Joining avoids the stray leading space on
# each line and the trailing blank lines that incremental concatenation produced.
poem_lines = [
    ' '.join(generated_words[start:start + line_length])
    for start in range(0, len(generated_words), line_length)
]
stanzas = [
    '\n'.join(poem_lines[start:start + num_lines])
    for start in range(0, len(poem_lines), num_lines)
]

# The seed text is the first line, followed directly by the first stanza
generated_poem = seed_text + '\n' + '\n\n'.join(stanzas)

print(generated_poem)

به نام خداوند جان و خرد
 بر دژم مر آن را
 جفا را برادر بود خرد
 را زن و منم بر
 گناه فزون بود سر چون
 جهان پهلوان بزرگان هم از

 دین این هر دو آشکار
 سپر بر سر و افسر
 و ترگ و مهر به
 خوبی هر آن نامه را
 خوب چهر پس پشت این

 شاه برتر ز مهر برآورد
 تخت جهاندار آگنده شاه نهان
 گشت پیدا ز تخت بود
 یکی گم ز این سپس
 با بهشت همان تخت فرخ

 سرو سیمین پدیدار هر چه
 فرمان کنید برو نیز با
 خود بهم پر ز روشن
 کند اگر یار باشم به
 باغ اندرون همان نیز گوهر


