<a href="https://colab.research.google.com/github/sutharimanikanta/-technity-tasks-/blob/main/sequence_to_sequence_(seq2seq)_model_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import tensorflow as tf
from tensorflow import keras

* A Gated Recurrent Unit (GRU) layer is a type of recurrent neural network (RNN) layer used for processing sequential data such as text, time series, and audio. Its gating mechanism is designed to address limitations of traditional RNNs, particularly the vanishing gradient problem and the difficulty of capturing long-range dependencies in sequences.

* I want to use Transformer encoder layers.

* Bi-directional RNNs:
Purpose: Bi-directional RNNs process sequences in both forward and backward directions.
Strengths:
Capture context from both past and future tokens.
Useful for tasks like part-of-speech tagging, named entity recognition, and sentiment analysis.
Handle sequential dependencies well.
Weaknesses:
Computationally expensive due to bidirectional processing.
Still suffer from vanishing gradient problems.
Example: Bidirectional LSTMs or GRUs.
* Transformer Networks:
Purpose: Transformers revolutionized NLP by introducing self-attention mechanisms.
Strengths:
Parallelizable, making them faster than RNNs.
Capture global context effectively.
State-of-the-art performance on various NLP benchmarks.
Weaknesses:
Require large amounts of data and computational resources.
Lack inherent sequential processing (no recurrence).
Example: BERT, GPT, and other transformer-based models.
When to Choose:

Bi-directional RNNs: Use when you need fine-grained sequential context and have limited data.
Transformers: Opt for transformers when you have abundant data, want to handle long-range dependencies, and aim for top-tier performance.

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Embed source token ids and encode them with a bidirectional GRU.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_size: Width of the token embeddings and number of GRU units.

    Call arguments:
        inputs: Integer token ids; the embedding layer is applied to them
            directly.

    Returns:
        A tuple ``(output_sequence, forward_state)``. Both have a last
        dimension of ``embedding_size`` because the two directions are summed
        rather than concatenated.
    """

    def __init__(self, vocab_size=1000, embedding_size=128):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

    def build(self, input_shape):
        self.embedding_layer = tf.keras.layers.Embedding(self.vocab_size, self.embedding_size)
        self.gru = tf.keras.layers.GRU(self.embedding_size, return_sequences=True, return_state=True)
        # The default merge_mode ("concat") would produce 2 * embedding_size
        # features per step, which does not match the embedding_size-wide
        # encoder-states input the decoder model is declared with. Summing the
        # forward and backward outputs keeps the width at embedding_size.
        self.bi = tf.keras.layers.Bidirectional(self.gru, merge_mode="sum")

    def call(self, inputs):
        embeddings = self.embedding_layer(inputs)
        # With return_state=True the wrapper returns the merged sequence
        # followed by the final forward and backward states.
        output_sequence, forward_state, _backward_state = self.bi(embeddings)
        # Only the forward state is handed on as the decoder's initial state.
        return output_sequence, forward_state

import tensorflow as tf
import numpy as np

class BahdanauAttention(tf.keras.layers.Layer):
    """Additive (Bahdanau) attention over a sequence of encoder states.

    Computes ``score_t = W3 . tanh(query @ W1 + value_t @ W2)``, normalises the
    scores over the time axis with a softmax and returns the weighted sum of
    the values.

    Args:
        words: Source sequence length. Kept for interface compatibility; the
            weights no longer depend on it, so any sequence length works.
        embedding_size: Feature width of both the query and the values, and
            the size of the hidden attention projection.

    Call arguments:
        inputs: A pair ``(query, value)`` where ``query`` has shape
            ``(batch, 1, embedding_size)`` and ``value`` has shape
            ``(batch, time, embedding_size)``.

    Returns:
        Context tensor of shape ``(batch, embedding_size)``.
    """

    def __init__(self, words=20, embedding_size=128):
        super().__init__()
        self.words = words
        self.embedding_size = embedding_size

    def build(self, input_shapes):
        units = self.embedding_size
        # W1 projects the query, W2 projects every value, and W3 reduces the
        # combined hidden representation to one scalar score per time step.
        self.W1 = self.add_weight(name="W1", shape=(units, units), initializer="random_uniform")
        self.W2 = self.add_weight(name="W2", shape=(units, units), initializer="random_uniform")
        self.W3 = self.add_weight(name="W3", shape=(units,), initializer="random_uniform")

    def call(self, inputs):
        query, value = inputs
        projected_query = tf.einsum("bqd,du->bqu", query, self.W1)   # (batch, 1, units)
        projected_value = tf.einsum("btd,du->btu", value, self.W2)   # (batch, time, units)
        # The single query step broadcasts across every time step of value.
        energy = tf.nn.tanh(projected_query + projected_value)       # (batch, time, units)
        scores = tf.einsum("btu,u->bt", energy, self.W3)             # (batch, time)
        weights = tf.nn.softmax(scores, axis=-1)
        context = tf.einsum("btd,bt->bd", value, weights)            # (batch, features)
        return context


class Decoder(tf.keras.layers.Layer):
    """Single-step attention decoder.

    Args:
        embedding_size: Width of the token embedding and number of GRU units.
        vocab_size: Size of the output (target) vocabulary.
        words: Source sequence length, forwarded to the attention layer.

    Call arguments:
        inputs: A triple ``(y, state, encode)`` where ``y`` is the embedded
            input token ``(batch, embedding_size)``, ``state`` is the previous
            decoder state ``(batch, embedding_size)`` and ``encode`` holds the
            encoder states ``(batch, words, embedding_size)``.

    Returns:
        A tuple ``(probabilities, new_state)``: softmax probabilities of shape
        ``(batch, vocab_size)`` and the updated state of shape
        ``(batch, embedding_size)``, which can be fed back as ``state`` for
        the next step.
    """

    def __init__(self, embedding_size=128, vocab_size=1000, words=20):
        super().__init__()
        self.embedding_size = embedding_size
        self.vocab_size = vocab_size
        self.words = words

    def build(self, input_shapes):
        self.attention = BahdanauAttention(words=self.words, embedding_size=self.embedding_size)
        self.gru = tf.keras.layers.GRU(self.embedding_size, return_sequences=True, return_state=True)
        self.op1 = tf.keras.layers.Dense(self.embedding_size * 10, activation='tanh')
        self.op2 = tf.keras.layers.Dense(self.embedding_size * 10, activation='tanh')
        self.op3 = tf.keras.layers.Dense(self.vocab_size, activation='softmax')

    def call(self, inputs):
        y, state, encode = inputs
        # The previous state is the attention query; it needs a length-1 time axis.
        context = self.attention((tf.expand_dims(state, axis=1), encode))

        # One GRU step over [token embedding ; context], resuming from the
        # previous decoder state instead of starting from zeros.
        step_input = tf.expand_dims(tf.concat([y, context], axis=-1), axis=1)
        # The GRU returns [sequence_output, final_state]; the rank-2 final
        # state is what must be concatenated below and carried to the next step.
        _, new_state = self.gru(step_input, initial_state=state)

        g_input = tf.concat([y, context, new_state], axis=-1)
        g_output = self.op3(self.op2(self.op1(g_input)))
        return g_output, new_state


In [None]:
import tensorflow as tf
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.layers import Embedding

class EnglishToTeluguTranslator:
    """Train and run an attention-based English-to-Telugu seq2seq model.

    Args:
        encoder_input_words: Fixed length of the padded source sequences.
        english_vocab_size: Size of the source embedding table.
        telugu_vocab_size: Size of the target embedding table / output layer.
        embedding_size: Embedding width and number of recurrent units.
        epochs: Number of passes over the training data.
        batch_size: Number of sentence pairs per gradient step.
        optimizer: Optimizer name or instance, resolved with
            ``tf.keras.optimizers.get``.
    """

    def __init__(self, encoder_input_words=20, english_vocab_size=1000, telugu_vocab_size=1000, embedding_size=128,
                 epochs=30, batch_size=200, optimizer='adam'):
        self.encoder_input_words = encoder_input_words
        self.english_vocab_size = english_vocab_size
        self.telugu_vocab_size = telugu_vocab_size
        self.embedding_size = embedding_size
        self.epochs = epochs
        self.batch_size = batch_size
        self.optimizer = optimizer
        # Targets are integer token ids and the decoder's last layer already
        # applies a softmax, so use the sparse loss on probabilities.
        self.loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
        self.loss_history = []
        self.encoder = Encoder(vocab_size=english_vocab_size, embedding_size=embedding_size)
        self.decoder = None

    def get_enc_dec(self):
        """Build fresh encoder and decoder models and print their summaries.

        Returns:
            The pair of values returned by the two ``summary()`` calls.
        """
        x_encoder_input = tf.keras.layers.Input(shape=(self.encoder_input_words,))
        encode = Encoder(vocab_size=self.english_vocab_size, embedding_size=self.embedding_size)(x_encoder_input)
        self.encoder = tf.keras.Model(inputs=x_encoder_input, outputs=encode)

        # The decoder model consumes one target token id per call.
        x_decoder_input = tf.keras.layers.Input(shape=(1,))
        x_decoder = tf.keras.layers.Embedding(self.telugu_vocab_size, self.embedding_size)(x_decoder_input)
        x_state_input = tf.keras.layers.Input(shape=(self.embedding_size,))
        x_states_input = tf.keras.layers.Input(shape=(self.encoder_input_words, self.embedding_size))

        decode = Decoder(embedding_size=self.embedding_size, vocab_size=self.telugu_vocab_size,
                         words=self.encoder_input_words)((x_decoder[:, 0], x_state_input, x_states_input))
        self.decoder = tf.keras.Model(inputs=[x_decoder_input, x_state_input, x_states_input], outputs=decode)
        return self.encoder.summary(), self.decoder.summary()

    def train_translator(self, X_english, X_telugu):
        """Train with teacher forcing and record the summed loss per epoch.

        Args:
            X_english: Integer array ``(instances, encoder_input_words)`` of
                padded source token ids.
            X_telugu: Integer array ``(instances, target_length)`` of padded
                target token ids.

        Raises:
            ValueError: If the two arrays differ in length or the target
                sequences have fewer than two time steps.
        """
        X_english = np.asarray(X_english)
        X_telugu = np.asarray(X_telugu)
        total_instances = len(X_english)
        if len(X_telugu) != total_instances:
            raise ValueError("X_english and X_telugu must contain the same number of sentences.")
        if X_telugu.ndim != 2 or X_telugu.shape[-1] < 2:
            raise ValueError("X_telugu must be 2-D with at least two time steps per sentence.")

        # Honour the optimizer requested at construction time.
        optimizer = tf.keras.optimizers.get(self.optimizer)
        loss_fn = self.loss_fn
        epochs, batch_size = self.epochs, self.batch_size

        self.loss_history = []

        # Initialize encoder and decoder models
        self.get_enc_dec()

        for epoch in range(epochs):
            batch_loss = tf.constant(0.0)
            for batch in tqdm(range(0, total_instances, batch_size)):
                x1_train = X_english[batch:batch + batch_size]
                x2_train = X_telugu[batch:batch + batch_size]

                with tf.GradientTape() as tape:
                    loss_count = tf.constant(0.0)
                    H, state = self.encoder(x1_train)

                    # Teacher forcing: feed token t and score the prediction
                    # against token t + 1. Scoring against token t itself
                    # would only teach the decoder to copy its input.
                    for step in range(x2_train.shape[-1] - 1):
                        decoder_input = x2_train[:, step:step + 1]  # keep the (batch, 1) shape
                        target = x2_train[:, step + 1]
                        output, state = self.decoder([decoder_input, state, H])
                        loss_count = loss_count + loss_fn(target, output)

                weights = self.encoder.trainable_weights + self.decoder.trainable_weights
                grads = tape.gradient(loss_count, weights)
                optimizer.apply_gradients(zip(grads, weights))
                batch_loss = batch_loss + loss_count

            print("Epoch: " + str(epoch + 1) + "/" + str(epochs) + " : Error " + str(batch_loss.numpy()))
            self.loss_history.append(batch_loss.numpy())

    def translate_sentence(self, english_sentence, start_token=2, end_token=3, max_length=10):
        """Greedily decode one tokenized English sentence.

        Args:
            english_sentence: Token ids of a single sentence, either 1-D or
                with a leading batch axis of size one (for example the result
                of ``tokenizer.texts_to_sequences([text])``).
            start_token: Target-vocabulary id fed to the decoder first.
            end_token: Target-vocabulary id that stops decoding.
            max_length: Maximum number of tokens to generate.

        Returns:
            List of predicted target token ids, including ``end_token`` when
            it was produced.

        Raises:
            RuntimeError: If the model has not been trained yet.
            TypeError: If a raw string is passed instead of token ids.
            ValueError: If more than one sentence is passed.
        """
        if self.decoder is None:
            raise RuntimeError("The translator has not been trained; call train_translator() first.")
        if isinstance(english_sentence, str):
            raise TypeError("english_sentence must be a sequence of token ids, not raw text.")

        english_indices = np.asarray(english_sentence, dtype="int32")
        if english_indices.ndim == 1:
            english_indices = english_indices[np.newaxis, :]
        if english_indices.ndim != 2 or english_indices.shape[0] != 1:
            raise ValueError("english_sentence must contain the token ids of exactly one sentence.")

        # The encoder model has a fixed input length: zero-pad or truncate.
        encoder_input = np.zeros((1, self.encoder_input_words), dtype="int32")
        used = min(english_indices.shape[1], self.encoder_input_words)
        encoder_input[0, :used] = english_indices[0, :used]

        H, state = self.encoder(encoder_input)

        telugu_sentence = []
        current_token = np.array([[start_token]], dtype="int32")

        for _ in range(max_length):
            output, state = self.decoder([current_token, state, H])
            # output is (1, vocab); argmax over the vocabulary gives one id.
            predicted_word_index = int(np.argmax(output.numpy(), axis=-1)[0])
            telugu_sentence.append(predicted_word_index)

            if predicted_word_index == end_token:
                break

            # Feed the prediction back in with the (batch, 1) shape the decoder expects.
            current_token = np.array([[predicted_word_index]], dtype="int32")

        return telugu_sentence

* Luong Attention (Multiplicative / Dot-Product Attention):
* Self-Attention (Scaled Dot-Product Attention):

In [None]:
import pandas as pd
import numpy as np

# End-to-end script: load the parallel corpus, tokenize and pad it, train the
# translator, then translate one example sentence.
from sklearn.model_selection import train_test_split

# Step 1: Parse the Excel data
data = pd.read_excel('/content/engtotel.xlsx')

# Drop only rows missing one of the two columns that are actually used, so a
# NaN in an unrelated column does not discard a valid sentence pair.
data = data.dropna(subset=['english', 'telugu'])

english_sentences = data['english'].astype(str).tolist()
telugu_sentences = data['telugu'].astype(str).tolist()

# Step 2: Tokenization
english_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
telugu_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')

english_tokenizer.fit_on_texts(english_sentences)
telugu_tokenizer.fit_on_texts(telugu_sentences)

english_sequences = english_tokenizer.texts_to_sequences(english_sentences)
telugu_sequences = telugu_tokenizer.texts_to_sequences(telugu_sentences)

# Step 3: Padding (both languages share one length)
max_length = max(max(len(seq) for seq in english_sequences), max(len(seq) for seq in telugu_sequences))
english_sequences_padded = tf.keras.preprocessing.sequence.pad_sequences(english_sequences, maxlen=max_length, padding='post')
telugu_sequences_padded = tf.keras.preprocessing.sequence.pad_sequences(telugu_sequences, maxlen=max_length, padding='post')

# Step 4: Data Splitting
X_train, X_val, y_train, y_val = train_test_split(english_sequences_padded, telugu_sequences_padded, test_size=0.2, random_state=42)

# Step 5: Model Training
# Size the model from the data instead of relying on the defaults (20 words,
# vocabulary of 1000): tokenizer ids start at 1 and 0 is the padding id, so
# each embedding table needs len(word_index) + 1 rows, and the encoder input
# length must equal the padded sequence length.
translator = EnglishToTeluguTranslator(
    encoder_input_words=max_length,
    english_vocab_size=len(english_tokenizer.word_index) + 1,
    telugu_vocab_size=len(telugu_tokenizer.word_index) + 1,
)
translator.train_translator(X_train, y_train)

# Step 6: Evaluation (Optional)
# Evaluate your model's performance on the validation set using metrics like BLEU score or simply by inspecting translations manually.

# Step 7: Inference
# NOTE(review): the tokenizers above add no start/end markers to the Telugu
# sentences - confirm which token ids translate_sentence should start from
# and stop at.
english_sentence = "His legs are long."
english_sequence = english_tokenizer.texts_to_sequences([english_sentence])
# Pad the query exactly like the training data.
english_sequence = tf.keras.preprocessing.sequence.pad_sequences(english_sequence, maxlen=max_length, padding='post')
translated_sequence = translator.translate_sentence(english_sequence)
translated_sentence = telugu_tokenizer.sequences_to_texts([translated_sequence])[0]
print("Translated Sentence:", translated_sentence)


ValueError: Exception encountered when calling layer "decoder" (type Decoder).

in user code:

    File "<ipython-input-6-d2351eb279d2>", line 60, in call  *
        context = self.attention((tf.expand_dims(state,axis=1),encode))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_filetatr33tl.py", line 11, in tf__call
        regressed_query = ag__.converted_call(ag__.ld(tf).einsum, ('bij,jk->bik', ag__.ld(query), ag__.converted_call(ag__.ld(tf).transpose, (ag__.ld(self).W1,), None, fscope)), None, fscope)

    ValueError: Exception encountered when calling layer 'bahdanau_attention' (type BahdanauAttention).
    
    in user code:
    
        File "<ipython-input-6-d2351eb279d2>", line 34, in call  *
            regressed_query = tf.einsum("bij,jk->bik", query, tf.transpose(self.W1))
    
        ValueError: Dimensions must be equal, but are 128 and 1 for '{{node decoder/bahdanau_attention/einsum/Einsum}} = Einsum[N=2, T=DT_FLOAT, equation="bij,jk->bik"](decoder/ExpandDims, decoder/bahdanau_attention/transpose)' with input shapes: [?,1,128], [1,128].
    
    
    Call arguments received by layer 'bahdanau_attention' (type BahdanauAttention):
      • inputs=('tf.Tensor(shape=(None, 1, 128), dtype=float32)', 'tf.Tensor(shape=(None, 20, 128), dtype=float32)')


Call arguments received by layer "decoder" (type Decoder):
  • inputs=('tf.Tensor(shape=(None, 128), dtype=float32)', 'tf.Tensor(shape=(None, 128), dtype=float32)', 'tf.Tensor(shape=(None, 20, 128), dtype=float32)')