<a href="https://colab.research.google.com/github/krishnadixit05/Deep-Learning/blob/main/Encoder_Decoder_Q_A_model_using_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


In [23]:
# Sample input and output sentence pairs
input_texts = ["what is the full form of ICT", "How many students are there in your class", "How many semesters are in ICT", "What is the thing that differ ICT stuent then others", "how is faculty feedback of ICT dep"]
target_texts = ["Information and Communication Technology", "60 students","8 sem", "discipline", "good"]

# Add <START> and <END> tokens in target texts
target_texts = ["<START> " + text + " <END>" for text in target_texts]

# Define tokenizer
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(input_texts + target_texts)

# Convert words to sequences
input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)

# Vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# Find max sentence length for padding
max_length = max([len(seq) for seq in input_sequences + target_sequences])

# Pad sequences
input_sequences = pad_sequences(input_sequences, maxlen=max_length, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_length, padding='post')

# Split target sequences into input (X) and output (y) for training
decoder_input_data = target_sequences[:, :-1]  # Remove <END>
decoder_target_data = target_sequences[:, 1:]  # Remove <START>

In [19]:
# Define model parameters
embedding_dim = 128
latent_dim = 256

# Encoder
encoder_inputs = Input(shape=(max_length,))
enc_emb = Embedding(vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = Bidirectional(LSTM(latent_dim, return_state=True))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_lstm(enc_emb)

# Merge forward and backward LSTM states
state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
state_c = tf.keras.layers.Concatenate()([forward_c, backward_c])

# Decoder
decoder_inputs = Input(shape=(max_length-1,))
dec_emb = Embedding(vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(latent_dim * 2, return_sequences=True, return_state=True)  # BiLSTM doubles latent_dim
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])
decoder_dense = Dense(vocab_size, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)

# Define model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])


In [20]:
# Expand dimensions for sparse categorical loss
decoder_target_data = np.expand_dims(decoder_target_data, -1)

# Train the model
model.fit([input_sequences, decoder_input_data], decoder_target_data, batch_size=16, epochs=50, verbose=1)


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step - accuracy: 0.0444 - loss: 3.6117
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.6889 - loss: 3.5782
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step - accuracy: 0.6667 - loss: 3.5403
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step - accuracy: 0.6667 - loss: 3.4894
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302ms/step - accuracy: 0.6667 - loss: 3.4135
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 0.6667 - loss: 3.2938
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step - accuracy: 0.6667 - loss: 3.1001
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step - accuracy: 0.6667 - loss: 2.7962
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

<keras.src.callbacks.history.History at 0x7eb7db7ab450>

In [21]:
# Encoder model for inference
encoder_model = Model(encoder_inputs, [state_h, state_c])

# Decoder model for inference
decoder_state_input_h = Input(shape=(latent_dim * 2,))
decoder_state_input_c = Input(shape=(latent_dim * 2,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Redefine decoder_emb within the decoder model's scope
dec_emb2 = Embedding(vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs)
decoder_states = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)

decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs2] + decoder_states)

In [24]:
# Function to encode input sentence
def encode_input_sentence(sentence):
    seq = tokenizer.texts_to_sequences([sentence])
    return pad_sequences(seq, maxlen=max_length, padding='post')

# Function to decode predicted sequence
def decode_sequence(input_sentence):
    # Encode input sentence
    states_value = encoder_model.predict(encode_input_sentence(input_sentence))

    # Generate <START> token sequence
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['<start>']

    decoded_sentence = ''

    for _ in range(max_length - 1):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Select highest probability word index
        sampled_token_index = np.argmax(output_tokens[0, -1, :])

        # If <END> token is reached, stop decoding
        if sampled_token_index == tokenizer.word_index['<end>']:
            break

        # Convert index to word
        sampled_word = tokenizer.index_word[sampled_token_index]
        decoded_sentence += ' ' + sampled_word

        # Update target sequence and states
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return decoded_sentence.strip()

# Test with a sentence
input_sentence = "what is the full form of ICT"
output_sentence = decode_sequence(input_sentence)
print("Input:", input_sentence)
print("Predicted Output:", output_sentence)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Input: what is the full form of ICT
Predicted Output: information and communication technology
