<a href="https://colab.research.google.com/github/nikhilRajput-prog/Deep-Learning-Lab-File/blob/main/Deep_Learning_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Experiment 6

In [None]:
!pip install tensorflow nltk




In [None]:
import numpy as np
import tensorflow as tf
import re
import string
import random
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.models import Model
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [None]:
# Tab-separated parallel corpus: English in the first column, Spanish in the second.
path = "/content/spa.txt"

# Read through a context manager so the file handle is closed deterministically.
with open(path, encoding='utf-8') as corpus_file:
    lines = corpus_file.read().strip().split('\n')

eng_sentences = []
spa_sentences = []

for line in lines:
    fields = line.split('\t')

    # Rows without a tab separator (blank or malformed lines) cannot form a pair.
    if len(fields) < 2:
        continue

    # Only the first two columns are used, so any extra trailing column
    # (e.g. an attribution field) no longer breaks the unpacking.
    eng = fields[0].lower()
    spa = fields[1].lower()

    # Wrap the target sentence in start/end markers for the decoder.
    spa = "<start> " + spa + " <end>"

    eng_sentences.append(eng)
    spa_sentences.append(spa)

print("Total sentence pairs:", len(eng_sentences))


Total sentence pairs: 118964


In [None]:
# Upper bound on the number of sentence pairs used downstream.
MAX_SAMPLES = 40000

# Keep the leading MAX_SAMPLES entries of both lists; cutting them identically
# keeps English/Spanish pairs aligned by index.
eng_sentences, spa_sentences = (
    eng_sentences[:MAX_SAMPLES],
    spa_sentences[:MAX_SAMPLES],
)


In [None]:
# Stage 1: hold out 20% of the pairs; the remaining 80% is the training set.
eng_train, eng_temp, spa_train, spa_temp = train_test_split(
    eng_sentences,
    spa_sentences,
    test_size=0.2,
    random_state=42,
)

# Stage 2: cut the held-out pairs in half -> validation and test sets.
eng_val, eng_test, spa_val, spa_test = train_test_split(
    eng_temp,
    spa_temp,
    test_size=0.5,
    random_state=42,
)

# Sizes of the train / validation / test splits.
print(*(len(part) for part in (eng_train, eng_val, eng_test)))


32000 4000 4000


In [None]:
# filters='' turns off the tokenizer's character filtering, so the
# "<start>" / "<end>" markers (and punctuation) survive as part of tokens.
eng_tokenizer = Tokenizer(filters='')
spa_tokenizer = Tokenizer(filters='')

# Both vocabularies are learned from the training split only.
eng_tokenizer.fit_on_texts(eng_train)
spa_tokenizer.fit_on_texts(spa_train)

# One extra slot on top of the learned words; id 0 is what the padding uses.
eng_vocab_size = 1 + len(eng_tokenizer.word_index)
spa_vocab_size = 1 + len(spa_tokenizer.word_index)

print("English vocab:", eng_vocab_size)
print("Spanish vocab:", spa_vocab_size)


English vocab: 8799
Spanish vocab: 15535


In [None]:
# Map every split to integer id sequences with the matching language tokenizer.
eng_train_seq, eng_val_seq, eng_test_seq = (
    eng_tokenizer.texts_to_sequences(texts)
    for texts in (eng_train, eng_val, eng_test)
)

spa_train_seq, spa_val_seq, spa_test_seq = (
    spa_tokenizer.texts_to_sequences(texts)
    for texts in (spa_train, spa_val, spa_test)
)


In [None]:
# Padded lengths are fixed by the longest *training* sequence of each language.
max_eng_len = max(map(len, eng_train_seq))
max_spa_len = max(map(len, spa_train_seq))

# Zero-pad at the end ('post') so every split shares one fixed width per language.
eng_train_pad, eng_val_pad, eng_test_pad = (
    pad_sequences(seqs, maxlen=max_eng_len, padding='post')
    for seqs in (eng_train_seq, eng_val_seq, eng_test_seq)
)

spa_train_pad, spa_val_pad, spa_test_pad = (
    pad_sequences(seqs, maxlen=max_spa_len, padding='post')
    for seqs in (spa_train_seq, spa_val_seq, spa_test_seq)
)


In [None]:
# Teacher forcing: the decoder is fed the target without its last position and
# is trained to emit the target without its first position (shifted by one).
decoder_input_train, decoder_output_train = (
    spa_train_pad[:, :-1],
    spa_train_pad[:, 1:],
)

decoder_input_val, decoder_output_val = (
    spa_val_pad[:, :-1],
    spa_val_pad[:, 1:],
)

decoder_input_test, decoder_output_test = (
    spa_test_pad[:, :-1],
    spa_test_pad[:, 1:],
)


In [None]:
# Model sizes shared by both parts of the notebook:
#   embedding_dim - width of the token embedding vectors
#   lstm_units    - hidden size of the encoder and decoder LSTMs
embedding_dim, lstm_units = 512, 1024


In [None]:
# Encoder: token ids -> embeddings -> LSTM. Only the final states are used later.
encoder_inputs = Input(shape=(max_eng_len,))
encoder_embedding = Embedding(
    input_dim=eng_vocab_size,
    output_dim=embedding_dim,
)(encoder_inputs)

encoder_lstm = LSTM(units=lstm_units, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

# Final hidden/cell state pair that initialises the decoder.
encoder_states = [state_h, state_c]


In [None]:
# Decoder (training graph): consumes the shifted target sequence, one id per step.
decoder_inputs = Input(shape=(max_spa_len-1,))
decoder_embedding = Embedding(
    input_dim=spa_vocab_size,
    output_dim=embedding_dim,
)(decoder_inputs)

# return_sequences gives one output per step; return_state exposes the final
# states, which the inference decoder built later relies on.
decoder_lstm = LSTM(units=lstm_units, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(
    decoder_embedding,
    initial_state=encoder_states,
)

# Per-step probability distribution over the Spanish vocabulary.
decoder_dense = Dense(units=spa_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)


In [None]:
# End-to-end training model: (source ids, shifted target ids) -> next-token probabilities.
model = Model(
    inputs=[encoder_inputs, decoder_inputs],
    outputs=decoder_outputs,
)

# The sparse loss takes integer token ids directly as labels (no one-hot needed).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train with teacher forcing; validation uses the same input/label layout.
history = model.fit(
    x=[eng_train_pad, decoder_input_train],
    y=decoder_output_train,
    validation_data=(
        [eng_val_pad, decoder_input_val],
        decoder_output_val,
    ),
    batch_size=64,
    epochs=20,
)


Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 205ms/step - accuracy: 0.6890 - loss: 2.6116 - val_accuracy: 0.7507 - val_loss: 1.6485
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 203ms/step - accuracy: 0.7504 - loss: 1.6439 - val_accuracy: 0.7825 - val_loss: 1.3781
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 203ms/step - accuracy: 0.7855 - loss: 1.2768 - val_accuracy: 0.8044 - val_loss: 1.2077
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 202ms/step - accuracy: 0.8142 - loss: 0.9866 - val_accuracy: 0.8171 - val_loss: 1.1189
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 203ms/step - accuracy: 0.8399 - loss: 0.7613 - val_accuracy: 0.8268 - val_loss: 1.0671
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 202ms/step - accuracy: 0.8684 - loss: 0.5823 - val_accuracy: 0.8348 - val_loss: 1.0360
Epoc

In [None]:
# Inference encoder: maps a padded source sequence to the [h, c] states
# that seed the decoder.
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)


In [None]:
# Inference decoder for Part 1: reuses the trained decoder layers, but takes the
# LSTM states as explicit inputs and returns the updated states, so decoding can
# be driven from outside the model.
decoder_state_input_h = Input(shape=(lstm_units,))
decoder_state_input_c = Input(shape=(lstm_units,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# NOTE(review): decoder_embedding was built on decoder_inputs, whose declared
# length is max_spa_len-1, while translate() feeds arrays of shape (1, 1).
# Confirm the installed Keras version accepts that shape mismatch.
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding,
    initial_state=decoder_states_inputs,
)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)


In [None]:
# Inverse Spanish vocabulary (token id -> word); id 0 is mapped to an empty string.
reverse_spa_index = {
    **{token_id: token for token, token_id in spa_tokenizer.word_index.items()},
    0: "",
}

def translate(sentence):
    """Greedily decode a Spanish translation with the Part 1 encoder/decoder.

    The sentence is tokenised with ``eng_tokenizer``, padded to ``max_eng_len``
    and encoded once. The decoder is then run one token at a time, starting
    from ``<start>`` and feeding each arg-max prediction back in, until it emits
    ``<end>`` or ``max_spa_len`` steps have been taken.

    Args:
        sentence: English input text. It is not lower-cased here.

    Returns:
        The predicted words joined by single spaces, without the
        ``<start>``/``<end>`` markers.
    """
    seq = eng_tokenizer.texts_to_sequences([sentence])
    seq = pad_sequences(seq, maxlen=max_eng_len, padding='post')

    # verbose=0: predict() is called once per decoded token, and its default
    # progress bar would flood the cell output.
    states = encoder_model.predict(seq, verbose=0)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = spa_tokenizer.word_index['<start>']

    decoded_words = []

    # A fixed step budget guarantees termination. A loop keyed on the number of
    # decoded words never ends if the model keeps predicting the padding id,
    # because the empty word it maps to does not increase the word count.
    for _ in range(max_spa_len):
        output, h, c = decoder_model.predict([target_seq, *states], verbose=0)

        predicted_id = int(np.argmax(output[0, -1, :]))
        word = reverse_spa_index.get(predicted_id, '')

        if word == '<end>':
            break

        # Padding / unknown ids map to '' and contribute no visible word.
        if word:
            decoded_words.append(word)

        # Feed the prediction and the updated states back in for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = predicted_id
        states = [h, c]

    return " ".join(decoded_words)


In [None]:
# Spot-check one held-out example: source sentence, model output, reference.
print(f"English: {eng_test[10]}")
print(f"Predicted: {translate(eng_test[10])}")
print(f"Actual: {spa_test[10]}")


English: this is your bedroom.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted: esta es tu habitación.
Actual: <start> esta es tu habitación. <end>


In [None]:
# Sentence-level BLEU over the first 100 test pairs. method1 smoothing is passed
# to sentence_bleu for every pair.
smooth = SmoothingFunction().method1

bleu_scores = []

for i in range(100):

    # Hypothesis: the model's translation of the i-th English test sentence.
    predicted = translate(eng_test[i])

    # Reference: the gold Spanish sentence with the decoder markers removed.
    reference = spa_test[i].replace("<start>", "").replace("<end>", "").strip()

    reference_tokens = reference.split()
    predicted_tokens = predicted.split()

    score = sentence_bleu(
        [reference_tokens],
        predicted_tokens,
        smoothing_function=smooth,
    )

    bleu_scores.append(score)

print("Average BLEU Score:", np.mean(bleu_scores))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45

### Part 2: LSTM Encoder-Decoder with Attention

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention over encoder outputs.

    Each encoder position is scored as
    ``V(tanh(W1(encoder_outputs) + W2(decoder_hidden)))``; the scores are
    softmax-normalised along axis 1 and used to form a weighted sum of the
    encoder outputs.

    Args:
        units: Output width of the ``W1`` and ``W2`` projection layers.
        **kwargs: Standard Keras ``Layer`` arguments (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)

        # Kept so the layer can report its configuration in get_config().
        self.units = units

        self.W1 = Dense(units)   # projects the encoder outputs
        self.W2 = Dense(units)   # projects the decoder hidden state
        self.V = Dense(1)        # collapses each projected position to one score

    def call(self, encoder_outputs, decoder_hidden):
        """Compute the context vector and attention weights.

        Args:
            encoder_outputs: Encoder output sequence; expected to be
                (batch, source_len, features) - the softmax and the sum below
                both run over axis 1.
            decoder_hidden: Current decoder hidden state; expected to be
                (batch, features) - a length-1 axis is inserted at position 1
                so it broadcasts against every encoder position.

        Returns:
            ``(context_vector, attention_weights)``: the attention-weighted sum
            of ``encoder_outputs`` over axis 1, and the weights themselves
            (one scalar per encoder position, last axis of size 1).
        """
        decoder_hidden = tf.expand_dims(decoder_hidden, 1)

        score = self.V(
            tf.nn.tanh(
                self.W1(encoder_outputs) + self.W2(decoder_hidden)
            )
        )

        # Normalise across encoder positions.
        attention_weights = tf.nn.softmax(score, axis=1)

        context_vector = attention_weights * encoder_outputs
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

    def get_config(self):
        """Return the layer configuration, including ``units``."""
        config = super().get_config()
        config.update({"units": self.units})
        return config


In [None]:
# Attention encoder: unlike Part 1 it returns the output at *every* source
# position (return_sequences=True) so the decoder can attend over them.
encoder_inputs_att = Input(shape=(max_eng_len,))

encoder_embedding_att = Embedding(
    input_dim=eng_vocab_size,
    output_dim=embedding_dim,
)(encoder_inputs_att)

encoder_lstm_att = LSTM(
    units=lstm_units,
    return_sequences=True,
    return_state=True,
    dropout=0.3,
    recurrent_dropout=0.3,
)

# Full output sequence plus the final hidden and cell states.
encoder_outputs_att, state_h_att, state_c_att = encoder_lstm_att(
    encoder_embedding_att
)


In [None]:
from tensorflow.keras.layers import Concatenate, Reshape, TimeDistributed

# Decoder input: the shifted target sequence (teacher forcing), one id per step.
decoder_inputs_att = Input(shape=(max_spa_len-1,))

# Embedding of the whole decoder input; sliced one step at a time below.
decoder_embedding_att = Embedding(
    spa_vocab_size,
    embedding_dim
)(decoder_inputs_att)

# Decoder LSTM. It is called on one time step at a time, so return_sequences
# yields a (batch, 1, lstm_units) output per call.
decoder_lstm_att = LSTM(
    lstm_units,
    return_sequences=True,
    return_state=True,
    dropout=0.3,
    recurrent_dropout=0.3
)

# Attention layer, shared by every decoding step.
attention = BahdanauAttention(lstm_units)

# Softmax projection onto the Spanish vocabulary.
decoder_dense_att = Dense(spa_vocab_size, activation='softmax')

# The decoder starts from the encoder's final states.
decoder_state_h = state_h_att
decoder_state_c = state_c_att

# Per-step LSTM outputs; projected onto the vocabulary once, after the loop.
outputs = []

# Unrolled decoding loop: one attention + LSTM step per target position.
for t in range(max_spa_len - 1):

    # Embedded input token for this step, kept 3-D: (batch, 1, embedding_dim).
    decoder_input_t = decoder_embedding_att[:, t:t+1, :]

    # Context vector computed from the previous decoder hidden state.
    context_vector, attention_weights = attention(
        encoder_outputs_att,
        decoder_state_h
    )

    # Give the context a length-1 time axis so it can be joined to the input.
    context_vector = Reshape((1, lstm_units))(context_vector)

    decoder_combined_input = Concatenate(axis=-1)(
        [context_vector, decoder_input_t]
    )

    # One LSTM step, carrying the states forward to the next iteration.
    output, decoder_state_h, decoder_state_c = decoder_lstm_att(
        decoder_combined_input,
        initial_state=[decoder_state_h, decoder_state_c]
    )

    outputs.append(output)

# Stack the per-step LSTM outputs along time and apply the softmax projection
# ONCE. Dense acts on the last axis only, so the result equals projecting every
# step separately, but the graph now holds a single vocabulary-sized matmul
# (and a single kernel gradient) instead of one per time step.
decoder_hidden_seq_att = Concatenate(axis=1)(outputs)
decoder_outputs_att = decoder_dense_att(decoder_hidden_seq_att)


In [None]:
# Attention training model: (source ids, shifted target ids) -> per-step
# probabilities over the Spanish vocabulary.
bahdanau_model = Model(
    inputs=[encoder_inputs_att, decoder_inputs_att],
    outputs=decoder_outputs_att,
)

# Explicit Adam instance so the learning rate is visible and easy to tune.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

bahdanau_model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

bahdanau_model.summary()


In [None]:
# batch_size lowered from 128 to 64: the recorded run of this cell with 128
# stopped in epoch 1 with ResourceExhaustedError (OOM allocating a
# [128, 1024, 15535] float tensor, whose leading dimension is the batch size).
# 64 is the batch size the Part 1 model trains with; lower it further if the
# GPU still runs out of memory.
history_bahdanau = bahdanau_model.fit(
    [eng_train_pad, decoder_input_train],
    decoder_output_train,
    validation_data=(
        [eng_val_pad, decoder_input_val],
        decoder_output_val
    ),
    batch_size=64,
    epochs=60
)


Epoch 1/60


ResourceExhaustedError: Graph execution error:

Detected at node gradient_tape/functional_9_1/dense_11_1/MatMul/MatMul_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.12/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.12/dist-packages/tornado/platform/asyncio.py", line 211, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 645, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1999, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/tmp/ipython-input-1791530408.py", line 1, in <cell line: 0>

  File "/usr/local/lib/python3.12/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 114, in one_step_on_data

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 78, in train_step

OOM when allocating tensor with shape[128,1024,15535] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node gradient_tape/functional_9_1/dense_11_1/MatMul/MatMul_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_multi_step_on_iterator_401766]

In [None]:
# Inference encoder for the attention model: returns the full output sequence
# (needed by attention) together with the final hidden and cell states.
encoder_model_bahdanau = Model(
    inputs=encoder_inputs_att,
    outputs=[encoder_outputs_att, state_h_att, state_c_att],
)


In [None]:
# Reuse the embedding layer that was TRAINED as part of bahdanau_model.
# Creating a fresh Embedding here would give the inference decoder randomly
# initialised vectors that the LSTM and softmax layers never saw in training.
# The trained layer was applied inline when the training graph was built, so it
# is looked up as the Embedding whose output is decoder_embedding_att.
decoder_embedding_layer = next(
    (
        layer
        for layer in bahdanau_model.layers
        if isinstance(layer, Embedding) and layer.output is decoder_embedding_att
    ),
    None,
)
if decoder_embedding_layer is None:
    raise RuntimeError(
        "Could not find the trained decoder embedding layer in bahdanau_model."
    )

# Single-token decoder input (one step at a time during inference).
decoder_input_single = Input(shape=(1,))

# Encoder output sequence, computed once per sentence and fed in at every step.
encoder_outputs_input = Input(shape=(max_eng_len, lstm_units))

# Decoder states carried from the previous step.
decoder_state_input_h = Input(shape=(lstm_units,))
decoder_state_input_c = Input(shape=(lstm_units,))

# Embed the current input token with the trained weights.
decoder_embed_single = decoder_embedding_layer(decoder_input_single)

# Attention context from the previous hidden state (same layer as in training).
context_vector, attention_weights = attention(
    encoder_outputs_input,
    decoder_state_input_h
)

# Add a length-1 time axis so the context can be concatenated with the embedding.
context_vector = Reshape((1, lstm_units))(context_vector)

# Combine context + embedding, in the same order as the training graph.
decoder_combined_input = Concatenate(axis=-1)(
    [context_vector, decoder_embed_single]
)

# One LSTM step with the trained decoder LSTM.
decoder_outputs, state_h, state_c = decoder_lstm_att(
    decoder_combined_input,
    initial_state=[decoder_state_input_h, decoder_state_input_c]
)

# Vocabulary distribution for this step.
decoder_outputs = decoder_dense_att(decoder_outputs)

# Inference decoder: one step in, one step out, plus the attention weights
# so they can be inspected or plotted.
decoder_model_bahdanau = Model(
    [
        decoder_input_single,
        encoder_outputs_input,
        decoder_state_input_h,
        decoder_state_input_c
    ],
    [
        decoder_outputs,
        state_h,
        state_c,
        attention_weights
    ]
)


In [None]:
def translate_bahdanau(sentence):
    """Greedily decode a Spanish translation with the attention model.

    The sentence is tokenised with ``eng_tokenizer``, padded to ``max_eng_len``
    and encoded once. The attention decoder is then stepped at most
    ``max_spa_len`` times, starting from ``<start>`` and feeding each arg-max
    prediction back in, until ``<end>`` is predicted.

    Args:
        sentence: English input text. It is not lower-cased here.

    Returns:
        ``(decoded_sentence, attention_plot)``: the predicted words joined by
        spaces, and a list holding one attention-weight vector (one weight per
        encoder position) for every emitted word.
    """
    seq = eng_tokenizer.texts_to_sequences([sentence])
    seq = pad_sequences(seq, maxlen=max_eng_len, padding='post')

    # verbose=0: predict() runs once per decoded token, and its default
    # progress bar would flood the cell output.
    encoder_outputs, state_h, state_c = encoder_model_bahdanau.predict(
        seq, verbose=0
    )

    target_seq = np.array([[spa_tokenizer.word_index['<start>']]])

    decoded_words = []
    attention_plot = []

    for _ in range(max_spa_len):

        output, state_h, state_c, attention_weights = decoder_model_bahdanau.predict(
            [target_seq, encoder_outputs, state_h, state_c],
            verbose=0
        )

        predicted_id = np.argmax(output[0, 0, :])
        word = reverse_spa_index.get(predicted_id, '')

        if word == '<end>':
            break

        decoded_words.append(word)

        # Weights over encoder positions for the word just emitted.
        attention_plot.append(attention_weights[0, :, 0])

        # The prediction becomes the next decoder input.
        target_seq = np.array([[predicted_id]])

    return " ".join(decoded_words).strip(), attention_plot


In [None]:
# Sentence-level BLEU for the attention model over the first 100 test pairs.
# The smoothing function is created once instead of on every iteration.
bleu_smoothing = SmoothingFunction().method1

bleu_scores_bahdanau = []

for i in range(100):

    # The attention weights are not needed for scoring.
    pred, _ = translate_bahdanau(eng_test[i])

    # Reference with the decoder markers removed, stripped as in the Part 1 loop.
    ref = spa_test[i].replace("<start>", "").replace("<end>", "").strip()

    score = sentence_bleu(
        [ref.split()],
        pred.split(),
        smoothing_function=bleu_smoothing
    )

    bleu_scores_bahdanau.append(score)

print("Bahdanau BLEU:", np.mean(bleu_scores_bahdanau))
