In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dropout, Dense, BatchNormalization, Activation, concatenate, GRU, Embedding, Flatten, BatchNormalization

In [2]:
df = pd.read_csv("Translation.csv")
df

Unnamed: 0.1,Unnamed: 0,English,German
0,0,A cat came out from under the desk.,Eine Katze kam unter dem Schreibtisch raus.
1,1,"Instead of going myself, I sent a gift.","Anstatt selbst zu gehen, habe ich ein Geschenk..."
2,2,let them eat crack reads this graffiti in new ...,lassen sie essen Crack liest diese Graffiti in...
3,3,My father helped me with my homework.,Mein Vater half mir bei meinen Hausaufgaben.
4,4,Please correct me when I make a mistake.,"Bitte korrigieren Sie mich, wenn ich einen Feh..."
...,...,...,...
12995,12995,Our house was robbed while we were away.,"Unser Haus wurde ausgeraubt, als wir weg waren."
12996,12996,I would like to speak to the head nurse.,Ich möchte mit der Oberschwester sprechen.
12997,12997,We'll do it tomorrow.,Wir machen es morgen.
12998,12998,What's going on here?,Was ist hier los?


In [3]:
input_texts= df['English']
target_texts=df['German']
df['German'] = "<start> " + df['German'] + " <end>"

In [4]:
# Tokenizing input (English) sentences
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
max_input_length = pd.Series(input_sequences).map(len).max()+1
input_sequences_padded = pad_sequences(input_sequences, maxlen=max_input_length, padding='post')

# Tokenizing output (German) sentences
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)
max_target_length = pd.Series(target_sequences).map(len).max()+1
target_sequences_padded = pad_sequences(target_sequences, maxlen=max_target_length, padding='post')

# Vocabulary sizes
input_vocab_size = len(input_tokenizer.word_index) + 1
target_vocab_size = len(target_tokenizer.word_index) + 1


In [5]:
import tensorflow as tf

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(AttentionLayer, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        # Multi-Head Attention layer from TensorFlow
        self.multi_head_attention = tf.keras.layers.MultiHeadAttention(num_heads=self.num_heads, key_dim=self.d_model)
        self.dense = tf.keras.layers.Dense(d_model)  # Dense layer to output the context vector

    def call(self, encoder_outputs, decoder_outputs, mask=None):
        # Multi-Head Attention expects inputs as (batch_size, seq_len, d_model)
        # Encoder outputs: (batch_size, seq_len_encoder, d_model)
        # Decoder outputs: (batch_size, seq_len_decoder, d_model)

        # Compute attention (no need to manually compute scores here)
        attention_output, attention_weights = self.multi_head_attention(
            query=decoder_outputs,  # Decoder outputs as query
            value=encoder_outputs,  # Encoder outputs as value
            key=encoder_outputs,    # Encoder outputs as key
            attention_mask=mask,    # Mask for padding, optional
            return_attention_scores=True  # Return attention weights if needed
        )

        # The output from multi-head attention is already the weighted sum of encoder outputs
        # We can optionally pass it through a dense layer for further processing
        context_vector = self.dense(attention_output)  # (batch_size, seq_len_decoder, d_model)

        return context_vector, attention_weights


In [6]:
from tensorflow.keras.layers import Bidirectional, LSTM, Embedding, Dense, Input, Concatenate

d_model = 25 # embed_size
num_heads = 8  # Number of attention heads

# encoder
encoder_inputs = Input(shape=(max_input_length,))
encoder_embedded = Embedding(input_vocab_size, d_model, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(512, return_state=True, return_sequences=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedded )
encoder_states = [state_h, state_c]

# decder
decoder_inputs = Input(shape=(None,))
decoder_embedded = Embedding(target_vocab_size, d_model, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(512, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)

# attention_layer
attention_layer = AttentionLayer(d_model=d_model, num_heads=num_heads)
attention_outputs, attention_weights = attention_layer(encoder_outputs, decoder_outputs)

#combined layers
decoder_combined_context = Concatenate()([decoder_outputs, attention_outputs])

output_layer = Dense(target_vocab_size, activation='softmax')
outputs = output_layer(decoder_combined_context)

model = tf.keras.models.Model([encoder_inputs, decoder_inputs], outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()



In [7]:
# decoder input and output data
decoder_input_data = target_sequences_padded[:, :-1]
decoder_output_data = target_sequences_padded[:, 1:]

In [8]:
print("Decoder input shape:", decoder_input_data.shape)
print("Decoder output shape:", decoder_output_data.shape)

Decoder input shape: (13000, 148)
Decoder output shape: (13000, 148)


In [9]:
# Training
history = model.fit(
    [input_sequences_padded, decoder_input_data],  # Encoder input and decoder input
    decoder_output_data,                   # Decoder output
    epochs=200,
    validation_split=0.2
)

Epoch 1/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 205ms/step - accuracy: 0.9295 - loss: 1.3616 - val_accuracy: 0.9503 - val_loss: 0.4036
Epoch 2/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 210ms/step - accuracy: 0.9491 - loss: 0.3939 - val_accuracy: 0.9504 - val_loss: 0.3835
Epoch 3/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 210ms/step - accuracy: 0.9497 - loss: 0.3712 - val_accuracy: 0.9505 - val_loss: 0.3721
Epoch 4/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 210ms/step - accuracy: 0.9508 - loss: 0.3537 - val_accuracy: 0.9512 - val_loss: 0.3666
Epoch 5/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 210ms/step - accuracy: 0.9510 - loss: 0.3408 - val_accuracy: 0.9519 - val_loss: 0.3610
Epoch 6/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 209ms/step - accuracy: 0.9512 - loss: 0.3296 - val_accuracy: 0.9519 - val_loss: 0.3567
Epoc

KeyboardInterrupt: 

In [10]:
encoder_model = tf.keras.models.Model(encoder_inputs, [encoder_outputs, state_h, state_c])
decoder_state_input_h = tf.keras.layers.Input(shape=(512,))
decoder_state_input_c = tf.keras.layers.Input(shape=(512,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
#decoder_embedded = Embedding(target_vocab_size, d_model, mask_zero=True)(decoder_inputs)
decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)

# attention layerr
attention_outputs, attention_weights = attention_layer(encoder_outputs, decoder_outputs)

# Combined
decoder_combined_context = tf.keras.layers.concatenate([decoder_outputs, attention_outputs])

decoder_outputs = output_layer(decoder_combined_context)
decoder_model = tf.keras.models.Model(
    [decoder_inputs] + decoder_states_inputs + [encoder_outputs],
    [decoder_outputs, state_h, state_c]
)

In [12]:
def summarize_text(input_text):
    input_seq = input_tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_input_length, padding='post')

    model_outputs = encoder_model.predict(input_seq)
    encoder_outputs, state_h, state_c = model_outputs

    # Initialize the decoder input with the start token
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_tokenizer.word_index.get('<start>', 1)

    # summary
    summary_output = ''
    for _ in range(max_target_length):
        output_tokens, h, c = decoder_model.predict([target_seq] + [state_h, state_c] + [encoder_outputs])
        predicted_token = np.argmax(output_tokens[0, -1, :])
        word = target_tokenizer.index_word.get(predicted_token, '')
        if word == '<end>':
            break
        summary_output += ' ' + word
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = predicted_token
        state_h, state_c = h, c

    return summary_output.strip()

print(summarize_text("I would like to talk with my family"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57