In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LayerNormalization, Dropout
from tensorflow.keras import layers
import numpy as np


In [2]:
# Positional Encoding
class PositionalEncoding(layers.Layer):
    def __init__(self, maxlen, embed_dim):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(maxlen, embed_dim)

    def get_angles(self, position, i, embed_dim):
        angles = 1 / np.power(10000, (2 * (i // 2)) / np.float32(embed_dim))
        return position * angles

    def positional_encoding(self, maxlen, embed_dim):
        angle_rads = self.get_angles(np.arange(maxlen)[:, np.newaxis], np.arange(embed_dim)[np.newaxis, :], embed_dim)
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

In [3]:
# Multi-Head Self-Attention
class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim)
        self.key_dense = Dense(embed_dim)
        self.value_dense = Dense(embed_dim)
        self.combine_heads = Dense(embed_dim)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs, training=None):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)
        attention, weights = self.scaled_dot_product_attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

    def scaled_dot_product_attention(self, query, key, value):
        matmul_qk = tf.matmul(query, key, transpose_b=True)
        dk = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, value)
        return output, attention_weights

In [4]:
# Transformer Block
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential([Dense(ff_dim, activation="relu"), Dense(embed_dim)])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        attn_output = self.att(inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [5]:
# Encoder Layer
class Encoder(layers.Layer):
    def __init__(self, num_layers, embed_dim, num_heads, ff_dim, input_vocab_size, maxlen, rate=0.1):
        super(Encoder, self).__init__()
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.embedding = Embedding(input_vocab_size, embed_dim)
        self.pos_encoding = PositionalEncoding(maxlen, embed_dim)
        self.enc_layers = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_layers)]
        self.dropout = Dropout(rate)

    def call(self, inputs, training=None):
        seq_len = tf.shape(inputs)[1]
        inputs = self.embedding(inputs)
        inputs *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))
        inputs = self.pos_encoding(inputs)
        inputs = self.dropout(inputs, training=training)
        for layer in self.enc_layers:
            inputs = layer(inputs, training=training)
        return inputs

In [6]:
# Decoder Layer
class Decoder(layers.Layer):
    def __init__(self, num_layers, embed_dim, num_heads, ff_dim, target_vocab_size, maxlen, rate=0.1):
        super(Decoder, self).__init__()
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.embedding = Embedding(target_vocab_size, embed_dim)
        self.pos_encoding = PositionalEncoding(maxlen, embed_dim)
        self.dec_layers = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_layers)]
        self.dropout = Dropout(rate)

    def call(self, inputs, enc_output, training=None):
        seq_len = tf.shape(inputs)[1]
        inputs = self.embedding(inputs)
        inputs *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))
        inputs = self.pos_encoding(inputs)
        inputs = self.dropout(inputs, training=training)
        for layer in self.dec_layers:
            inputs = layer(inputs, training=training)
        return inputs

In [7]:
# Transformer Model
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, embed_dim, num_heads, ff_dim, input_vocab_size, target_vocab_size, maxlen):
        super(Transformer, self).__init__()
        self.encoder = Encoder(num_layers, embed_dim, num_heads, ff_dim, input_vocab_size, maxlen)
        self.decoder = Decoder(num_layers, embed_dim, num_heads, ff_dim, target_vocab_size, maxlen)
        self.final_layer = Dense(target_vocab_size)

    def call(self, inputs, training=None):
        enc_input, dec_input = inputs
        enc_output = self.encoder(enc_input, training=training)
        dec_output = self.decoder(dec_input, enc_output, training=training)
        final_output = self.final_layer(dec_output)
        return final_output

In [8]:
# Model Hyperparameters
embed_dim = 256
num_heads = 8
ff_dim = 512
num_layers = 4
input_vocab_size = 10000  # Example vocab size
target_vocab_size = 10000  # Example vocab size
maxlen = 100  # Maximum length of input/output sequences

In [9]:
# Load and Preprocess the Dataset
def load_data(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    pairs = [line.strip().split('\t') for line in lines if len(line.split('\t')) == 2]
    return zip(*pairs)

In [10]:
# Initialize the Model
transformer = Transformer(num_layers, embed_dim, num_heads, ff_dim, input_vocab_size, target_vocab_size, maxlen)

In [11]:
# Compile the Model
transformer.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [12]:
# Example Data Preparation
def load_data(file_path):
    input_texts, target_texts = [], []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            input_text, target_text = line.strip().split('\t')
            input_texts.append(input_text)
            target_texts.append(target_text)
    return input_texts, target_texts

In [13]:
# Load Data
input_texts, target_texts = load_data(r"C:\Users\sanja\Downloads\french-english bilingual pairs\fra.txt")

In [14]:
# Tokenization
input_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
target_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')

In [15]:
input_tokenizer.fit_on_texts(input_texts)
target_tokenizer.fit_on_texts(target_texts)

In [16]:
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)

In [17]:
# Padding
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=maxlen, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=maxlen, padding='post')

In [18]:
# Split data into training and validation sets
split_idx = int(0.8 * len(input_sequences))
input_train, input_val = input_sequences[:split_idx], input_sequences[split_idx:]
target_train, target_val = target_sequences[:split_idx], target_sequences[split_idx:]

In [19]:
# Train the Model
transformer.fit(
    [input_train, target_train[:, :-1]],
    target_train[:, 1:],
    epochs=10,
    batch_size=64,
    validation_data=([input_val, target_val[:, :-1]], target_val[:, 1:])
)


Epoch 1/10




InvalidArgumentError: Graph execution error:

Detected at node transformer_1/decoder_1/embedding_1_1/GatherV2 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "c:\Users\sanja\anaconda3\Lib\asyncio\base_events.py", line 641, in run_forever

  File "c:\Users\sanja\anaconda3\Lib\asyncio\base_events.py", line 1987, in _run_once

  File "c:\Users\sanja\anaconda3\Lib\asyncio\events.py", line 88, in _run

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "C:\Users\sanja\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\sanja\AppData\Local\Temp\ipykernel_26712\1024614744.py", line 2, in <module>

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 318, in fit

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 121, in one_step_on_iterator

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 108, in one_step_on_data

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 51, in train_step

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\layers\layer.py", line 882, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\sanja\AppData\Local\Temp\ipykernel_26712\3787975811.py", line 12, in call

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\layers\layer.py", line 882, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\sanja\AppData\Local\Temp\ipykernel_26712\1748313623.py", line 14, in call

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\layers\layer.py", line 882, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\layers\core\embedding.py", line 140, in call

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\ops\numpy.py", line 4875, in take

  File "c:\Users\sanja\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\numpy.py", line 1951, in take

indices[17,1] = 15092 is not in [0, 10000)
	 [[{{node transformer_1/decoder_1/embedding_1_1/GatherV2}}]] [Op:__inference_one_step_on_iterator_26663]

In [None]:
# Save the Model
transformer.save('transformer_model.h5')


In [None]:
# Prediction Function
def predict_sentence(sentence, input_tokenizer, target_tokenizer, maxlen):
    # Preprocess the input sentence
    input_sequence = input_tokenizer.texts_to_sequences([sentence])
    padded_input_sequence = tf.keras.preprocessing.sequence.pad_sequences(input_sequence, maxlen=maxlen, padding='post')
    
    # Predict
    predictions = transformer.predict([padded_input_sequence, np.zeros((1, maxlen - 1))])
    predicted_sequence = np.argmax(predictions, axis=-1)
    
    # Decode the sequence
    decoded_sentence = ' '.join([target_tokenizer.index_word.get(idx, '') for idx in predicted_sequence[0]])
    return decoded_sentence

In [None]:
# Load the model (if needed)
transformer = tf.keras.models.load_model('transformer_model.h5', custom_objects={'Transformer': Transformer})


In [None]:
# Example usage of prediction
input_sentence = "Bonjour, comment ça va?"
predicted_sentence = predict_sentence(input_sentence, input_tokenizer, target_tokenizer, maxlen)
print("Predicted English Translation:", predicted_sentence)