<a href="https://colab.research.google.com/github/kimutaielvis/C-PROGRAMS/blob/main/Transformer_Model_for_English_French_Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1. Setup and Installation**

In [4]:
!git clone https://github.com/kimutaielvis/NEW.git
%cd NEW


fatal: destination path 'NEW' already exists and is not an empty directory.
/content/NEW


In [2]:
#@title Install and Import Libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.18.0


Explanation:

*  Imports core libraries for numerical operations (NumPy) and deep learning (TensorFlow/Keras)
*  Verifies TensorFlow version to ensure compatibility




# **2. Data Loading and Preprocessing**

In [None]:
#@title Download and Extract Dataset
# Fetch the archive over HTTPS (not plain HTTP) so the download cannot be
# tampered with in transit. get_file caches the download and, with
# extract=True, unpacks it.
data_path = keras.utils.get_file(
    "fra-eng.zip",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/fra-eng.zip",
    extract=True)
# Strip ".zip" from the returned path; later cells read "fra.txt" from under
# data_path.
# NOTE(review): what get_file returns with extract=True (archive path vs.
# extraction directory) depends on the installed Keras version -- confirm that
# fra.txt really lives directly under this path.
data_path = data_path.replace(".zip", "")
print("Dataset extracted to:", data_path)

Explanation:
*   Downloads the French-English parallel corpus from TensorFlow's public download storage (`download.tensorflow.org`)
*   Automatically extracts the zip file
*   Stores path to the extracted data directory











# **3. Text Processing**

In [None]:
#@title Process Text Pairs
# Number of sentence pairs to keep for the demo; set to None to use every pair.
MAX_PAIRS = 5000

with open(data_path + "/fra.txt", "r", encoding="utf-8") as f:
    # Keep only non-blank lines. Filtering (rather than dropping the last
    # element unconditionally) is correct whether or not the file ends with a
    # trailing newline, and also tolerates stray empty lines.
    lines = [line for line in f.read().split("\n") if line.strip()]

text_pairs = []
for line in lines[:MAX_PAIRS]:  # lines[:None] yields the full list
    # Only the first two tab-separated columns (English, French) are used;
    # any additional columns are ignored instead of breaking the unpacking.
    english, french = line.split("\t")[:2]
    french = "[start] " + french + " [end]"  # Add special tokens
    text_pairs.append((english, french))

print("Sample pair:", text_pairs[0])

Explanation:
*   Limits to 5,000 pairs for demonstration (remove the limit to use the full dataset)
*   Adds special tokens [start] and [end] to French sentences
*   Splits each line into English-French pairs
*   Reads the text file line by line







# **4. Tokenization**

In [None]:
#@title Create Tokenizers
# Word-level tokenizers with the default character filtering disabled
# (filters=""), one per language.
english_tokenizer = keras.preprocessing.text.Tokenizer(filters="")
french_tokenizer = keras.preprocessing.text.Tokenizer(filters="")

# Build each vocabulary from its side of the sentence pairs.
english_tokenizer.fit_on_texts([pair[0] for pair in text_pairs])
french_tokenizer.fit_on_texts([pair[1] for pair in text_pairs])

# +1 accounts for the padding token, which is not part of word_index.
english_vocab_size = len(english_tokenizer.word_index) + 1
french_vocab_size = len(french_tokenizer.word_index) + 1

# Hyperparameters used by the model-building / training cell.
# The training cell refers to the lowercase name `sequence_length`; the
# original capitalised spelling is kept as an alias so either name resolves.
sequence_length = 20
Sequence_length = sequence_length
batch_size = 64

print("English vocab size:", english_vocab_size)
print("French vocab size:", french_vocab_size)

Explanation:
*  Initializes tokenizers without default text filtering
*  Builds vocabulary from the text pairs
*  Calculates vocabulary sizes (+1 for padding token)
*  Displays vocabulary sizes for both languages

# **5. Transformer Architecture**

In [None]:
#@title Transformer model architecture
# Define the transformer model architecture
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention, then a position-wise feed-forward net.

    Args:
        inputs: Tensor fed to the block. It is used as both query and value of
            the self-attention layer, and its last dimension sets the number of
            filters of the final projection.
        head_size: Passed as `key_dim` to `MultiHeadAttention`.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the hidden (ReLU) kernel-size-1 convolution.
        dropout: Rate used for the attention layer's own dropout and for both
            `Dropout` layers.

    Returns:
        The feed-forward output added to the attention sub-layer's output.
    """
    # --- Self-attention sub-layer ---
    self_attention = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )
    attended = self_attention(inputs, inputs)
    attended = layers.Dropout(dropout)(attended)
    attended = layers.LayerNormalization(epsilon=1e-6)(attended)
    attn_out = attended + inputs  # skip connection around the attention sub-layer

    # --- Feed-forward sub-layer (kernel_size=1 convolutions act per position) ---
    model_dim = inputs.shape[-1]
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(attn_out)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=model_dim, kernel_size=1)(hidden)
    projected = layers.LayerNormalization(epsilon=1e-6)(projected)
    return projected + attn_out  # skip connection around the feed-forward sub-layer



In [None]:
#@title Transformer Model with Attention
# Transformer Model with Attention
def transformer_decoder(inputs, enc_outputs, head_size, num_heads, ff_dim,
                        dropout=0, use_causal_mask=True):
    """Apply one decoder block: masked self-attention, cross-attention, feed-forward.

    Args:
        inputs: Decoder-side tensor. It is used as query and value of the
            self-attention layer, and its last dimension sets the number of
            filters of the final projection.
        enc_outputs: Encoder output, used as the value (and key) of the
            cross-attention layer.
        head_size: Passed as `key_dim` to both `MultiHeadAttention` layers.
        num_heads: Number of attention heads in both attention layers.
        ff_dim: Number of filters of the hidden (ReLU) kernel-size-1 convolution.
        dropout: Rate used for the attention layers' own dropout and for every
            `Dropout` layer in the block.
        use_causal_mask: When True (default), the self-attention is causally
            masked so a position cannot attend to later positions. Without
            this, a decoder trained with teacher forcing can simply read the
            token it is supposed to predict. Pass False to get unmasked
            self-attention.

    Returns:
        The feed-forward output added to the cross-attention sub-layer's output.
    """
    # Masked self-attention and normalization
    x = layers.MultiHeadAttention(
        key_dim=head_size,
        num_heads=num_heads,
        dropout=dropout)(inputs, inputs, use_causal_mask=use_causal_mask)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs  # First residual connection

    # Encoder-decoder attention: queries from the decoder, values from the encoder
    x = layers.MultiHeadAttention(
        key_dim=head_size,
        num_heads=num_heads,
        dropout=dropout)(res, enc_outputs)  # Cross-attention
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + res  # Second residual connection

    # Feed-forward network (kernel_size=1 convolutions act per position)
    x = layers.Conv1D(
        filters=ff_dim,
        kernel_size=1,
        activation="relu")(res)  # Position-wise FFN
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(
        filters=inputs.shape[-1],
        kernel_size=1)(x)  # Projection back to the input width
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res  # Final residual connection

In [None]:
dec_outputs = layers.Dropout(0.1)(dec_outputs)
    outputs = layers.Dense(target_vocab_size, activation="softmax")(dec_outputs)

    model = keras.Model(inputs=[inputs, dec_inputs], outputs=outputs)
    return model

# Build and train the model
# `Sequence_length` is the name the tokenization cell defines for the padded
# sequence length.
transformer_model = build_model(english_vocab_size, french_vocab_size, Sequence_length)
transformer_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Convert every sentence to a list of integer token ids.
english_sequences = english_tokenizer.texts_to_sequences([pair[0] for pair in text_pairs])
french_sequences = french_tokenizer.texts_to_sequences([pair[1] for pair in text_pairs])

# Pad (or truncate) every sequence to the same length; padding goes at the end.
english_sequences = keras.preprocessing.sequence.pad_sequences(
    english_sequences, maxlen=Sequence_length, padding="post")
french_sequences = keras.preprocessing.sequence.pad_sequences(
    french_sequences, maxlen=Sequence_length, padding="post")

# Teacher forcing: the decoder is fed the French sequence without its last
# token and is trained to predict the same sequence shifted left by one.
# NOTE(review): the decoder input is therefore Sequence_length - 1 tokens long;
# confirm that build_model's decoder input accepts that length.
decoder_inputs = french_sequences[:, :-1]
decoder_targets = french_sequences.reshape(
    french_sequences.shape[0], french_sequences.shape[1], 1)[:, 1:]

transformer_model.fit(
    [english_sequences, decoder_inputs],
    decoder_targets,
    batch_size=batch_size,
    epochs=10,
)