In [20]:
import torch
import sys
import os

# Set up imports from src/
sys.path.append(os.path.abspath("../"))

from src.attention import MultiHeadAttention
from src.encoder import EncoderLayer
from src.decoder import DecoderLayer
from src.positional_encodings import PositionalEncoding



In [None]:
# Testing with dummy data.
# Seed torch's global RNG so the random tensor below is identical on every
# Restart & Run All; without it each run produces different values.
SEED = 42
torch.manual_seed(SEED)

B, T, D = 2, 10, 512  # batch size, sequence length, embedding dimension
dummy_input = torch.randn(B, T, D)


In [4]:
# Build the positional-encoding module for embedding width D and run the
# dummy batch through it.
pos_enc = PositionalEncoding(d_model=D)
output = pos_enc(dummy_input)

# The recorded output shows the shape is unchanged; assert it so a regression
# in src.positional_encodings fails loudly instead of only printing.
assert output.shape == dummy_input.shape, (
    f"expected {tuple(dummy_input.shape)}, got {tuple(output.shape)}"
)

print("Positional encoding added. Output shape:", output.shape)



Positional encoding added. Output shape: torch.Size([2, 10, 512])


We use sinusoidal encodings to inject order into our input sequence, since the Transformer has no built-in recurrence.


In [5]:
# Multi-head attention with 8 heads over the D-dimensional embeddings.
attn = MultiHeadAttention(d_model=D, num_heads=8)

# Self-attention: the same tensor is passed as all three inputs.
out = attn(dummy_input, dummy_input, dummy_input)

# The recorded output shows the input shape is preserved; check it rather
# than relying on reading the printed line.
assert out.shape == dummy_input.shape, (
    f"expected {tuple(dummy_input.shape)}, got {tuple(out.shape)}"
)

print("Multi-head attention output shape:", out.shape)


Multi-head attention output shape: torch.Size([2, 10, 512])


Here, we project the input into Query, Key, and Value vectors, compute scaled dot-product attention, and concatenate the heads.

In [6]:
# Single encoder layer: 8 attention heads, feed-forward width 2048.
encoder = EncoderLayer(d_model=D, num_heads=8, d_ff=2048)
enc_output = encoder(dummy_input)

# The recorded output shows the layer keeps the input shape; assert it so a
# shape regression stops the notebook here. enc_output is reused by the
# decoder cell below.
assert enc_output.shape == dummy_input.shape, (
    f"expected {tuple(dummy_input.shape)}, got {tuple(enc_output.shape)}"
)

print("Encoder layer output shape:", enc_output.shape)


Encoder layer output shape: torch.Size([2, 10, 512])


The encoder applies self-attention and a feedforward network, wrapped in residual connections and layer normalization.


In [7]:
# Single decoder layer: 8 attention heads, feed-forward width 2048.
decoder = DecoderLayer(d_model=D, num_heads=8, d_ff=2048)

# The dummy batch is the decoder input and the encoder layer's output is the
# second argument.
# NOTE(review): no mask argument is passed here; whether DecoderLayer applies
# a look-ahead mask by default is not visible from this notebook -- confirm.
dec_output = decoder(dummy_input, enc_output)

# The recorded output shows the decoder input shape is preserved; assert it.
assert dec_output.shape == dummy_input.shape, (
    f"expected {tuple(dummy_input.shape)}, got {tuple(dec_output.shape)}"
)

print("Decoder layer output shape:", dec_output.shape)


Decoder layer output shape: torch.Size([2, 10, 512])


The decoder combines:
- Masked self-attention (can't look ahead)
- Encoder-decoder attention
- Feedforward network

It enables sequence generation by learning dependencies on previously generated tokens and encoder output.


Testing the transformer with some data


In [18]:
# Quick sanity check of the translation CSV: load it and list its column names.
# NOTE(review): pandas is imported mid-notebook here; consider moving it to the
# top import cell so a fresh-kernel run shows all dependencies up front.
import pandas as pd
# Path is relative to the notebook's working directory.
df = pd.read_csv("../data/tiny_translation.csv")
print(df.columns.tolist())

['en', 'fr']


In [27]:
import torch
import sys
import os
import pandas as pd

# Make the project root importable. Guarded so that re-running this cell (or
# running it after the first cell) does not append duplicate sys.path entries.
project_root = os.path.abspath("../")
if project_root not in sys.path:
    sys.path.append(project_root)

from src.preprocessing import load_translation_data, build_vocab, tensorize

csv_path = "../data/tiny_translation.csv"

# Fail fast with a clear message when the file is missing. Previously the cell
# printed "not found" and then crashed on the unconditional read below anyway.
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"CSV file not found at: {csv_path}")

# Read the CSV once and reuse the frame for both the debug output and the
# sentence lists (it used to be parsed twice).
print("CSV file found!")
df = pd.read_csv(csv_path)
print("CSV columns:", df.columns.tolist())
print("First few rows:\n", df.head())

# Both language columns are required below; report every missing one up front
# instead of failing on the first bad lookup.
missing_cols = {"en", "fr"} - set(df.columns)
if missing_cols:
    raise KeyError(f"CSV is missing expected column(s): {sorted(missing_cols)}")

# Source (English) and target (French) sentences as plain Python lists.
src_sents = df['en'].tolist()
tgt_sents = df['fr'].tolist()

# One vocabulary per language. build_vocab comes from src.preprocessing; its
# return structure is not visible from this notebook.
src_vocab = build_vocab(src_sents)
tgt_vocab = build_vocab(tgt_sents)
 




CSV file found!
CSV columns: ['en', 'fr']
First few rows:
                         en                                  fr
0                    hello                             bonjour
1              how are you                       comment ça va
2  the cat sits on the mat      le chat est assis sur le tapis
3  i love machine learning  j'aime l'apprentissage automatique
4             good morning                             bonjour
