### Import Modules

In [1]:
# Python Libraries
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from nltk.tokenize import RegexpTokenizer
from torch.utils.data import DataLoader, TensorDataset
from tqdm.notebook import tqdm

In [2]:
# Our Modules
from modules.data_preprocessing import DataPreprocessing
from modules.transformer import Transformer

### Parameters

In [3]:
# --- Data -------------------------------------------------------------------
# NOTE(review): absolute, machine-specific path; adjust it to wherever the CSV
# lives before running. A plain raw string is used (no `f` prefix) because the
# literal has no placeholders to interpolate.
data_path = r"D:\courses\Transformers\Data\eng_french.csv"

# --- Shared by preprocessing and the model ----------------------------------
# Both values are handed to DataPreprocessing and to Transformer below.
max_sentence_length = 30
batch_size = 32

# --- Model hyper-parameters (forwarded to Transformer) ----------------------
embedding_dim = 512
num_encoders = 1
num_decoders = 1
num_multiheads = 8
encoder_hidden_layer_size = 2048
decoder_hidden_layer_size = 2048

### Preprocess Data

In [4]:
# Build the preprocessing pipeline from the settings chosen in the parameters cell.
preprocess = DataPreprocessing(data_path=data_path,
                               max_sentence_length=max_sentence_length,
                               batch_size=batch_size)

# forward() runs the pipeline and hands back a (dataset, dataloader) pair; the
# progress messages printed under this cell come from that call.
dataset, dataloader = preprocess.forward()

Loading Data from D:\courses\Transformers\Data\eng_french.csv
Creating English vocabulary...
Creating French vocabulary...
Converting English Sentences to integer...
Converting French Sentences to integer...
Preparing Dataset for Input...
Done.


### Get Output

In [5]:
# Instantiate the model from the notebook-level settings.
# The first eight arguments are positional, so their order must match the
# Transformer constructor's signature (defined in modules/transformer.py).
transformer = Transformer(
    # batching / sequence geometry
    batch_size,
    max_sentence_length,
    # model width and attention heads
    embedding_dim,
    num_multiheads,
    # stack depth
    num_encoders,
    num_decoders,
    # hidden layer sizes
    encoder_hidden_layer_size,
    decoder_hidden_layer_size,
    # Vocabulary sizes are widened by one relative to what preprocessing
    # reports -- presumably to reserve an extra index (e.g. padding); TODO confirm.
    english_vocab_size=preprocess.english_vocab_size + 1,
    french_vocab_size=preprocess.french_vocab_size + 1,
)

In [13]:
# Add up the element counts of every parameter tensor that has
# requires_grad set, i.e. the ones an optimizer would update.
total_params = sum(
    tensor.numel()
    for tensor in transformer.parameters()
    if tensor.requires_grad
)
print(f"Total number of parameters in the model are: {total_params}")

Total number of parameters in the model are: 36745850


In [8]:
# Sanity-check a single forward pass: push only the first batch through the
# model, print the decoded result, then stop.
# No backward pass happens in this cell, so autograd is switched off to avoid
# building a graph for tensors that are only inspected.
with torch.no_grad():
    for batch in tqdm(dataloader):
        english_batch, french_batch = batch

        # Call the module itself rather than `.forward()`: transformer exposes
        # `.parameters()`, so it is presumably an nn.Module, and going through
        # `__call__` ensures any registered hooks run.
        result = transformer(english_batch, french_batch)

        # Each row of `result` is iterated as a sequence of integer ids and
        # mapped back to words through the French id -> word table.
        sentence_outputs = [
            ' '.join(preprocess.french_int_word[int(num)] for num in row)
            for row in result
        ]
        print(sentence_outputs)
        break  # one batch is enough for this check

  0%|          | 0/5489 [00:00<?, ?it/s]

['renforcer besoin féministe commenceront battons employeur encombrée invité taillâmes quart terrier report déjeunais existera existera programmée existera programmée existera programmée programmée programmée programmée programmée programmée programmée programmée programmée programmée programmée', 'également besoin féministe jurer mentionna versé rechargée connexion multiplication types noirs épargnes potable tempuras engourdira appelait hermétiques heurté heurté deniers deniers deniers partirent partirent partirent partirent deniers deniers deniers partirent', '1874 évidents serpent déranges échauffent tricheur proximité romancier romancier véridique véridique pari collectionner prodigue prodigue prodigue laisserons duc tournera laisserons serveuse serveuse serveuse serveuse serveuse laisserons laisserons laisserons laisserons laisserons', 'rêvais persistant survint rougissez proximité sculpté sculpté sculpté romancier véridique véridique véridique abandonné additionner lèverait dégou