In [None]:
from model.Attention_model import build_transformer_model
import time
import torch
import os
from sklearn.metrics import accuracy_score
from util import load_dataset
import numpy as np
import wandb
import yaml
# Authenticate with Weights & Biases.
# SECURITY: an API key must never be committed inside the notebook. Called
# without `key`, wandb.login() uses the WANDB_API_KEY environment variable or
# credentials previously stored by `wandb login`, and prompts otherwise.
# The key that used to be hard-coded here should be considered leaked and
# revoked in the W&B account settings.
wandb.login()


def _move_to(obj, device):
    """Recursively move ``obj`` to ``device``.

    Anything exposing a ``.to`` method (tensors, modules) is moved directly;
    lists, tuples, sets and dicts are rebuilt with their members moved; every
    other object is returned unchanged.
    """
    if hasattr(obj, "to"):
        return obj.to(device)
    if isinstance(obj, list):
        return [_move_to(item, device) for item in obj]
    if isinstance(obj, tuple):
        return tuple(_move_to(item, device) for item in obj)
    if isinstance(obj, set):
        return {_move_to(item, device) for item in obj}
    if isinstance(obj, dict):
        return {key: _move_to(value, device) for key, value in obj.items()}
    return obj


def run_epoch(model, optimizer, data_loader, loss_func, device, results, score_funcs, prefix="", desc=None):
    """Run one pass over ``data_loader`` and record loss/metrics in ``results``.

    The pass is a training pass (backward + optimizer step per batch) when
    ``model.training`` is true; otherwise only the forward pass and loss are
    computed. The caller is responsible for switching the model mode and for
    wrapping evaluation passes in ``torch.no_grad()``.

    Args:
        model: callable module; invoked as ``model(inputs)``.
        optimizer: optimizer stepped after every batch while training.
        data_loader: iterable yielding ``(inputs, labels)`` pairs.
        loss_func: callable invoked as ``loss_func(y_hat, labels)``.
        device: device the model and every batch are moved to.
        results: dict of lists; the mean loss is appended under
            ``prefix + " loss"`` and each score under ``prefix + " " + name``.
            Missing keys are created on demand.
        score_funcs: mapping ``name -> func(y_true, y_pred)``.
        prefix: label of this pass ("train", "validation", "test", ...).
        desc: accepted for interface compatibility; not used.

    Returns:
        Wall-clock seconds spent iterating over ``data_loader``.
    """
    import warnings

    model = model.to(device)
    is_eval_pass = prefix in ("validation", "test")
    running_loss = []
    y_true = []
    y_pred = []
    start = time.time()
    for inputs, labels in data_loader:
        # -- Move the batch to the device we are using.
        # The original code called `moveTo`, which is neither defined nor
        # imported in the visible notebook; `_move_to` is a local equivalent.
        inputs = _move_to(inputs, device)
        labels = _move_to(labels, device)

        if is_eval_pass:
            # Batches may be containers rather than tensors, so only touch
            # objects that actually are tensors.
            for batch_part in (inputs, labels):
                if isinstance(batch_part, torch.Tensor):
                    batch_part.requires_grad_(False)

        # -- Output of the model
        y_hat = model(inputs)

        # -- Compute loss.
        loss = loss_func(y_hat, labels)

        # -- Training?
        if model.training:
            loss.backward()
            optimizer.step()
            optimizer.zero_grad(set_to_none=True)

        # -- Book-keeping for the epoch summary
        running_loss.append(loss.item())

        if len(score_funcs) > 0 and isinstance(labels, torch.Tensor):
            # -- move labels & predictions back to CPU and accumulate them
            y_true.extend(labels.detach().cpu().numpy().tolist())
            y_pred.extend(y_hat.detach().cpu().numpy().tolist())
    # -- end of one epoch
    end = time.time()

    y_pred = np.asarray(y_pred)
    # 2-D predictions with more than one column are treated as class scores
    # and converted to label indices; anything else is passed through as is.
    if len(y_pred.shape) == 2 and y_pred.shape[1] > 1:
        y_pred = np.argmax(y_pred, axis=1)

    results.setdefault(prefix + " loss", []).append(np.mean(running_loss))
    for name, score_func in score_funcs.items():
        key = prefix + " " + name
        try:
            score = score_func(y_true, y_pred)
        except Exception as err:
            # Best effort: a failing metric must not abort the run, but it
            # should be visible. KeyboardInterrupt/SystemExit still propagate.
            warnings.warn("score function '{}' failed for '{}': {!r}".format(name, prefix, err))
            score = float("NaN")
        results.setdefault(key, []).append(score)
    return end - start  # time spent on epoch


def train_model(epoches,
                model,
                optimizer,
                train_loader,
                loss_func,
                score_funcs,
                result_path,
                patch_size,
                validation_loader=None,
                test_loader=None):
    """Train ``model`` for ``epoches`` epochs with per-epoch evaluation and checkpoints.

    After every epoch the model/optimizer state and the full metric history
    are written to ``All_results_<patch_size>_patchsize.pt`` inside
    ``result_path``. When a validation loader is given, the epoch with the
    highest "validation Accuracy" so far is additionally written to
    ``BestResult_<patch_size>_patchsize.pt``.

    Args:
        epoches: number of epochs to run.
        model: module to train; moved to CUDA device 0 when available, else CPU.
        optimizer: optimizer used for the training passes.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        loss_func: callable invoked as ``loss_func(y_hat, labels)``.
        score_funcs: mapping ``name -> func(y_true, y_pred)``. Progress output
            and best-model selection read the metric named "Accuracy".
        result_path: directory for checkpoint files (created if missing,
            including intermediate directories).
        patch_size: value embedded in the checkpoint file names.
        validation_loader: optional validation batches.
        test_loader: optional test batches.

    Returns:
        The ``results`` dict mapping each tracked quantity to its per-epoch list.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # -- Create result directory (makedirs also handles nested paths and
    #    an already-existing directory)
    os.makedirs(result_path, exist_ok=True)

    file_tag = str(patch_size) + '_patchsize.pt'
    # -- save all results
    checkpoint_file_results = os.path.join(result_path, 'All_results_' + file_tag)
    # -- save the best result based on validation accuracy
    checkpoint_file_best_result = os.path.join(result_path, 'BestResult_' + file_tag)

    # -- send model on the device
    model = model.to(device)

    # -- Optional evaluation passes: (prefix, loader, description)
    eval_passes = []
    if validation_loader is not None:
        eval_passes.append(("validation", validation_loader, "Validating"))
    if test_loader is not None:
        eval_passes.append(("test", test_loader, "Testing"))

    to_track = ["epoch", "total time", "train Accuracy", "train loss"]
    for eval_prefix, _, _ in eval_passes:
        to_track.append(eval_prefix + " Accuracy")
        to_track.append(eval_prefix + " loss")
    # Also reserve a slot for every metric in score_funcs, so metrics not
    # named "Accuracy" have a list to append to.
    for pass_prefix in ["train"] + [eval_prefix for eval_prefix, _, _ in eval_passes]:
        for metric_name in score_funcs:
            metric_key = pass_prefix + " " + metric_name
            if metric_key not in to_track:
                to_track.append(metric_key)

    total_train_time = 0
    results = {item: [] for item in to_track}

    def latest(key):
        """Most recent value recorded under ``key``; NaN when nothing was recorded."""
        history = results.get(key)
        return history[-1] if history else float("nan")

    Best_validation_Accuracy = 0.0

    # -- Train model
    print('Training begins...\n')

    for epoch in range(epoches):
        # -- set the model on train
        model = model.train()
        # -- Train for one epoch
        total_train_time += run_epoch(model, optimizer, train_loader,
                                      loss_func, device, results,
                                      score_funcs, prefix="train", desc="Training")

        # -- Save epoch and processing time
        results["epoch"].append(epoch)
        results["total time"].append(total_train_time)

        #   ******  Validating / Testing  ******
        for eval_prefix, eval_loader, eval_desc in eval_passes:
            model = model.eval()  # Set the model to "evaluation" mode
            with torch.no_grad():
                run_epoch(model, optimizer, eval_loader,
                          loss_func, device, results,
                          score_funcs, prefix=eval_prefix, desc=eval_desc)

        #   ******  Save results of each epoch  ******
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'results': results
        }, checkpoint_file_results)

        train_accuracy = latest('train Accuracy')
        validation_accuracy = latest('validation Accuracy')
        test_accuracy = latest('test Accuracy')

        # Only report the splits that were actually evaluated; indexing the
        # validation/test history unconditionally raised KeyError when the
        # corresponding loader was not supplied.
        test_part = ''
        if test_loader is not None:
            test_part = '   Test Accuracy: {:.2f}'.format(test_accuracy * 100)

        # show the progress and metrics
        progress = '\nEpoch: {}   Training accuracy: {:.2f}'.format(epoch, train_accuracy * 100)
        if validation_loader is not None:
            progress += '   Validation accuracy: {:.2f}'.format(validation_accuracy * 100)
        print(progress + test_part)

        # save the model based on the validation accuracy
        # (a NaN accuracy never compares greater, so it is never selected)
        if validation_loader is not None and validation_accuracy > Best_validation_Accuracy:
            print('\nEpoch: {}   Training accuracy: {:.2f}   best Val accuracy: {:.2f}'
                  .format(epoch, train_accuracy * 100, validation_accuracy * 100) + test_part)
            Best_validation_Accuracy = validation_accuracy
            best_result = {
                "epoch": [epoch],
                "train accuracy": [train_accuracy],
                "validation accuracy": [validation_accuracy],
                # left empty when no test loader was supplied
                "test accuracy": [test_accuracy] if test_loader is not None else [],
            }

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'results': best_result
            }, checkpoint_file_best_result)

    return results

In [None]:
from util import get_dataset

current_location = os.getcwd()
# Build the config path with os.path.join, the same way the inference cell
# further down does, instead of concatenating with a hard-coded '/'.
with open(os.path.join(current_location, 'config', 'config.yml'), 'r') as file:
    config = yaml.safe_load(file)

# Initialize W&B; the hyper-parameters recorded for the run come straight
# from the TRAIN section of the config file.
wandb.init(
    project="translation",  # Name of your project
    config={
        "learning_rate": config['TRAIN']['lr'],
        "batch_size": config['TRAIN']['batch_size'],
        "epochs": config['TRAIN']['epochs'],
        "model": 'english_to_french',
    })

# Results are written to <results_path>/<model_name>, created if missing.
Result_Directory = os.path.join(config['BENCHMARK']['results_path'], config['BENCHMARK']['model_name'])
os.makedirs(Result_Directory, exist_ok=True)

# Metric name -> function(y_true, y_pred)
score_funcs = {"Accuracy": accuracy_score}
# getting the dataloaders
train_dataloader, val_dataloader, test_dataloader, source_tokenizer, target_tokenizer = get_dataset(config)


In [4]:
# get model
# NOTE(review): build_transformer_model is already imported in the first cell,
# so this re-import is redundant (harmless, but it hides the real dependency list).
from model.Attention_model import build_transformer_model
# Relies on `config` loaded by an earlier cell.
model = build_transformer_model(config)
# define optimizer
# Adam over all model parameters; learning rate is read from config['TRAIN']['lr'],
# eps is fixed at 1e-9.
optimizer = torch.optim.Adam(model.parameters(), lr = config['TRAIN']['lr'], eps = 1e-9)

In [None]:
# NOTE(review): this rebinds the name `load_dataset`, shadowing the
# `load_dataset` imported from `util` in the first cell for the rest of the session.
from datasets import load_dataset
# Load the 'train' split of the 'en-fr' configuration of 'opus_books'.
dataset = load_dataset('opus_books', 'en-fr', split='train')

In [None]:
dataset

In [None]:
dataset[0]

In [None]:
dataset[0]['translation']['en']

In [None]:
dataset[0]['translation']['fr']

In [1]:
import torch

In [2]:
from torch.utils.data import DataLoader # type: ignore
file_path='./dataloaders/test_dataloader.pth'
# The checkpoint carries a dataset object under 'test_dataset' (it is handed
# straight to DataLoader below), i.e. more than plain tensors, so it has to be
# unpickled with weights_only=False. Stating this explicitly keeps the load
# working when torch's default is weights_only=True and avoids the warning
# about the implicit default.
# SECURITY: unpickling can execute arbitrary code - only load files you
# created yourself or otherwise trust.
checkpoint = torch.load(file_path, weights_only=False)
# Extract the test_dataset
test_dataset = checkpoint['test_dataset']
# Recreate the test DataLoader with batch_size = 1 and shuffle = False
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

  checkpoint = torch.load(file_path)
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
len(test_dataloader)

12709

In [5]:
import numpy as np
# Draw three indices uniformly at random from [0, len(test_dataloader));
# sampling is with replacement, so duplicates are possible.
random_samples_idx = np.random.randint(low=0, high=len(test_dataloader), size=3)
random_samples_idx

array([6384,  534, 7277])

In [8]:
import os
os.getcwd()

'/media/saeid/Crucial/ML_Projects/GitHub_Projects/Attention_From_Scratch'

In [6]:
import os
import yaml
import torch # type: ignore
from tokenizers import Tokenizer # type: ignore
from model.Attention_model import build_transformer_model
from test_model import get_trained_model
from util import causal_mask

  from .autonotebook import tqdm as notebook_tqdm


In [29]:
sentence = "Hello Mr"

# Read config
config_path = os.path.join(os.getcwd(), "config", "config.yml")
with open(config_path, "r") as file:
    config = yaml.safe_load(file)

# Load the trained model and tokenizers using the imported function
model, encoder_tokenizer, decoder_tokenizer = get_trained_model(config)

# get SOS, EOS, PAD tokens IDs (as 1-element int64 tensors so they can be concatenated)
sos_token = torch.tensor([encoder_tokenizer.token_to_id('[SOS]')], dtype=torch.int64)
eos_token = torch.tensor([encoder_tokenizer.token_to_id('[EOS]')], dtype=torch.int64)
pad_token = torch.tensor([encoder_tokenizer.token_to_id('[PAD]')], dtype=torch.int64)

# Convert input sentence into token IDs
encoder_input_tokens = encoder_tokenizer.encode(sentence).ids

# We need to add SOS and EOS tokens into encoder tokens and pad the sentence to the max_seq_len
# (the "- 2" accounts for the SOS and EOS tokens)
encoder_padding_tokens = config['MODEL']['source_sq_len'] - \
    len(encoder_input_tokens) - 2

# A negative count used to be silently treated as "no padding", producing an
# input longer than source_sq_len; fail loudly instead.
if encoder_padding_tokens < 0:
    raise ValueError(
        "Sentence is too long: {} tokens (+2 for SOS/EOS) exceed source_sq_len={}".format(
            len(encoder_input_tokens), config['MODEL']['source_sq_len']))

# Layout: [SOS] tokens... [EOS] [PAD]...; pad_token.repeat(n) yields the n
# padding ids directly instead of building a tensor from a list of tensors.
encoder_input = torch.cat([sos_token,
                           torch.tensor(encoder_input_tokens, dtype=torch.int64),
                           eos_token,
                           pad_token.repeat(encoder_padding_tokens)])
# Add a leading batch dimension: (source_sq_len,) -> (1, source_sq_len)
encoder_input = encoder_input.unsqueeze(0)

[INFO] Model weights loaded successfully!


In [30]:
encoder_input.shape

torch.Size([1, 500])

In [31]:
# Padding mask for the encoder input: 1 where the token differs from [PAD], 0 on padding.
# encoder_input already has a batch dimension, (1, max_seq_len), so the two
# unsqueeze(0) calls produce a 4-D mask, as the displayed shape confirms.
encoder_mask = (encoder_input != pad_token).unsqueeze(
            0).unsqueeze(0).int()  # (1, 1, 1, max_seq_len)
encoder_mask.shape

torch.Size([1, 1, 1, 500])

In [32]:
def greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device):
    """Greedily decode one sequence, token by token.

    The encoder runs once; the decoder is then called repeatedly on the
    sequence generated so far, each time appending the highest-scoring next
    token, until [EOS] is produced or the sequence holds ``max_len`` tokens.

    Args:
        model: object exposing ``encode(src, src_mask)``,
            ``decode(encoder_output, src_mask, tgt, tgt_mask)`` and
            ``projection(x)``.
        encoder_input: source token ids; the decoder input reuses its dtype.
        encoder_mask: mask passed to both ``encode`` and ``decode``.
        tokenizer_src: source tokenizer; kept for interface compatibility,
            not used by the decoding loop.
        tokenizer_tgt: target tokenizer; supplies the [SOS]/[EOS] ids because
            the decoder consumes and produces target-vocabulary ids.
        max_len: maximum number of tokens in the returned sequence
            (including the leading [SOS]).
        device: device on which decoder tensors are created.

    Returns:
        Tensor of shape (1, n) with the generated ids, starting with [SOS];
        n is at most ``max_len`` (but at least 1, since [SOS] is always present).
    """
    # The ids must come from the *target* tokenizer: the generated tokens are
    # target-vocabulary ids, so comparing against the source tokenizer's [EOS]
    # is only correct if both vocabularies happen to share special-token ids.
    sos_idx = tokenizer_tgt.token_to_id("[SOS]")
    eos_idx = tokenizer_tgt.token_to_id("[EOS]")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # Precompute the encoder output and reuse it for every step
        encoder_output = model.encode(encoder_input, encoder_mask)

        # Initialize the decoder input with the sos token
        decoder_input = torch.full((1, 1), sos_idx, dtype=encoder_input.dtype, device=device)

        # `<` (rather than breaking on `== max_len`) also terminates when max_len < 1.
        while decoder_input.size(1) < max_len:
            # Causal mask for the tokens generated so far
            # (assumes causal_mask(n) returns a (1, n, n) mask - TODO confirm against util)
            decoder_mask = causal_mask(decoder_input.size(1)).type_as(encoder_mask).to(device)
            # calculate output
            out = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
            # Scores for the last position only; presumably (batch, vocab_size)
            prob = model.projection(out[:, -1])
            _, next_word = torch.max(prob, dim=1)
            next_id = next_word.item()
            decoder_input = torch.cat(
                [
                    decoder_input,
                    torch.full((1, 1), next_id, dtype=encoder_input.dtype, device=device),
                ],
                dim=1,
            )
            if next_id == eos_idx:
                break

    return decoder_input

In [33]:
# Greedily decode the prepared sentence on CPU, producing at most 500 tokens.
# NOTE(review): 500 equals the encoder input length displayed above; presumably it
# should follow the configured sequence length rather than be hard-coded - confirm.
translated_tokens = greedy_decode(model, encoder_input, encoder_mask, encoder_tokenizer, decoder_tokenizer, max_len=500, device="cpu")

In [34]:
translated_tokens

tensor([[  1, 294,   5,   2]])