In [71]:
import torch
from torch import cuda
from torch.autograd import Variable
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertTokenizerFast
import datasets
from datasets import load_dataset
from torch.utils.data import DataLoader

# importing other required libraries
import argparse
import logging
import math
import dill
import os
import options
import random
import sys
import numpy as np
from collections import OrderedDict

import data
import utils
from meters import AverageMeter
from PGLoss import PGLoss
from tqdm import tqdm
from dictionary import Dictionary
import re
import subprocess

# Importing Generator and Discriminator class methods
from generator_tf_bert import TransformerModel_bert
from discriminator_cnn_bert import Discriminator_cnn_bert

In [72]:
# Locate the Hugging Face datasets cache under the user's home directory.
home = os.path.expanduser("~")
cache_dir = os.path.join(home, ".cache", "huggingface", "datasets")

# Dataset name and language-pair configuration.
dataset_name = "wmt14"
config_name = "fr-en"

# Directory in which this dataset/configuration pair is expected to be cached.
dataset_config_path = os.path.join(cache_dir, dataset_name, config_name)

print(f"Checking cache at: {dataset_config_path}")

# The cache check only selects the status message: load_dataset() is called
# with the same arguments either way. isdir() (instead of exists()) keeps
# listdir() from raising when the path exists but is not a directory.
if os.path.isdir(dataset_config_path) and os.listdir(dataset_config_path):
    print("Dataset already downloaded, loading from cache.")
else:
    print("Downloading the dataset.")
dataset = load_dataset(dataset_name, config_name, cache_dir=cache_dir)

# Keep only the first 10 rows of the train and test splits.
train_dataset = dataset['train'].select(range(10))
test_dataset = dataset["test"].select(range(10))

# The validation split is kept in full.
valid_dataset = dataset["validation"]

# Name of the BERT checkpoint.
bert_model = "bert-base-multilingual-cased"

Checking cache at: /u/prattisr/.cache/huggingface/datasets/wmt14/fr-en
Dataset already downloaded, loading from cache.


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [73]:
def preprocess(data):
    """Tokenize a batch of English/French sentence pairs with multilingual BERT.

    Args:
        data: mapping whose ``'translation'`` entry is iterated as a sequence
            of mappings with ``'en'`` and ``'fr'`` entries.

    Returns:
        dict with the English token ids and attention mask (``input_ids``,
        ``attention_mask``) and the French ones (``target_ids``,
        ``target_attention_mask``). Every sequence is padded or truncated to
        128 tokens.
    """
    bert_tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")

    # Single pass over the batch, reading 'en' then 'fr' from each pair.
    pairs = [(pair['en'], pair['fr']) for pair in data['translation']]
    english = [src for src, _ in pairs]
    french = [tgt for _, tgt in pairs]

    # Both sides share the same padding and truncation settings.
    tokenizer_kwargs = dict(truncation=True, padding="max_length", max_length=128)
    source_encoding = bert_tokenizer(english, **tokenizer_kwargs)
    target_encoding = bert_tokenizer(french, **tokenizer_kwargs)

    return {
        "input_ids": source_encoding.input_ids,
        "attention_mask": source_encoding.attention_mask,
        "target_ids": target_encoding.input_ids,
        "target_attention_mask": target_encoding.attention_mask,
    }

In [74]:
# Tokenize each split with Dataset.map; with batched=True, preprocess is
# called on batches of rows rather than on single rows.
tokenized_train_datasets = train_dataset.map(preprocess, batched=True)
tokenized_valid_datasets = valid_dataset.map(preprocess, batched=True)
tokenized_test_datasets = test_dataset.map(preprocess, batched=True)

In [75]:
# Peek at the raw sentence pairs kept alongside the tokenized test split.
tokenized_test_datasets['translation'][:10]

[{'en': 'Spectacular Wingsuit Jump Over Bogota',
  'fr': 'Spectaculaire saut en "wingsuit" au-dessus de Bogota'},
 {'en': 'Sportsman Jhonathan Florez jumped from a helicopter above Bogota, the capital of Colombia, on Thursday.',
  'fr': "Le sportif Jhonathan Florez a sauté jeudi d'un hélicoptère au-dessus de Bogota, la capitale colombienne."},
 {'en': 'Wearing a wingsuit, he flew past over the famous Monserrate Sanctuary at 160km/h. The sanctuary is located at an altitude of over 3000 meters and numerous spectators had gathered there to watch his exploit.',
  'fr': "Equipé d'un wingsuit (une combinaison munie d'ailes), il est passé à 160 km/h au-dessus du célèbre sanctuaire Monserrate, situé à plus de 3 000 mètres d'altitude, où de nombreux badauds s'étaient rassemblés pour observer son exploit."},
 {'en': 'A black box in your car?',
  'fr': 'Une boîte noire dans votre voiture\xa0?'},
 {'en': "As America's road planners struggle to find the cash to mend a crumbling highway system, many

In [76]:
# Check what kind of object the map call returned.
type(tokenized_train_datasets)

datasets.arrow_dataset.Dataset

In [77]:
# Hand-written list of nine English/French sentence pairs, passed to
# preprocess_sm below as a plain Python list (no Dataset object involved).
# NOTE(review): the entries appear to be copied from the first rows of the
# test split -- confirm they are still in sync if the split changes.
text=[{'en': 'Spectacular Wingsuit Jump Over Bogota',
  'fr': 'Spectaculaire saut en "wingsuit" au-dessus de Bogota'},
 {'en': 'Sportsman Jhonathan Florez jumped from a helicopter above Bogota, the capital of Colombia, on Thursday.',
  'fr': "Le sportif Jhonathan Florez a sauté jeudi d'un hélicoptère au-dessus de Bogota, la capitale colombienne."},
 {'en': 'Wearing a wingsuit, he flew past over the famous Monserrate Sanctuary at 160km/h. The sanctuary is located at an altitude of over 3000 meters and numerous spectators had gathered there to watch his exploit.',
  'fr': "Equipé d'un wingsuit (une combinaison munie d'ailes), il est passé à 160 km/h au-dessus du célèbre sanctuaire Monserrate, situé à plus de 3 000 mètres d'altitude, où de nombreux badauds s'étaient rassemblés pour observer son exploit."},
 {'en': 'A black box in your car?',
  'fr': 'Une boîte noire dans votre voiture\xa0?'},
 {'en': "As America's road planners struggle to find the cash to mend a crumbling highway system, many are beginning to see a solution in a little black box that fits neatly by the dashboard of your car.",
  'fr': "Alors que les planificateurs du réseau routier des États-Unis ont du mal à trouver l'argent nécessaire pour réparer l'infrastructure autoroutière en décrépitude, nombreux sont ceux qui entrevoient une solution sous forme d'une petite boîte noire qui se fixe au-dessus du tableau de bord de votre voiture."},
 {'en': "The devices, which track every mile a motorist drives and transmit that information to bureaucrats, are at the center of a controversial attempt in Washington and state planning offices to overhaul the outdated system for funding America's major roads.",
  'fr': "Les appareils, qui enregistrent tous les miles parcourus par un automobiliste et transmettent les informations aux fonctionnaires, sont au centre d'une tentative controversée à Washington et dans les bureaux gouvernementaux de la planification de remanier le système obsolète de financement des principales routes américaines."},
 {'en': 'The usually dull arena of highway planning has suddenly spawned intense debate and colorful alliances.',
  'fr': 'Le secteur généralement sans intérêt de la planification des grands axes a soudain provoqué un débat fort animé et des alliances mouvementées.'},
 {'en': 'Libertarians have joined environmental groups in lobbying to allow government to use the little boxes to keep track of the miles you drive, and possibly where you drive them - then use the information to draw up a tax bill.',
  'fr': 'Les libertaires ont rejoint des groupes écologistes pour faire pression afin que le gouvernement utilise les petites boîtes pour garder la trace des miles que vous parcourez, et éventuellement de la route sur laquelle vous circulez, puis utiliser les informations pour rédiger un projet de loi fiscal.'},
 {'en': 'The tea party is aghast.', 'fr': 'Le Tea Party est atterré.'}]

In [78]:
# English side of the first hand-written pair.
text[0]['en']

'Spectacular Wingsuit Jump Over Bogota'

In [79]:
def preprocess_sm(data):
    """Tokenize a plain sequence of English/French sentence pairs.

    This is the small-sample counterpart of ``preprocess``: it takes the
    pairs directly instead of a batch with a ``'translation'`` entry. The
    tokenization itself is delegated to ``preprocess`` so the two cannot
    drift apart.

    Args:
        data: iterable of mappings with ``'en'`` and ``'fr'`` entries.

    Returns:
        dict with keys ``input_ids``, ``attention_mask``, ``target_ids`` and
        ``target_attention_mask``, exactly as returned by ``preprocess``.
    """
    # preprocess() reads its pairs from data['translation'], so wrap the
    # sequence in a one-entry batch.
    return preprocess({"translation": data})

In [80]:
# Tokenize the hand-written sample; the result is a plain dict of lists.
tokenized_test_dataset_sm = preprocess_sm(text)

In [81]:
# List the fields produced by preprocess_sm.
tokenized_test_dataset_sm.keys()

dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask'])

In [82]:
# Show the padded token ids of the tokenized test split.
# NOTE(review): this displays the whole column (128 ids per row); consider
# slicing to a couple of rows to keep the notebook output readable.
tokenized_test_datasets['input_ids']

[[101,
  46361,
  78125,
  24236,
  31058,
  15971,
  37151,
  15704,
  97033,
  15192,
  102,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [101,
  15506,
  10589,
  147,
  19792,
  44196,
  10206,
  69113,
  12715,
  54941,
  10336,
  10188,
  169,
  80634,
  16038,
  97033,
  15192,
  117,
  10105,
  12185,
  10108,
  15223,
  117,
  10135,
  67067,
  119,
  102,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,


In [83]:
# Check what kind of object the map call returned for the test split.
type(tokenized_test_datasets)

datasets.arrow_dataset.Dataset

In [84]:
from fairseq.models.transformer import TransformerModel

# Directory of the pre-trained fairseq model, resolved relative to the
# current working directory. It is built once and reused for the model path,
# the data path and the BPE codes file.
getpwd = os.getcwd()
path_to_your_pretrained_model = os.path.join(
    getpwd, 'pretrained_models', 'wmt14.en-fr.joined-dict.transformer'
)

generator1_pretrained = TransformerModel.from_pretrained(
    path_to_your_pretrained_model,
    checkpoint_file='model.pt',
    bpe='subword_nmt',
    data_name_or_path=path_to_your_pretrained_model,
    bpe_codes=os.path.join(path_to_your_pretrained_model, 'bpecodes'),
)
print("G1 - Pre-Trained fairseq Generator loaded successfully!")

2024-03-06 23:04:03 | INFO | fairseq.file_utils | loading archive file /u/prattisr/phase-2/all_repos/Adversarial_NMT/neural-machine-translation-using-gan-master/pretrained_models/wmt14.en-fr.joined-dict.transformer
2024-03-06 23:04:03 | INFO | fairseq.file_utils | loading archive file /u/prattisr/phase-2/all_repos/Adversarial_NMT/neural-machine-translation-using-gan-master/pretrained_models/wmt14.en-fr.joined-dict.transformer
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  self.delegate = real_initialize(
See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/changes_to_package_header for more information


ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8

'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  state = load_checkpoint_to_cpu(filename, arg_overrides)
The strict flag in the compose API is deprecated.
See https://hydra.cc/docs/1.2/upgrades/0.11_to_1.0/strict_mode_flag_deprecated for more info.



ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8!=4.9.3
ANTLR runtime and generated code versions disagree: 4.8

'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  x = hub_utils.from_pretrained(
2024-03-06 23:04:05 | INFO | fairseq.tasks.translation | [en] dictionary: 44512 types
2024-03-06 23:04:05 | INFO | fairseq.tasks.translation | [fr] dictionary: 44512 types
2024-03-06 23:04:06 | INFO | fairseq.models.fairseq_model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 10, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 2, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0

G1 - Pre-Trained fairseq Generator loaded successfully!


In [85]:
def translate_with_max_length(text, max_length):
    """Translate ``text`` into French and cap the result at ``max_length`` characters.

    Args:
        text: source text; the source language is auto-detected by the
            translator.
        max_length: maximum number of characters (not tokens) to keep.

    Returns:
        The translated string, cut to ``max_length`` characters when longer.
        An empty string is returned if the translator yields ``None``.
    """
    # Function-scope import, as in the original cell: deep_translator is only
    # needed when a translation is actually requested.
    from deep_translator import GoogleTranslator

    translator = GoogleTranslator(source="auto", target='fr')
    translated_text = translator.translate(text)

    # NOTE(review): the translator presumably can return None for some inputs;
    # guard so that case does not raise a TypeError here -- confirm this is
    # the desired fallback.
    if translated_text is None:
        return ""

    # Slicing is a no-op for strings already within the limit.
    return translated_text[:max_length]

In [86]:
def preprocess_testData(data):
    """Tokenize a batch of test pairs and add a mixed human/machine target.

    Half of the rows in the batch (rounded down) are picked with a fixed
    seed. For those rows the extra target is a fresh translation of the
    English source obtained from ``translate_with_max_length`` (label
    ``'0'``). For the remaining rows it is the reference French sentence
    (label ``'1'``).

    Args:
        data: mapping whose ``'translation'`` entry is iterated as a sequence
            of mappings with ``'en'`` and ``'fr'`` entries.

    Returns:
        dict of lists with keys ``input_ids``, ``attention_mask``,
        ``target_ids``, ``target_attention_mask``, ``ht_mt_target_ids``,
        ``ht_mt_target_attention_mask`` and ``ht_mt_label`` (strings ``'0'``
        or ``'1'``).
    """
    max_length = 128
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")

    en = []
    fr = []
    for element in data['translation']:
        en.append(element['en'])
        fr.append(element['fr'])

    # A private generator seeded with the same value draws the same rows as
    # seeding the module-level generator would, without resetting the global
    # random state on every batch. A set gives O(1) membership tests.
    rng = random.Random(12345)
    machine_rows = set(rng.sample(range(len(en)), len(en) // 2))

    ht_mt_target = []
    ht_mt_label = []
    for i, (src, reference) in enumerate(zip(en, fr)):
        if i in machine_rows:
            # Machine translation of the source. max_length caps characters
            # here, whereas the tokenizer below caps tokens.
            ht_mt_target.append(translate_with_max_length(src, max_length))
            ht_mt_label.append('0')
        else:
            # Reference translation taken from the dataset.
            ht_mt_target.append(reference)
            ht_mt_label.append('1')

    # Tokenize the data
    inputs = tokenizer(
        en, truncation=True, padding="max_length", max_length=max_length
    )
    targets = tokenizer(
        fr, truncation=True, padding="max_length", max_length=max_length
    )
    ht_mt_target = tokenizer(
        ht_mt_target, truncation=True, padding="max_length", max_length=max_length
    )

    # print statements for debugging
    print("inputs type: ", type(inputs))
    print("type of targets: ", type(targets))
    print("type of ht_mt_target: ", type(ht_mt_target))

    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "target_ids": targets['input_ids'],
        "target_attention_mask": targets['attention_mask'],
        "ht_mt_target_ids": ht_mt_target['input_ids'],
        "ht_mt_target_attention_mask": ht_mt_target['attention_mask'],
        "ht_mt_label": ht_mt_label,
    }

In [87]:
# Raw sentence pairs of the test subset, before any tokenization.
test_dataset['translation']

[{'en': 'Spectacular Wingsuit Jump Over Bogota',
  'fr': 'Spectaculaire saut en "wingsuit" au-dessus de Bogota'},
 {'en': 'Sportsman Jhonathan Florez jumped from a helicopter above Bogota, the capital of Colombia, on Thursday.',
  'fr': "Le sportif Jhonathan Florez a sauté jeudi d'un hélicoptère au-dessus de Bogota, la capitale colombienne."},
 {'en': 'Wearing a wingsuit, he flew past over the famous Monserrate Sanctuary at 160km/h. The sanctuary is located at an altitude of over 3000 meters and numerous spectators had gathered there to watch his exploit.',
  'fr': "Equipé d'un wingsuit (une combinaison munie d'ailes), il est passé à 160 km/h au-dessus du célèbre sanctuaire Monserrate, situé à plus de 3 000 mètres d'altitude, où de nombreux badauds s'étaient rassemblés pour observer son exploit."},
 {'en': 'A black box in your car?',
  'fr': 'Une boîte noire dans votre voiture\xa0?'},
 {'en': "As America's road planners struggle to find the cash to mend a crumbling highway system, many

In [88]:
# Build the mixed human/machine test set. preprocess_testData calls the
# translator for about half of the rows, so this presumably needs network
# access -- confirm before re-running offline.
tokenized_test_dataset_translated = test_dataset.map(preprocess_testData, batched=True)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map: 100%|██████████| 10/10 [00:08<00:00,  1.15 examples/s]

inputs type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
type of targets:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
type of ht_mt_target:  <class 'transformers.tokenization_utils_base.BatchEncoding'>





In [89]:
# tokenized_test_dataset_translated = preprocess_testData(tokenized_test_datasets, 128)

In [126]:
# Check what kind of object the map call returned.
type(tokenized_test_dataset_translated)

datasets.arrow_dataset.Dataset

In [127]:
# Restrict reads to the model-facing columns and request torch formatting.
# NOTE(review): ht_mt_label holds the strings '0'/'1' built in
# preprocess_testData -- confirm how the torch format returns that column.
tokenized_test_dataset_translated.set_format(
    type="torch",
    columns=[
        "input_ids",
        "attention_mask",
        "target_ids",
        "target_attention_mask",
        "ht_mt_target_ids",
        "ht_mt_target_attention_mask",
        "ht_mt_label"
    ],
)

In [128]:
# Wrap the formatted test set in a DataLoader that yields one example per batch.
test_dataloader = DataLoader(tokenized_test_dataset_translated, batch_size=1)

In [92]:
# Display the DataLoader object to confirm it was created.
test_dataloader

<torch.utils.data.dataloader.DataLoader at 0x7f2d302f6aa0>

Train 

In [119]:
# Inspect the columns and row count of the tokenized training split.
tokenized_train_datasets

Dataset({
    features: ['translation', 'input_ids', 'attention_mask', 'target_ids', 'target_attention_mask'],
    num_rows: 10
})

In [120]:
# Restrict reads to the four tokenized columns and request torch formatting;
# the raw 'translation' column is left out of the column list.
tokenized_train_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask'])

In [121]:
# Inspect the dataset again after set_format.
tokenized_train_datasets

Dataset({
    features: ['translation', 'input_ids', 'attention_mask', 'target_ids', 'target_attention_mask'],
    num_rows: 10
})

In [122]:
# Wrap the formatted training set in a DataLoader that yields one example per batch.
train_dataloader = DataLoader(tokenized_train_datasets, batch_size=1)

In [130]:
# Resolve the target device once; it does not change between batches.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for i, sample in enumerate(test_dataloader):
    print(sample.keys())
    print(f"Device is {device}")
    # Move tensor entries to the device; anything that is not a tensor
    # (presumably the string labels in ht_mt_label) is passed through as-is.
    sample = {
        key: value.to(device) if torch.is_tensor(value) else value
        for key, value in sample.items()
    }

dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'attention_mask', 'target_ids', 'target_attention_mask', 'ht_mt_target_ids', 'ht_mt_target_attention_mask', 'ht_mt_label'])
Device is cpu
dict_keys(['input_ids', 'att

In [131]:
# Resolve the device in this cell so it does not depend on another cell
# having been run first.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for i, sample in enumerate(train_dataloader):
    for key, tensor in sample.items():
        print(f'key is {key} and type of tensor is {type(tensor)}')
    # Rebuild the batch as a dict after the inspection loop. The previous
    # `{key, tensor.to(device)}` was a set literal that replaced the batch
    # with a two-element set while its items were still being iterated.
    sample = {
        key: tensor.to(device) if torch.is_tensor(tensor) else tensor
        for key, tensor in sample.items()
    }

key is input_ids and type of tensor is <class 'torch.Tensor'>
key is attention_mask and type of tensor is <class 'torch.Tensor'>
key is target_ids and type of tensor is <class 'torch.Tensor'>
key is target_attention_mask and type of tensor is <class 'torch.Tensor'>
key is input_ids and type of tensor is <class 'torch.Tensor'>
key is attention_mask and type of tensor is <class 'torch.Tensor'>
key is target_ids and type of tensor is <class 'torch.Tensor'>
key is target_attention_mask and type of tensor is <class 'torch.Tensor'>
key is input_ids and type of tensor is <class 'torch.Tensor'>
key is attention_mask and type of tensor is <class 'torch.Tensor'>
key is target_ids and type of tensor is <class 'torch.Tensor'>
key is target_attention_mask and type of tensor is <class 'torch.Tensor'>
key is input_ids and type of tensor is <class 'torch.Tensor'>
key is attention_mask and type of tensor is <class 'torch.Tensor'>
key is target_ids and type of tensor is <class 'torch.Tensor'>
key is tar