In [None]:
import torch

# Report what accelerator PyTorch can see. The availability flag is queried
# once and reused so that every line below agrees with the others.
cuda_available = torch.cuda.is_available()

print("CUDA Available:", cuda_available)
print("GPU Name:", torch.cuda.get_device_name(0) if cuda_available else "No GPU detected")
print("Current Device:", torch.cuda.current_device() if cuda_available else "CPU")

# Release cached allocator blocks left over from an earlier run in this kernel.
if cuda_available:
    torch.cuda.empty_cache()

# Displayed as the cell output: the device that will actually be used.
# (Previously this was always "cuda", even on a CPU-only machine.)
torch.device("cuda" if cuda_available else "cpu")

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset

# Define the source and target languages
SRC_LANG = "en_XX"  # Source language (English)
# Target language (Chavacano). "chv_XX" is not in the published mBART-50
# language list, so it is registered as a new special token further down.
TGT_LANG = "chv_XX"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=SRC_LANG, tgt_lang=TGT_LANG)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# A language code the tokenizer does not know is looked up as <unk>, which
# would tag every target sentence with the unknown token instead of a
# dedicated language token. Register the code so it gets its own id.
if tokenizer.convert_tokens_to_ids(TGT_LANG) == tokenizer.unk_token_id:
    tokenizer.add_tokens([TGT_LANG], special_tokens=True)

# Grow the embedding matrix only when the tokenizer now has more entries than
# the model; the new row is trained during fine-tuning.
if len(tokenizer) > model.get_input_embeddings().num_embeddings:
    model.resize_token_embeddings(len(tokenizer))

model = model.to(device)

# Load training and validation datasets
dataset = load_dataset("csv", data_files={"train": "train1.csv", "validation": "validation1.csv"}, encoding='latin-1')

# Tokenization function
def preprocess_function(examples, max_length=128):
    """Tokenize a batch of source/target sentence pairs for seq2seq training.

    Args:
        examples: Batch mapping with "source" and "target" entries, each a
            list of strings (the shape `dataset.map(..., batched=True)` passes).
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output ("input_ids", "attention_mask", "labels") as
        plain Python lists, with padding positions in "labels" set to -100.
    """
    # The tokenizer adds the language code and the </s> end-of-sequence token
    # itself, so the raw text is passed through unchanged; appending "</s>"
    # by hand would yield a duplicated EOS.
    # The result stays on the CPU as lists: `dataset.map` stores it in the
    # dataset, and batches are moved to the training device later.
    model_inputs = tokenizer(
        examples["source"],
        text_target=examples["target"],
        max_length=max_length,
        truncation=True,
        padding="max_length",
    )

    # -100 is the index the cross-entropy loss ignores; without this the model
    # would also be trained to predict the padding that fills each label row.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token if token != pad_id else -100 for token in label_ids]
        for label_ids in model_inputs["labels"]
    ]

    return model_inputs

# Tokenize every split, handing the examples to preprocess_function in batches
tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
)
print(tokenized_datasets)

In [None]:
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

# Fine-tuning configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir="./mbart_finetuned_2e-4_16",
    logging_dir="./logs",
    # Batch sizes: 16 per device, gradients accumulated over 4 steps
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,
    # Optimisation schedule
    num_train_epochs=10,
    learning_rate=2e-4,
    warmup_steps=500,
    weight_decay=0.01,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    # Mixed precision only when a CUDA device is present
    fp16=torch.cuda.is_available(),
    # Evaluate, log and checkpoint once per epoch
    eval_strategy="epoch",
    logging_strategy="epoch",
    # NOTE(review): with logging_strategy="epoch" this step interval looks
    # unused - confirm before relying on it.
    logging_steps=100,
    save_strategy="epoch",
    save_total_limit=2,
    # Reload the checkpoint with the best validation loss when training ends
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

# Stop as soon as one evaluation fails to improve on the best one so far.
early_stopping = EarlyStoppingCallback(early_stopping_patience=1)

trainer = Trainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    callbacks=[early_stopping],
)
trainer.train()
