#Model Training

In [None]:
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import MBart50TokenizerFast, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq
import uuid


# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Load finance dataset
df = pd.read_csv('./Finance_Data/Finance_Data_Cleaned.csv')
# Build a separate frame (so `df` keeps the raw CSV contents) without the
# unnamed index columns, if the file happens to contain them.
df_finance = df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore')

# Rows with a missing or blank sentence on either side cannot be tokenized
# into a usable training pair, so discard them before building the dataset
# (the evaluation cell applies the same kind of filtering to its test data).
df_finance = df_finance.dropna(subset=['Tamil', 'Telugu'])
has_text = (
    df_finance['Tamil'].astype(str).str.strip().ne('')
    & df_finance['Telugu'].astype(str).str.strip().ne('')
)
df_finance = df_finance[has_text]

print("Finance DataFrame shape:", df_finance.shape)
print("Sample finance data:\n", df_finance.head(5))

# Short language-code column names are what the preprocessing step reads.
df_finance = df_finance.rename(columns={"Tamil": "ta", "Telugu": "te"})

# preserve_index=False: after filtering the index is no longer contiguous and
# must not be carried into the Dataset as an extra column.
dataset = Dataset.from_pandas(df_finance, preserve_index=False)
# Fixed seed keeps the 90/10 train/test split reproducible across runs.
split_dataset = dataset.train_test_split(test_size=0.1, seed=42)
finance_dataset = DatasetDict({"train": split_dataset["train"], "test": split_dataset["test"]})
print("Finance train dataset size:", len(finance_dataset["train"]))
print("Finance test dataset size:", len(finance_dataset["test"]))

# Start from the mBART checkpoint and tokenizer saved by the previous
# Telugu -> Tamil training run.
MBART_MODEL_PATH = "./mbart_finetuned_te_to_ta"
mbart_tokenizer = MBart50TokenizerFast.from_pretrained(
    MBART_MODEL_PATH,
    src_lang="te_IN",  # source: Telugu
    tgt_lang="ta_IN",  # target: Tamil
)
mbart_model = AutoModelForSeq2SeqLM.from_pretrained(MBART_MODEL_PATH)
mbart_model = mbart_model.to(device)

# Compare the tokenizer vocabulary with the number of rows in the model's
# output embedding matrix.
mbart_vocab_size_tokenizer = len(mbart_tokenizer)
mbart_vocab_size_model = mbart_model.get_output_embeddings().weight.size(0)
print(f"mBART - Tokenizer vocab size: {mbart_vocab_size_tokenizer}")
print(f"mBART - Model output vocab size: {mbart_vocab_size_model}")

if mbart_vocab_size_tokenizer == mbart_vocab_size_model:
    print("mBART - Vocab sizes match, no adjustment needed.")
else:
    # Resize the embeddings to the tokenizer's vocabulary and report the result.
    print(f"Warning: Vocab size mismatch (Tokenizer: {mbart_vocab_size_tokenizer}, Model: {mbart_vocab_size_model}). Adjusting model embeddings.")
    mbart_model.resize_token_embeddings(mbart_vocab_size_tokenizer)
    print(f"Post-resize model vocab size: {mbart_model.get_output_embeddings().weight.size(0)}")

# Preprocessing function
def mbart_preprocess_function(examples):
    """Tokenize one batch of Telugu -> Tamil sentence pairs for seq2seq training.

    Args:
        examples: Batch mapping holding parallel lists of strings under the
            keys "te" (source sentences) and "ta" (target sentences).

    Returns:
        The tokenizer encoding of the source sentences, with a "labels" entry
        containing the target token ids. Both sides are truncated/padded to
        128 tokens and padding positions in the labels are replaced by -100.
    """
    # `text_target=` tokenizes the targets in the same call; it replaces the
    # deprecated `as_target_tokenizer()` context manager.
    model_inputs = mbart_tokenizer(
        examples["te"],
        text_target=examples["ta"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
    # Mask label padding with -100 so padded positions are excluded from the loss.
    pad_id = mbart_tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token == pad_id else token for token in seq]
        for seq in model_inputs["labels"]
    ]
    return model_inputs

# Tokenize both splits in batches of 1000 examples and drop the raw text
# columns once they have been turned into model inputs.
finance_tokenized_datasets = finance_dataset.map(
    function=mbart_preprocess_function,
    remove_columns=["ta", "te"],
    batch_size=1000,
    batched=True,
)
# Show one tokenized example as a quick sanity check.
print(f'Finance - Tokenized train sample: {finance_tokenized_datasets["train"][0]}')

# Training arguments for fine-tuning
finance_training_args = Seq2SeqTrainingArguments(
    output_dir="./mbart_finetuned_finance_te2ta",
    # Evaluate and checkpoint once per epoch; the two strategies are kept
    # aligned because load_best_model_at_end is enabled below.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=5,
    report_to="none",
    push_to_hub=False,
    # Mixed precision needs a GPU. Enabling it unconditionally would break the
    # CPU fallback that `device` explicitly allows for.
    fp16=torch.cuda.is_available(),
    # Restore the checkpoint with the lowest validation loss after training.
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    logging_steps=200,
    # NOTE: no save_steps here - checkpoints are written per epoch, so a
    # step-based interval would never take effect.
)

# Batch collator for seq2seq examples, bound to the tokenizer and model.
finance_data_collator = DataCollatorForSeq2Seq(tokenizer=mbart_tokenizer, model=mbart_model)

# Trainer wiring: model, hyper-parameters, tokenized splits and the collator.
finance_trainer = Seq2SeqTrainer(
    args=finance_training_args,
    model=mbart_model,
    tokenizer=mbart_tokenizer,
    data_collator=finance_data_collator,
    train_dataset=finance_tokenized_datasets["train"],
    eval_dataset=finance_tokenized_datasets["test"],
)

# Run the fine-tuning loop.
print("Fine-tuning mBART on finance data (Telugu to Tamil)...")
finance_trainer.train()

# Persist the fine-tuned weights and the tokenizer side by side so the
# directory can be reloaded with from_pretrained().
finance_trainer.save_model("./mbart_finetuned_finance_te2ta")
mbart_tokenizer.save_pretrained("./mbart_finetuned_finance_te2ta")


# Reload the artifacts that were just written to disk and report their
# vocabulary sizes, mirroring the check done on the base checkpoint.
mbart_finance_model = AutoModelForSeq2SeqLM.from_pretrained("./mbart_finetuned_finance_te2ta")
mbart_finance_model = mbart_finance_model.to(device)
mbart_finance_tokenizer = MBart50TokenizerFast.from_pretrained(
    "./mbart_finetuned_finance_te2ta",
    src_lang="te_IN",
    tgt_lang="ta_IN",
)
print(f"mBART Finance - Saved tokenizer vocab size: {len(mbart_finance_tokenizer)}")
print(f"mBART Finance - Saved model output vocab size: {mbart_finance_model.get_output_embeddings().weight.size(0)}")

# Test translation with debugging
def mbart_translate_finance_text(input_text, debug=False):
    """Translate Telugu text to Tamil with the reloaded fine-tuned model.

    Args:
        input_text: Telugu text to translate.
        debug: When true, print the input token ids, the raw generated ids
            and the decoded output including special tokens.

    Returns:
        The decoded Tamil translation with special tokens removed and
        surrounding whitespace stripped.
    """
    tokenizer = mbart_finance_tokenizer
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=128,
        truncation=True,
        padding=True,
    ).to(device)
    if debug:
        print("Tokenized Input IDs:", encoded["input_ids"].tolist())

    # Beam search settings; the forced BOS token makes the decoder start
    # with the Tamil language code.
    generation_options = {
        "max_length": 256,
        "min_length": 10,
        "num_beams": 5,
        "early_stopping": False,
        "length_penalty": 1.0,
        "no_repeat_ngram_size": 2,
        "forced_bos_token_id": tokenizer.lang_code_to_id["ta_IN"],
    }
    generated = mbart_finance_model.generate(**encoded, **generation_options)
    best_sequence = generated[0]

    if debug:
        print("Raw Output IDs:", best_sequence.tolist())
        print("Decoded with special tokens:", tokenizer.decode(best_sequence, skip_special_tokens=False))

    return tokenizer.decode(best_sequence, skip_special_tokens=True).strip()

# Smoke test: translate one Telugu sentence with debug output enabled.
input_text = "బ్యాంకు ఖాతాలో పెట్టుబడి పెట్టాలనుకుంటున్నాను."  # "I want to invest in a bank account."
translated_text = mbart_translate_finance_text(input_text, debug=True)
print(f"mBART Finance Translation (Telugu to Tamil): {translated_text}")

2025-05-06 17:01:18.050467: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-06 17:01:18.062478: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746531078.076098  322489 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746531078.080205  322489 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746531078.090613  322489 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Using device: cuda
Finance DataFrame shape: (32334, 2)
Sample finance data:
                                                Tamil  \
0  கடந்த நிதியாண்டு முழுவதும் இந்நிறுவனத்தின் வரு...   
1  முதலிடத்தில் இருப்பது கேட்ஸ் 2006 ல் மைக்ரோசா...   
2  கடந்த ஆண்டு வழங்கப்பட்ட தொகையுடன் ஒப்பிட்டால் ...   
3  கருப்புப் பணத்திற்கு எதிராக டிஜிட்டல் பரிவர்த்...   
4  இந்த நவீனமயமாக்கல் மூலமாக நிறுவன வள திட்டமிடல்...   

                                              Telugu  
0  గత ఆర్థిక సంవత్సరంలో కంపెనీ ఆదాయం రూ .335.53 క...  
1  2006 లో గేట్స్ మైక్రోసాఫ్ట్ నుండి బయలుదేరినప్ప...  
2  గత సంవత్సరం చెల్లించిన మొత్తంతో పోలిస్తే ఇది 3...  
3  బ్లాక్ మనీకి వ్యతిరేకంగా డిజిటల్ లావాదేవీలను ప...  
4  ఈ ఆధునీకరణతో, ERP పరిష్కారాలను అటువంటి కార్యకల...  
Finance train dataset size: 29100
Finance test dataset size: 3234
mBART - Tokenizer vocab size: 250054
mBART - Model output vocab size: 250054
mBART - Vocab sizes match, no adjustment needed.


Map:   0%|          | 0/29100 [00:00<?, ? examples/s]



Map:   0%|          | 0/3234 [00:00<?, ? examples/s]

Finance - Tokenized train sample: {'input_ids': [250045, 135813, 16957, 1886, 41095, 3049, 90382, 2695, 7747, 16577, 2195, 18472, 28184, 4276, 124584, 142635, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'labels': [250044, 15413, 12391, 39639, 4575, 123555, 2913, 22262, 57836, 10479, 39438, 6001, 108471

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss
1,1.103,1.032806
2,0.9348,0.991043
3,0.825,0.985544
4,0.7716,0.989645
5,0.7112,0.998757


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].


mBART Finance - Saved tokenizer vocab size: 250054
mBART Finance - Saved model output vocab size: 250054
Tokenized Input IDs: [[250045, 167493, 198661, 1296, 24663, 2127, 148763, 24663, 239555, 18668, 5, 2]]
Raw Output IDs: [2, 250044, 115828, 1962, 57494, 194272, 80087, 105723, 18890, 95591, 142240, 5, 2]
Decoded with special tokens: </s>ta_IN வங்கிக் கணக்கில் முதலீடு செய்ய விரும்புகிறேன்.</s>
mBART Finance Translation (Telugu to Tamil): வங்கிக் கணக்கில் முதலீடு செய்ய விரும்புகிறேன்.


#Evaluation Metrics

In [None]:
import logging
import os

import numpy as np
import pandas as pd
import torch
from bert_score import score as bert_score
from datasets import Dataset
from indicnlp.tokenize import indic_tokenize
from sacrebleu import corpus_bleu, corpus_chrf, corpus_ter
from tqdm import tqdm
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig, AutoModelForSeq2SeqLM


# Module logger; INFO level so progress messages from the evaluation show up.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


# Default to the CPU and switch to the GPU only when torch can see one.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
logger.info("Using device: %s", device)


MODEL_PATH = "./mbart_finetuned_finance_te2ta"  # fine-tuned checkpoint to evaluate
DATASET_PATH = "./Finance_Data/testing.csv"  # CSV with 'Telugu' and 'Tamil' columns
NUM_SAMPLES = 800  # upper bound on the number of rows that get translated
SIMILARITY_THRESHOLD = 0.85  # BERTScore F1 cut-off for the filtered output

# Output paths
RESULTS_DIR = "./Finance_Data/results_reverse"
# DataFrame.to_csv does not create missing directories. Create the target
# folder up front so a missing directory cannot fail the run only after all
# translations have already been generated.
os.makedirs(RESULTS_DIR, exist_ok=True)
BLEU_OUTPUT_PATH = f"{RESULTS_DIR}/bleu_evaluation_results_te2ta.csv"
BERTSCORE_OUTPUT_PATH = f"{RESULTS_DIR}/bertscore_evaluation_results_te2ta.csv"
CHRF_OUTPUT_PATH = f"{RESULTS_DIR}/chrf_evaluation_results_te2ta.csv"
TER_OUTPUT_PATH = f"{RESULTS_DIR}/ter_evaluation_results_te2ta.csv"
FILTERED_OUTPUT_PATH = f"{RESULTS_DIR}/filtered_sentences_te2ta.csv"

# Load the saved model configuration and make sure early stopping is set:
# directly on the config when it carries no `generation_config` attribute,
# otherwise on that attribute, and only when it is still unset there.
# NOTE(review): generate() below is also called with early_stopping=True
# explicitly, so this may be redundant - confirm before removing.
logger.info("Loading model configuration...")
config = AutoConfig.from_pretrained(MODEL_PATH)
if not hasattr(config, 'generation_config'):
    config.early_stopping = True
elif config.generation_config.early_stopping is None:
    config.generation_config.early_stopping = True


logger.info("Loading model and tokenizer...")
try:
    mbart_saved_model = MBartForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        config=config
    ).to(device)
except Exception as exc:
    # A bare `except:` would also trap KeyboardInterrupt/SystemExit and hide
    # the reason for the failure. Catch ordinary errors only, record why the
    # first attempt failed, then retry through the generic auto class.
    logger.warning(
        "MBartForConditionalGeneration could not load %s (%s); retrying with AutoModelForSeq2SeqLM",
        MODEL_PATH,
        exc,
    )
    mbart_saved_model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_PATH,
        config=config
    ).to(device)


# Enable early stopping on the loaded model's own generation defaults, if it has any.
if hasattr(mbart_saved_model, 'generation_config'):
    mbart_saved_model.generation_config.early_stopping = True


# Tokenizer saved next to the model; source is Telugu, target is Tamil.
mbart_saved_tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_PATH, src_lang="te_IN", tgt_lang="ta_IN")


# Read the evaluation CSV and show what it contains.
logger.info("Loading dataset...")
df = pd.read_csv(DATASET_PATH, encoding='utf-8')
print(f"Dataset columns: {df.columns.tolist()}")
print(f"Dataset shape: {df.shape}")
print(f"First few rows:\n{df.head()}")


telugu_col = 'Telugu'
tamil_col = 'Tamil'

# Both language columns are required; fail early with the available names.
if not {telugu_col, tamil_col}.issubset(df.columns):
    raise ValueError(f"CSV must contain 'Telugu' and 'Tamil' columns. Available columns: {df.columns.tolist()}")

print(f"Using Telugu column: {telugu_col}")
print(f"Using Tamil column: {tamil_col}")


# Keep only the two text columns and discard rows with a missing value.
df = df[[telugu_col, tamil_col]].dropna()

# Normalise to strings and blank out literal 'nan'/'NaN' cells so the
# emptiness filter below removes them as well.
for text_col in (telugu_col, tamil_col):
    df[text_col] = df[text_col].astype(str).replace(['nan', 'NaN', ''], '')

# Retain rows where both sides are non-empty, then switch to the column
# names the evaluation functions read.
has_both_sides = df[telugu_col].ne('') & df[tamil_col].ne('')
df = df[has_both_sides]
df = df.rename(columns={telugu_col: 'telugu_sentence', tamil_col: 'tamil_sentence'})
test_dataset = Dataset.from_pandas(df)
print(f"Test dataset size: {len(test_dataset)}")


def indic_tokenize_text(text):
    """Return *text* as space-joined tokens from the Indic NLP trivial tokenizer.

    The tokenizer is called with the Tamil language code. Anything that is
    not a string, and any empty or whitespace-only string, yields "".
    """
    if isinstance(text, str) and text.strip():
        tokens = indic_tokenize.trivial_tokenize(text, lang='ta')
        return ' '.join(tokens)
    return ""


def mbart_translate_text(input_text, debug=False):
    """Translate one Telugu sentence to Tamil with the saved model.

    Args:
        input_text: Telugu text. Non-string, empty or whitespace-only input
            is not sent to the model.
        debug: When true, log the input token ids, the raw generated ids and
            the decoded output including special tokens.

    Returns:
        The decoded Tamil translation without special tokens and with
        surrounding whitespace stripped, or "" for unusable input.
    """
    usable = isinstance(input_text, str) and bool(input_text.strip())
    if not usable:
        return ""

    tokenizer = mbart_saved_tokenizer
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=128,
        truncation=True,
        padding=True,
    ).to(device)
    if debug:
        logger.info("Tokenized Input IDs: %s", encoded['input_ids'].tolist())

    # Beam search; the forced BOS token makes decoding start with the Tamil
    # language code.
    generation_options = {
        "max_length": 256,
        "min_length": 10,
        "num_beams": 5,
        "early_stopping": True,
        "length_penalty": 1.2,
        "no_repeat_ngram_size": 3,
        "forced_bos_token_id": tokenizer.lang_code_to_id["ta_IN"],
    }
    generated = mbart_saved_model.generate(**encoded, **generation_options)
    best_sequence = generated[0]

    if debug:
        logger.info("Raw Output IDs: %s", best_sequence.tolist())
        logger.info(
            "Decoded with special tokens: %s",
            tokenizer.decode(best_sequence, skip_special_tokens=False),
        )

    return tokenizer.decode(best_sequence, skip_special_tokens=True).strip()


def generate_translations(dataset, num_samples=NUM_SAMPLES):
    """Translate the first rows of *dataset* and collect parallel result lists.

    Args:
        dataset: Dataset whose rows provide "telugu_sentence" and
            "tamil_sentence".
        num_samples: Maximum number of rows, taken from the start, to translate.

    Returns:
        A tuple ``(test_data, sources, references, hypotheses)``: the selected
        subset plus three equally long lists. A row whose translation raises
        is logged and contributes an empty hypothesis.
    """
    subset_size = min(num_samples, len(dataset))
    test_data = dataset.select(range(subset_size))
    logger.info("Generating translations for %d samples", len(test_data))

    sources, references, hypotheses = [], [], []
    for row in tqdm(test_data, desc="Generating translations"):
        telugu_text = row["telugu_sentence"]
        tamil_reference = row["tamil_sentence"]

        try:
            translated = mbart_translate_text(telugu_text, debug=False)
        except Exception as err:
            # Best effort: one bad row must not abort the whole evaluation.
            logger.warning("Error translating '%s': %s", telugu_text, err)
            translated = ""

        sources.append(telugu_text)
        references.append(tamil_reference)
        hypotheses.append(translated)

    return test_data, sources, references, hypotheses

# Compute BLEU score
def compute_bleu(test_data, sources, references, hypotheses):
    """Compute corpus-level BLEU on Indic-tokenized text and save it to CSV.

    Args:
        test_data: Evaluated subset; unused here, kept so all metric
            functions share one signature.
        sources: Telugu source sentences.
        references: Tamil reference sentences, one per hypothesis.
        hypotheses: Tamil model outputs, parallel to ``references``.

    Returns:
        The corpus BLEU score (sacrebleu's 0-100 scale).

    Side effects:
        Writes ``BLEU_OUTPUT_PATH``. The ``bleu_score`` column repeats the
        single corpus-level score on every row; it is not a per-sentence score.
    """
    logger.info("Computing BLEU score...")

    tokenized_hypotheses = [indic_tokenize_text(hyp) for hyp in hypotheses]
    # sacrebleu takes a list of reference *streams*, each parallel to the
    # hypotheses. With one reference per sentence that is a single stream
    # holding every reference. Nesting each reference in its own list
    # declares N alternative streams of length one instead, so the corpus
    # is not scored sentence-by-sentence.
    tokenized_references = [[indic_tokenize_text(ref) for ref in references]]

    # Text is already tokenized above, so sacrebleu's own tokenizer is disabled.
    bleu = corpus_bleu(tokenized_hypotheses, tokenized_references, tokenize='none')
    bleu_score = bleu.score
    logger.info(f"BLEU Score: {bleu_score:.2f}")

    results_df = pd.DataFrame({
        "telugu_sentence": sources,
        "tamil_sentence": references,
        "tamil_hypothesis": hypotheses,
        "bleu_score": [bleu_score] * len(sources)
    })
    results_df.to_csv(BLEU_OUTPUT_PATH, index=False, encoding='utf-8')
    logger.info(f"BLEU results saved to {BLEU_OUTPUT_PATH}")

    return bleu_score
    
# Compute chrF++ score
def compute_chrf(test_data, sources, references, hypotheses):
    """Compute corpus-level chrF++ and save it alongside the sentence pairs.

    Args:
        test_data: Evaluated subset; unused here, kept so all metric
            functions share one signature.
        sources: Telugu source sentences.
        references: Tamil reference sentences, one per hypothesis.
        hypotheses: Tamil model outputs, parallel to ``references``.

    Returns:
        The corpus chrF++ score (character order 6, word order 2, beta 2).

    Side effects:
        Writes ``CHRF_OUTPUT_PATH``. The ``chrf_score`` column repeats the
        single corpus-level score on every row.
    """
    logger.info("Computing chrF++ score...")

    # sacrebleu takes a list of reference streams, each parallel to the
    # hypotheses; one reference per sentence means exactly one stream.
    # Wrapping every reference in its own list would declare N streams of
    # length one and the corpus would not be scored sentence-by-sentence.
    refs_list = [list(references)]
    chrf = corpus_chrf(hypotheses, refs_list, char_order=6, word_order=2, beta=2)
    chrf_score = chrf.score
    logger.info(f"chrF++ Score: {chrf_score:.2f}")

    results_df = pd.DataFrame({
        "telugu_sentence": sources,
        "tamil_sentence": references,
        "tamil_hypothesis": hypotheses,
        "chrf_score": [chrf_score] * len(sources)
    })
    results_df.to_csv(CHRF_OUTPUT_PATH, index=False, encoding='utf-8')
    logger.info(f"chrF++ results saved to {CHRF_OUTPUT_PATH}")

    return chrf_score
    
# Compute TER score
def compute_ter(test_data, sources, references, hypotheses):
    """Compute corpus-level TER and save it alongside the sentence pairs.

    Args:
        test_data: Evaluated subset; unused here, kept so all metric
            functions share one signature.
        sources: Telugu source sentences.
        references: Tamil reference sentences, one per hypothesis.
        hypotheses: Tamil model outputs, parallel to ``references``.

    Returns:
        The corpus TER score (lower is better).

    Side effects:
        Writes ``TER_OUTPUT_PATH``. The ``ter_score`` column repeats the
        single corpus-level score on every row.
    """
    logger.info("Computing TER score...")

    # sacrebleu takes a list of reference streams, each parallel to the
    # hypotheses; one reference per sentence means exactly one stream.
    # Wrapping every reference in its own list would declare N streams of
    # length one and the corpus would not be scored sentence-by-sentence.
    refs_list = [list(references)]
    ter = corpus_ter(hypotheses, refs_list)
    ter_score = ter.score
    logger.info(f"TER Score: {ter_score:.2f}")

    results_df = pd.DataFrame({
        "telugu_sentence": sources,
        "tamil_sentence": references,
        "tamil_hypothesis": hypotheses,
        "ter_score": [ter_score] * len(sources)
    })
    results_df.to_csv(TER_OUTPUT_PATH, index=False, encoding='utf-8')
    logger.info(f"TER results saved to {TER_OUTPUT_PATH}")

    return ter_score

# Compute BERTScore and filter by threshold
def compute_bertscore_and_filter(test_data, sources, references, hypotheses):
    """Score every hypothesis with BERTScore and save full and filtered results.

    Args:
        test_data: Evaluated subset; not used by this function.
        sources: Telugu source sentences.
        references: Tamil reference sentences.
        hypotheses: Tamil model outputs, parallel to ``references``.

    Returns:
        A tuple ``(avg_f1, bert_f1_scores)``: the mean F1 and the list of
        per-sentence F1 values.

    Side effects:
        Writes all rows to ``BERTSCORE_OUTPUT_PATH`` and the rows whose F1 is
        at least ``SIMILARITY_THRESHOLD`` to ``FILTERED_OUTPUT_PATH``.
    """
    logger.info("Computing BERTScore...")

    # Only the F1 component is used; precision and recall are discarded.
    _precision, _recall, f1_tensor = bert_score(
        hypotheses,
        references,
        lang="ta",
        model_type="bert-base-multilingual-cased",
        device=device,
        verbose=True
    )

    avg_f1 = f1_tensor.mean().item()
    logger.info(f"BERTScore F1: {avg_f1:.4f}")

    bert_f1_scores = f1_tensor.tolist()

    # Per-sentence results for every evaluated pair.
    bertscore_df = pd.DataFrame({
        "telugu_sentence": sources,
        "tamil_sentence": references,
        "tamil_hypothesis": hypotheses,
        "bertscore_f1": bert_f1_scores
    })
    bertscore_df.to_csv(BERTSCORE_OUTPUT_PATH, index=False, encoding='utf-8')
    logger.info(f"BERTScore results saved to {BERTSCORE_OUTPUT_PATH}")

    # Subset of pairs that reach the similarity threshold.
    meets_threshold = bertscore_df['bertscore_f1'] >= SIMILARITY_THRESHOLD
    filtered_df = bertscore_df.loc[meets_threshold]
    filtered_df.to_csv(FILTERED_OUTPUT_PATH, index=False, encoding='utf-8')
    logger.info(f"Filtered results (BERTScore F1 >= {SIMILARITY_THRESHOLD}) saved to {FILTERED_OUTPUT_PATH}")
    print(f"Filtered dataset size: {len(filtered_df)}")

    return avg_f1, bert_f1_scores


def evaluate_model():
    """Run the full evaluation: translate, score, print a summary, demo one sentence.

    Translates up to ``NUM_SAMPLES`` rows of ``test_dataset``, computes BLEU,
    BERTScore (with threshold filtering), chrF++ and TER, prints a summary
    and finally translates one fixed Telugu sentence with debug logging.

    Returns:
        A dict with the keys "bleu", "chrf", "ter" and "bertscore" (mean F1).
    """
    test_data, sources, references, hypotheses = generate_translations(test_dataset, NUM_SAMPLES)

    bleu_score = compute_bleu(test_data, sources, references, hypotheses)
    bertscore_f1, bert_f1_scores = compute_bertscore_and_filter(test_data, sources, references, hypotheses)
    chrf_score = compute_chrf(test_data, sources, references, hypotheses)
    ter_score = compute_ter(test_data, sources, references, hypotheses)

    # Count the filtered rows from the scores already in memory instead of
    # re-reading the CSV that was just written.
    filtered_count = sum(score >= SIMILARITY_THRESHOLD for score in bert_f1_scores)

    print("\n" + "="*50)
    print("EVALUATION SUMMARY")
    print("="*50)
    print(f"Number of samples: {len(sources)}")
    print(f"Filtered samples (BERTScore F1 >= {SIMILARITY_THRESHOLD}): {filtered_count}")
    print(f"BLEU Score: {bleu_score:.2f}")
    print(f"chrF++ Score: {chrf_score:.2f}")
    print(f"TER Score: {ter_score:.2f} (lower is better)")
    print(f"BERTScore F1: {bertscore_f1:.4f}")
    print("="*50)

    # Single-sentence smoke test with debug logging enabled.
    test_input = "హలో, మీరు ఎలా ఉన్నారు?"  # "Hello, how are you?" in Telugu
    translated_text = mbart_translate_text(test_input, debug=True)
    print("\nTest Translation:")
    print(f"Source (Telugu): {test_input}")
    print(f"Target (Tamil): {translated_text}")

    return {
        "bleu": bleu_score,
        "chrf": chrf_score,
        "ter": ter_score,
        "bertscore": bertscore_f1
    }

# Run the evaluation. As the last expression of the cell, the returned
# metrics dict is also displayed as the cell's output.
evaluate_model()

INFO:__main__:Using device: cuda
INFO:__main__:Loading model configuration...
INFO:__main__:Loading model and tokenizer...
INFO:__main__:Loading dataset...
INFO:__main__:Generating translations for 800 samples


Dataset columns: ['index', 'Tamil', 'Telugu', 'tamil_vocab_score', 'telugu_vocab_score', 'reason', 'similarity_score']
Dataset shape: (838, 7)
First few rows:
   index                                              Tamil  \
0     64  ஒரு மாத கடனுக்கு 8.45%, மூன்று மாதங்களுக்கு 8....   
1     88               கிரெடிட் கார்டில் பல நன்மைகள் உள்ளன.   
2    108  2016 ஆம் ஆண்டில், இது 62.7 பில்லியன் டாலராகவும...   
3    118  ஜூலை 1 முதல் நாடு முழுவதும் ஜிஎஸ்டி செயல்படுத்...   
4    135  அமெரிக்காவில் வட்டி விகிதங்கள் அதிகரித்தால், உ...   

                                              Telugu  tamil_vocab_score  \
0  ఒక నెల కాలావ‌ధి రుణాల‌కు 8.45%, మూడు నెల‌ల కాల...           0.800000   
1     క్రెడిట్ కార్డు వ‌ల్ల చాలా ప్ర‌యోజ‌నాలున్నాయి.           1.000000   
2  2016 సంవత్సరంలో 62.7 బిలియన్‌ డాలర్లు, 2017 65...           0.733333   
3  జులై 1 నుంచి దేశ‌వ్యాప్తంగా జీఎస్టీ అమ‌లు అవ‌న...           0.923077   
4  అమెరికాలో వ‌డ్డీ రేట్లు పెరిగితే బంగారం ధ‌ర‌లు...           1.000000   

   telugu_v

Generating translations: 100%|████████████████| 800/800 [03:24<00:00,  3.91it/s]
INFO:__main__:Computing BLEU score...
INFO:__main__:BLEU Score: 61.09
INFO:__main__:BLEU results saved to ./Finance_Data/results_reverse/bleu_evaluation_results_te2ta.csv
INFO:__main__:Computing BERTScore...


calculating scores...
computing bert embedding.


  0%|          | 0/24 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:BERTScore F1: 0.8850
INFO:__main__:BERTScore results saved to ./Finance_Data/results_reverse/bertscore_evaluation_results_te2ta.csv
INFO:__main__:Filtered results (BERTScore F1 >= 0.85) saved to ./Finance_Data/results_reverse/filtered_sentences_te2ta.csv
INFO:__main__:Computing chrF++ score...


done in 1.29 seconds, 618.71 sentences/sec
Filtered dataset size: 568


INFO:__main__:chrF++ Score: 73.30
INFO:__main__:chrF++ results saved to ./Finance_Data/results_reverse/chrf_evaluation_results_te2ta.csv
INFO:__main__:Computing TER score...
INFO:__main__:TER Score: 133.78
INFO:__main__:TER results saved to ./Finance_Data/results_reverse/ter_evaluation_results_te2ta.csv
INFO:__main__:Tokenized Input IDs: [[250045, 24147, 1296, 4, 22735, 24722, 91064, 32, 2]]
INFO:__main__:Raw Output IDs: [2, 250044, 39507, 66705, 4, 29947, 128251, 37961, 19238, 32, 2]
INFO:__main__:Decoded with special tokens: </s>ta_IN ஹலோ, எப்படி இருக்கிறாய் நீ?</s>



EVALUATION SUMMARY
Number of samples: 800
Filtered samples (BERTScore F1 >= 0.85): 568
BLEU Score: 61.09
chrF++ Score: 73.30
TER Score: 133.78 (lower is better)
BERTScore F1: 0.8850

Test Translation:
Source (Telugu): హలో, మీరు ఎలా ఉన్నారు?
Target (Tamil): ஹலோ, எப்படி இருக்கிறாய் நீ?


{'bleu': 61.090916971845274,
 'chrf': 73.30075376298603,
 'ter': 133.77926421404683,
 'bertscore': 0.8850445747375488}