In [None]:
import pandas as pd
import evaluate
import random
import torch
import numpy as np

In [None]:
# One seed shared by every random number generator used in this notebook.
seed_value = 53

# pandas is deliberately absent: it has no `pd.random.seed`, so there is
# nothing to call for it here.
for seeder in (
    random.seed,                 # Python's built-in RNG
    np.random.seed,              # NumPy's global RNG
    torch.manual_seed,           # PyTorch default generator
    torch.cuda.manual_seed,      # current CUDA device
    torch.cuda.manual_seed_all,  # every CUDA device
):
    seeder(seed_value)

In [None]:
_BLEU_METRIC = None  # loaded on first use, then shared by every calculate_bleu call


def calculate_bleu(pred_list, ref_list):
    """Return the BLEU score of ``pred_list`` measured against ``ref_list``.

    The Hugging Face ``evaluate`` BLEU metric is loaded once, on the first
    call, and reused afterwards; this function is called once per language
    inside loops, so reloading the metric each time is wasted work.

    Args:
        pred_list: Predicted texts, one string per example.
        ref_list: Reference texts, aligned with ``pred_list`` by position.

    Returns:
        The value stored under the ``'bleu'`` key of the metric result,
        computed with n-grams up to order 4.
    """
    global _BLEU_METRIC
    if _BLEU_METRIC is None:
        _BLEU_METRIC = evaluate.load("bleu")
    scores = _BLEU_METRIC.compute(predictions=pred_list, references=ref_list, max_order=4)
    return scores['bleu']

In [None]:
# Directory prefix for the per-language CSV splits (trailing slash required:
# file names are appended to it directly).
data_path = "../data/"

### Translation and Back-translation in MSF-BT process

In [None]:
# Language being scored; rerun from this cell once per language code.
lang1 = 'ur'

# Train / dev / test splits for that language.
train_df = pd.read_csv(f"{data_path}{lang1}_train.csv")
dev_df = pd.read_csv(f"{data_path}{lang1}_dev.csv")
test_df = pd.read_csv(f"{data_path}{lang1}_test.csv")

In [None]:
# Reference language. For translation: 'hi' when lang1 is 'en', otherwise 'en'.
# For back-translation: set it to the same code as lang1.
lang2 = 'en'

# Reference splits (the variable names say "en" even when lang2 is not 'en').
en_train_df = pd.read_csv(f"{data_path}{lang2}_train.csv")
en_dev_df = pd.read_csv(f"{data_path}{lang2}_dev.csv")
en_test_df = pd.read_csv(f"{data_path}{lang2}_test.csv")

In [None]:
# Columns holding the system output. These are the translation columns;
# to score back-translation instead, swap in the commented pair below.
pred_pos_column, pred_neg_column = 'POSITIVE_TR', 'NEGATIVE_TR'
# pred_pos_column, pred_neg_column = 'POSITIVE_TR_TR', 'NEGATIVE_TR_TR'

# Columns holding the reference text.
ref_pos_column, ref_neg_column = 'POSITIVE', 'NEGATIVE'

In [None]:
# Splits are paired by position: predictions come from dfs[i],
# references from refs[i].
dfs = [train_df, dev_df, test_df]
refs = [en_train_df, en_dev_df, en_test_df]

pred_data, ref_data = [], []
for pred_split, ref_split in zip(dfs, refs):
    # Within each split: every positive text first, then every negative text.
    pred_data += pred_split[pred_pos_column].tolist() + pred_split[pred_neg_column].tolist()
    ref_data += ref_split[ref_pos_column].tolist() + ref_split[ref_neg_column].tolist()

print(f"BLEU Score for {lang1}:")
# Left as a bare last expression so the notebook displays the score.
calculate_bleu(pred_data, ref_data)

### Translations

#### BLEU Score for en:
0.2068746173173193

#### BLEU Score for hi:
0.2608698044490646

#### BLEU Score for mag:
0.18119490941562166

#### BLEU Score for ml:
0.32859407934998675

#### BLEU Score for mr:
0.32360376399016005

#### BLEU Score for or:
0.33073082902902456

#### BLEU Score for pa:
0.3455499437168114

#### BLEU Score for te:
0.24689777485272318

#### BLEU Score for ur:
0.3841462156590448


### Back-translations

#### BLEU Score for en:
0.42631642755111215

#### BLEU Score for hi:
0.29972691999559

#### BLEU Score for mag:
0.07971240072682187

#### BLEU Score for ml:
0.20690417918405446

#### BLEU Score for mr:
0.2732078802698464

#### BLEU Score for or:
0.21772176469405635

#### BLEU Score for pa:
0.38206792153290997

#### BLEU Score for te:
0.14217740939303822

#### BLEU Score for ur:
0.4098485427039675


### Cross-lingual evaluation for the En-IP-TR-Train experiments

In [None]:
# Reload the English splits; the per-language prediction columns are read from them.
en_train_df = pd.read_csv(f"{data_path}en_train.csv")
en_dev_df = pd.read_csv(f"{data_path}en_dev.csv")
en_test_df = pd.read_csv(f"{data_path}en_test.csv")

In [None]:
# Keys and values are the two parts of the prediction column names in the
# English frames: POSITIVE_<key>_<value> / NEGATIVE_<key>_<value>.
# The trailing comments give the file code of the matching reference data.
languages = {
    'hin': 'Deva',  # hin -> hi
    'mag': 'Deva',  # mag -> mag
    'mal': 'Mlym',  # mal -> ml
    'mar': 'Deva',  # mar -> mr
    'ory': 'Orya',  # ory -> or
    'pan': 'Guru',  # pan -> pa
    'tel': 'Telu',  # tel -> te
    'urd': 'Arab'   # urd -> ur
}

# File codes of the reference CSVs, in the same order as `languages`.
ref_languages = ['hi', 'mag', 'ml', 'mr', 'or', 'pa', 'te', 'ur']

# The two containers are paired by position; fail loudly if they drift apart
# instead of silently scoring against the wrong language.
assert len(languages) == len(ref_languages), "languages and ref_languages must be the same length"

for (lang, suffix), ref_lang in zip(languages.items(), ref_languages):
    positive_col = f"POSITIVE_{lang}_{suffix}"
    negative_col = f"NEGATIVE_{lang}_{suffix}"

    # Predictions: train, dev, test -- positive texts then negative texts in each.
    pred_texts = []
    for en_split_df in (en_train_df, en_dev_df, en_test_df):
        pred_texts += en_split_df[positive_col].tolist() + en_split_df[negative_col].tolist()

    # References in the same split / polarity order. The frames stay in a
    # loop-local name so the notebook-level train_df / dev_df / test_df
    # loaded by an earlier cell are not overwritten.
    trg_texts = []
    for split in ('train', 'dev', 'test'):
        ref_split_df = pd.read_csv(f"{data_path}{ref_lang}_{split}.csv")
        trg_texts += ref_split_df['POSITIVE'].tolist() + ref_split_df['NEGATIVE'].tolist()

    print(f"BLEU Score for {ref_lang}:")
    # A bare expression inside a loop is never displayed, so print the score.
    print(calculate_bleu(pred_texts, trg_texts))
    print('\n')

### En-OP-TR

In [None]:
import os
import pandas as pd
# from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

languages = ['hi', 'mag', 'ml', 'mr', 'or', 'pa', 'te', 'ur']

directory = '../output/'


def _load_pred_column(*file_names):
    """Read each CSV under `directory` and return their 'pred' columns joined in order."""
    frames = [pd.read_csv(os.path.join(directory, file_name)) for file_name in file_names]
    return [text for frame in frames for text in frame['pred'].tolist()]


# Per-language score on a 0-100 scale; languages with mismatched files are left out.
bleu_scores = {}

for lang in languages:
    # En-OP-TR outputs act as the predictions ...
    source_preds = _load_pred_column(
        f'pos_to_neg-{lang}-en_op_tr.csv',
        f'neg_to_pos-{lang}-en_op_tr.csv',
    )
    # ... and the "parallel" outputs act as the references.
    target_preds = _load_pred_column(
        f'pos_to_neg-{lang}-parallel.csv',
        f'neg_to_pos-{lang}-parallel.csv',
    )

    if len(source_preds) != len(target_preds):
        print(f"Mismatch in number of predictions for {lang}. Skipping BLEU calculation.")
        continue

    # One calculate_bleu call over the full lists, rescaled to 0-100.
    bleu_scores[lang] = calculate_bleu(source_preds, target_preds) * 100

for lang, score in bleu_scores.items():
    print(f"{lang}: BLEU Score = {score:.1f}")