<h1><b> 2B SETUP

In [3]:
from datasets import load_dataset
val_dataset    =   load_dataset("wmt16","de-en", split="validation")
test_dataset   =   load_dataset("wmt16","de-en", split="test")

In [4]:
import numpy as np
from tqdm import tqdm
def extract_sentences(datadict):
    de_sentences=[]
    en_sentences=[]
    for de_en in datadict["translation"]:
        de_sentences.append(de_en['de'].lower())
        en_sentences.append(de_en['en'].lower())
    return np.array(de_sentences), np.array(en_sentences)

In [5]:
val_de   , val_en    =  extract_sentences(val_dataset)
test_de  , test_en   =  extract_sentences(test_dataset)

In [6]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [11]:
val_translations=[]
test_translations=[]

for sentence in tqdm(val_en):
    eng_sentence = sentence
    prefix = "translate English to German: "

    input_text = prefix + eng_sentence
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_beams=4, early_stopping=True)
    german_translation = tokenizer.decode(output[0], skip_special_tokens=True)

    # print("English: ", eng_sentence)
    # print("german Translation: ", german_translation)
    val_translations.append(german_translation)


for sentence in tqdm(test_en):
    eng_sentence = sentence
    prefix = "translate English to German: "

    input_text = prefix + eng_sentence
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_beams=4, early_stopping=True)
    german_translation = tokenizer.decode(output[0], skip_special_tokens=True)

    # print("English: ", eng_sentence)
    # print("german Translation: ", german_translation)
    test_translations.append(german_translation)


100%|██████████| 2169/2169 [27:50<00:00,  1.30it/s]
100%|██████████| 2999/2999 [39:30<00:00,  1.27it/s]  


In [12]:
import pickle
with open('translations.pkl','wb') as f:
    pickle.dump([val_translations,test_translations],f)

In [1]:
import pickle
with open('translations.pkl',"rb") as f:
    val_translations,test_translations=pickle.load(f)

In [2]:
import evaluate
bleu=evaluate.load("bleu")
meteor=evaluate.load("meteor")
bertScore=evaluate.load("bertscore")

[nltk_data] Downloading package wordnet to /home/iiitd/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/iiitd/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/iiitd/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [6]:
def evaluationMetrics(reference, predictions):
    for i in range(1,5):
        print(f"BLEU-{i} evaluation:",bleu.compute(references=reference,predictions=predictions,max_order=i))

    print("\nMETEOR Score: ",meteor.compute(references=reference,predictions=predictions))

    print("\nBERTScore: ", bertScore.compute(predictions=reference,references=predictions,lang='en'))

In [7]:
print("Validation dataset zero-Shot evaluation : ")
evaluationMetrics(val_de,val_translations)
print("\nTest dataset zero-Shot evaluation : ")
evaluationMetrics(test_de,test_translations)

Validation dataset zero-Shot evaluation : 
BLEU-1 evaluation: {'bleu': 0.4757060634453219, 'precisions': [0.4757060634453219], 'brevity_penalty': 1.0, 'length_ratio': 1.012788070492544, 'translation_length': 44826, 'reference_length': 44260}
BLEU-2 evaluation: {'bleu': 0.30666572511914747, 'precisions': [0.4757060634453219, 0.19769322737182643], 'brevity_penalty': 1.0, 'length_ratio': 1.012788070492544, 'translation_length': 44826, 'reference_length': 44260}
BLEU-3 evaluation: {'bleu': 0.20943201112543214, 'precisions': [0.4757060634453219, 0.19769322737182643, 0.09767843912077057], 'brevity_penalty': 1.0, 'length_ratio': 1.012788070492544, 'translation_length': 44826, 'reference_length': 44260}
BLEU-4 evaluation: {'bleu': 0.14705278059281643, 'precisions': [0.4757060634453219, 0.19769322737182643, 0.09767843912077057, 0.05090539059646193], 'brevity_penalty': 1.0, 'length_ratio': 1.012788070492544, 'translation_length': 44826, 'reference_length': 44260}

METEOR Score:  {'meteor': 0.530

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



BERTScore:  {'precision': [0.8894785046577454, 0.9076931476593018, 0.9242793917655945, 0.896828293800354, 0.8904922604560852, 0.9104254245758057, 0.9490283727645874, 0.9059965014457703, 0.9063154458999634, 0.9123014211654663, 0.8627183437347412, 0.9331455230712891, 0.8971627950668335, 0.9485757350921631, 0.882827639579773, 0.9121001958847046, 0.9253000617027283, 0.8663098812103271, 0.8813304901123047, 0.878788948059082, 0.874433696269989, 0.9072525501251221, 0.8928102254867554, 0.9062881469726562, 0.8859453797340393, 0.9064558148384094, 0.9366355538368225, 0.8936944603919983, 0.9012700319290161, 0.9004562497138977, 0.9217568635940552, 0.8901947736740112, 0.9160059094429016, 0.8807432055473328, 0.8830481767654419, 0.8726894855499268, 0.9376012086868286, 0.9044404625892639, 0.8604651093482971, 0.9350894689559937, 0.9031029939651489, 0.9162684679031372, 0.8877121210098267, 0.9583418965339661, 0.9037118554115295, 0.9257569313049316, 0.899071991443634, 0.8844363689422607, 0.880003988742828

: 

In [None]:
sentence=input("enter your sentence :")
eng_sentence = sentence
prefix = "translate English to German: "

input_text = prefix + eng_sentence
input_ids = tokenizer.encode(input_text, return_tensors="pt")
output = model.generate(input_ids, max_length=100, num_beams=4, early_stopping=True)
german_translation = tokenizer.decode(output[0], skip_special_tokens=True)

print(german_translation)

In [None]:
import pandas as pd
data=pd.read_csv("input.csv")
english_sentences=data['en'].tolist()

translations=[]
for sentence in tqdm(english_sentences):
    eng_sentence = sentence
    prefix = "translate English to German: "

    input_text = prefix + eng_sentence
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_beams=4, early_stopping=True)
    german_translation = tokenizer.decode(output[0], skip_special_tokens=True)
    translations.append(german_translation)

data["de"]=translations
data