# Data Preprocessing

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# %cd /content/drive/MyDrive/MSC_Intro_to_NLP_Group_Project/

In [3]:
!pip install nltk
!pip install gensim
!pip install 'transformers[torch]'
!pip install datasets
!pip install tensorflow
!pip install torch
!pip install contractions




In [4]:
import json
from nltk.tokenize import word_tokenize
from gensim.models import Word2Vec
import numpy as np
import pandas as pd
from transformers import AutoTokenizer
from datasets import load_dataset
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import os
import re
import nltk
import string
from nltk import word_tokenize, pos_tag
import contractions
import pandas as pd
import spacy
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
!python -m spacy download en_core_web_sm

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m93.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [5]:
# Using the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
gpu_name = torch.cuda.get_device_name(device)
print(gpu_name)

cuda
Tesla T4


In [6]:
# Load spaCy model outside of the function to avoid reloading it each time the function is called
nlp = spacy.load("en_core_web_sm")

# Preprocessing

In [7]:
# Preprocessing functions
# Function to capitalize the first letter of each sentence and proper nouns
def capitalize_first_and_proper_nouns(text):
    # Process the text using spaCy to create a Doc object
    doc = nlp(text)

    result = []

    # Iterate over the sentences in the Doc
    for sent in doc.sents:
        # Iterate over the tokens in the sentence
        for token in sent:
            # Capitalize the first letter of each sentence and proper nouns
            if token.is_sent_start or token.pos_ == 'PROPN':
                result.append(token.text.capitalize())
            else:
                result.append(token.text)

    # Rejoin the tokens into a single string
    return ' '.join(result)

# Function to remove repeated punctuations
def remove_repeated_punctuations(sentence):
    # Use regular expression to remove consecutive repeated punctuations
    cleaned_sentence = re.sub(r'(\W)\1+', r'\1', sentence)
    return cleaned_sentence

# Function to expand contractions
def expand_contractions(text):
    return contractions.fix(text)

# Define a tokenization function
def tokenize_sentences(sentences):
    return [word_tokenize(sentence) for sentence in sentences]

def fix_general_spacing(sentence):
    # Fix space before punctuation (like ' ,' to ',')
    sentence = re.sub(r'\s([,.?!:;])', r'\1', sentence)
    # Fix space after punctuation (like ' . ' to '. ')
    sentence = re.sub(r'([,.?!:;])\s', r'\1 ', sentence)
    # Fix space in contractions (like "don 't" to "don't")
    sentence = re.sub(r"\b(\w+)\s('t|'s|'m|'ll|'ve|'re|'d|n't)\b", r"\1\2", sentence)
    # Reduce multiple spaces between words to a single space
    sentence = re.sub(r'\s{2,}', ' ', sentence)
    return sentence

def preprocess(text):
    # text = text.lower()
    text = expand_contractions(text)
    text = remove_repeated_punctuations(text)
    text = capitalize_first_and_proper_nouns(text)
    text = fix_general_spacing(text)
    return text


In [8]:
# Load the preprocessed data from the JSON file
# data_files={
#     "train":"data_train.json",
# }

# dataset = load_dataset("json", data_files=data_files)
# print(dataset)

json_file_path = "data_train.json"

with open(json_file_path, 'r') as file:
    dataset = json.load(file)


# A dataset class to load and preprocess the data

In [9]:
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        return {
            "id": item["id"],
            "topic": item["topic"],
            "input_seq": torch.tensor(item["input_seq"]),
            "output_seq": torch.tensor(item["output_seq"]),
        }


# Creating a Bi-Directional RNN Model

In [10]:
# Preprocessing the whole dataset's informal sentences
def preprocess_data(data):
    for item in data:
        item["transformation"]["informal"] = preprocess(item["transformation"]["informal"])
    return data

In [11]:
# preprocessed_data = preprocess_data(dataset)


In [12]:
# tokenizing the whole dataset
def tokenize_data(data):
    for item in data:
        item["tokenized_informal"] = word_tokenize(item["transformation"]["informal"])
        item["tokenized_formal"] = word_tokenize(item["transformation"]["formal.ref0"])
    return data


In [13]:
# assigning indices to each unique word and creating a vocabulary
def build_vocab(data):
    input_vocab = set()
    output_vocab = set()

    for item in data:
        input_vocab.update(item["tokenized_informal"])
        output_vocab.update(item["tokenized_formal"])

    input_vocab = {word: idx for idx, word in enumerate(input_vocab)}
    output_vocab = {word: idx for idx, word in enumerate(output_vocab)}

    return input_vocab, output_vocab


In [14]:
# function for padding the sequence with max length provided
def pad_sequence(sequence, max_length, vocab):
    padded_sequence = [vocab[word] for word in sequence if word in vocab]
    padded_sequence += [0] * (max_length - len(padded_sequence))
    return padded_sequence

# function to convert tokenized informal and formal sentences in the dataset using padding of 65 tokens
def numericalize_data(data, input_vocab, output_vocab):
#     max_input_length = max(len(item["tokenized_informal"]) for item in data)
#     max_output_length = max(len(item["tokenized_formal"]) for item in data)
    max_input_length = 65
    max_output_length = 65

    for item in data:
        item["input_seq"] = pad_sequence(item["tokenized_informal"], max_input_length, input_vocab)
        item["output_seq"] = pad_sequence(item["tokenized_formal"], max_output_length, output_vocab)

    return data


## Defining a Bi-Directional RNN Model using LSTM from Pytorch

In [15]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output)
        return torch.nn.functional.log_softmax(output, dim=-1)


In [16]:
# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
            inputs = batch["input_seq"].to(device)
            targets = batch["output_seq"].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs.view(-1, len(output_vocab)), targets.view(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")


### Preparing the data

In [17]:
data = preprocess_data(dataset)
data = tokenize_data(data)


In [18]:
with open("./data_train_preprocess_n_token.json", 'w') as f:
  json.dump(data, f, ensure_ascii=False)


In [19]:
print(data[1])


{'id': 1, 'topic': 'Family_Relationships', 'transformation': {'informal': 'Hmmm, I am a guy suffering from verbal abuse from my wife.', 'formal.ref0': 'I suffer through verbal abuse from my wife.', 'formal.ref1': '', 'formal.ref2': '', 'formal.ref3': ''}, 'tokenized_informal': ['Hmmm', ',', 'I', 'am', 'a', 'guy', 'suffering', 'from', 'verbal', 'abuse', 'from', 'my', 'wife', '.'], 'tokenized_formal': ['I', 'suffer', 'through', 'verbal', 'abuse', 'from', 'my', 'wife', '.']}


In [20]:
# numericalizing the data
input_vocab, output_vocab = build_vocab(data)
data = numericalize_data(data, input_vocab, output_vocab)


In [21]:
print(len(data))
print("input_seq: ")
print(len(data[0]['input_seq']))
print(data[0]['input_seq'])
print("output_seq: ")
print(len(data[0]['output_seq']))
print(data[0]['output_seq'])


104562
input_seq: 
65
[46250, 3156, 12762, 6609, 5753, 3156, 45679, 34319, 33475, 47083, 13699, 2022, 27820, 14588, 40840, 260, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
output_seq: 
65
[24464, 23963, 8444, 9839, 1421, 19827, 10471, 29155, 180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [22]:
# training parameters
train_data = data
train_dataset = CustomDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
num_epochs = 10


In [23]:
# getting everything together for the model
model = RNN(len(input_vocab), 256, len(output_vocab)).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [24]:
# training the model
trained_model = train_model(model, train_loader, criterion, optimizer, num_epochs)


Epoch 1/10: 100%|██████████| 3268/3268 [04:38<00:00, 11.73it/s]


Epoch 1, Loss: 5.68937271651031


Epoch 2/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.63it/s]


Epoch 2, Loss: 5.276047459138943


Epoch 3/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.63it/s]


Epoch 3, Loss: 5.112479813095989


Epoch 4/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.62it/s]


Epoch 4, Loss: 5.0334168777617565


Epoch 5/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.61it/s]


Epoch 5, Loss: 4.986323413533709


Epoch 6/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.61it/s]


Epoch 6, Loss: 4.952393132440901


Epoch 7/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.61it/s]


Epoch 7, Loss: 4.922186558585126


Epoch 8/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.61it/s]


Epoch 8, Loss: 4.900054754796489


Epoch 9/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.60it/s]


Epoch 9, Loss: 4.878925438318287


Epoch 10/10: 100%|██████████| 3268/3268 [04:41<00:00, 11.60it/s]

Epoch 10, Loss: 4.853318162034454





In [25]:
# saving the entire model
torch.save(model.state_dict(), 'rnn_model.pth')

# saving the vocabularies as well for future reference
vocabularies = {
    'input_vocab': input_vocab,
    'output_vocab': output_vocab
}

with open('vocabularies.json', 'w') as vocab_file:
    json.dump(vocabularies, vocab_file)


In [26]:
max_input_length = 65
# loading the vocabularies
with open('./vocabularies.json', 'r') as vocab_file:
    loaded_vocabularies = json.load(vocab_file)

input_vocab = loaded_vocabularies['input_vocab']
output_vocab = loaded_vocabularies['output_vocab']

# creating a new instance of the RNN model
loaded_model = RNN(len(input_vocab), 256, len(output_vocab)).to(device)

# loading the trained model weights
map_location=torch.device('cpu')
loaded_model.load_state_dict(torch.load('./rnn_model.pth'))
loaded_model.eval()  # Setting the model to evaluation mode

def predict(model, input_sequence, input_vocab, output_vocab):
    # Preprocess and tokenize input_sequence
    preprocessed_input = preprocess(input_sequence)
    tokenized_input = word_tokenize(preprocessed_input)

    # Numericalize input using the loaded vocabularies
    numericalized_input = pad_sequence(tokenized_input, max_input_length, input_vocab)

    # Convert numericalized input to PyTorch tensor
    input_tensor = torch.tensor(numericalized_input).unsqueeze(0).to(device)

    # Make prediction using the loaded model
    with torch.no_grad():
        model_output = model(input_tensor)

    # Process the model output
    _, predicted_indices = torch.max(model_output, dim=2 )

    # Convert predicted indices to words using the output vocabulary
    predicted_words = [word for index in predicted_indices.squeeze().tolist() for word, idx in output_vocab.items() if idx == index]

    # Join the words into a string (or use as needed)
    processed_output = ' '.join(predicted_words)

    return processed_output


In [27]:
# example usage:
input_sentence = "Luck isn't everything that you may wanna rely upon!!!"
predictions = predict(loaded_model, input_sentence, input_vocab, output_vocab)
print("Predicted output:", predictions)


Predicted output: luck is not everything that you may want to rely stumbled . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .


In [28]:
predictions = preprocess(predictions)
print(predictions)

Luck is not everything that you may want to rely stumbled......................................................


In [29]:
predictions = preprocess(predictions)
print(predictions)


Luck is not everything that you may want to rely stumbled.


# Metrics Implementation

## Implementing TERp and PINC

In [30]:
!pip install torchmetrics
!pip install nltk sacrebleu




In [31]:
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import numpy as np
import json
from transformers import EncoderDecoderModel
import torchmetrics
from datasets import Dataset
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
import gc
import torch
import math
import sacrebleu
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk import ngrams
from nltk.stem import PorterStemmer
from nltk.corpus import wordnet
nltk.download('wordnet')


2024-02-10 23:13:44.490798: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-10 23:13:44.537966: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-10 23:13:44.538001: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-10 23:13:44.539525: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-10 23:13:44.550150: I tensorflow/core/platform/cpu_feature_guar

True

In [32]:
#Higher is better
def calculate_terp(hypothesis: str, reference: str, phrase_table=list(), edit_costs=list()):
    hypothesis_tokens = hypothesis.split()
    reference_tokens = reference.split()

    # TERp by Stem Matches, Synonym Matches, and Phrase Substitutions
    stem_matches = calculate_stem_matches(hypothesis_tokens, reference_tokens)
    synonym_matches = calculate_synonym_matches(hypothesis_tokens, reference_tokens)
    phrase_substitutions = calculate_phrase_substitutions(hypothesis_tokens, reference_tokens, phrase_table, edit_costs)

    # Calculate TERp score
    terp_score = (stem_matches + synonym_matches + phrase_substitutions) / len(reference_tokens)

    return terp_score

def calculate_stem_matches(hypothesis_tokens, reference_tokens):
    stemmer = PorterStemmer()
    stem_matches = sum(1 for hyp_token, ref_token in zip(hypothesis_tokens, reference_tokens)
                      if stemmer.stem(hyp_token.lower()) == stemmer.stem(ref_token.lower()))
    return stem_matches

def calculate_synonym_matches(hypothesis_tokens, reference_tokens):
    synonym_matches = sum(1 for hyp_token, ref_token in zip(hypothesis_tokens, reference_tokens)
                          if are_synonyms(hyp_token.lower(), ref_token.lower()))
    return synonym_matches

def are_synonyms(word1, word2):
    synsets1 = wordnet.synsets(word1)
    synsets2 = wordnet.synsets(word2)

    return any(set1.wup_similarity(set2) > 0.7 for set1 in synsets1 for set2 in synsets2)

def calculate_phrase_substitutions(hypothesis_tokens, reference_tokens, phrase_table, edit_costs):
    substitution_cost = 0

    for i in range(len(hypothesis_tokens)):
        for j in range(len(reference_tokens)):
            if (hypothesis_tokens[i], reference_tokens[j]) in phrase_table:
                # Retrieve paraphrase information from the phrase table
                paraphrase_info = phrase_table[(hypothesis_tokens[i], reference_tokens[j])]

                # Calculate the cost using the provided formula
                cost = (
                    edit_costs['w1'] +
                    edit_costs['w2'] * paraphrase_info['edit'] * math.log(paraphrase_info['probability']) +
                    edit_costs['w3'] * paraphrase_info['edit'] * paraphrase_info['probability'] +
                    edit_costs['w4'] * paraphrase_info['edit']
                )

                # Ensure the substitution cost is not negative
                substitution_cost += max(0, cost)

    return substitution_cost

def terp_alignment(hypothesis, reference, phrase_table=list(), edit_costs=list()):
    alignment = []

    for hyp_token, ref_token in zip(hypothesis.split(), reference.split()):
        if hyp_token == ref_token:
            alignment.append((hyp_token, ref_token, "Exact Match"))
        else:
            alignment.append((hyp_token, ref_token, "Mismatch"))

    return alignment

def calculate_pinc(hypothesis: str, reference: str, n: int):
    hypothesis_split = hypothesis.split()
    reference_split = reference.split()

    hypothesis_ngrams = set(ngrams(hypothesis_split, n))
    reference_ngrams = set(ngrams(reference_split, n))
    new_ngrams = hypothesis_ngrams - reference_ngrams
    pinc_score = len(new_ngrams) / len(hypothesis_ngrams)

    return pinc_score


In [33]:
# example usage:
hypothesis_sentence = "This is an example sentence."
reference_sentence = "This is an example sentence."

# calculating TERp score
terp_score = calculate_terp(hypothesis_sentence, reference_sentence)
print(f"TERp Score: {terp_score}")

# generating alignment
alignment = terp_alignment(hypothesis_sentence, reference_sentence)
print("Alignment:", alignment)


TERp Score: 1.6
Alignment: [('This', 'This', 'Exact Match'), ('is', 'is', 'Exact Match'), ('an', 'an', 'Exact Match'), ('example', 'example', 'Exact Match'), ('sentence.', 'sentence.', 'Exact Match')]


In [34]:
pinc_score = calculate_pinc(hypothesis_sentence, reference_sentence, 2)
print(f"PINC Score: {pinc_score}")


PINC Score: 0.0


In [35]:
def terp(preds, refs):
  scores = np.zeros(len(preds), dtype=float)
  for i in range(len(preds)):
    pred = preds[i]
    ref = refs[i]
    score = np.min(np.array(list(map(lambda x: calculate_terp(pred, x), ref))))
    scores[i] = score

  return np.mean(scores)

In [36]:
def pinc(preds, refs, n=2):
  scores = np.zeros(len(preds), dtype=float)
  for i in range(len(preds)):
    pred = preds[i]
    ref = refs[i]
    score = np.min(np.array(list(map(lambda x: calculate_pinc(pred, x, n), ref))))
    scores[i] = score

  return np.mean(scores)


In [37]:
data_files={
    "val":"./GYAFC_Corpus/data_val.json",
    "test":"./GYAFC_Corpus/data_test.json",
}


In [38]:
# Load data from the validation set
with open(data_files["val"], 'r') as file:
    val_data = json.load(file)

# Load data from the test set
with open(data_files["test"], 'r') as file:
    test_data = json.load(file)


In [39]:
# Separate informal, formal references for evaluation
val_informal = [item['transformation']['informal'] for item in val_data]
val_formal_refs = [
    [item['transformation'][f'formal.ref{i}'] for i in range(4)] for item in val_data
]

test_informal = [item['transformation']['informal'] for item in test_data]
test_formal_refs = [
    [item['transformation'][f'formal.ref{i}'] for i in range(4)] for item in test_data
]


In [40]:
print(len(test_formal_refs))
print(len(val_formal_refs))

2748
5665


In [41]:
model.eval()
val_preds = [preprocess(predict(model, input_seq, input_vocab, output_vocab)) for input_seq in val_informal]
print(val_preds[0])
test_preds = [preprocess(predict(model, input_seq, input_vocab, output_vocab)) for input_seq in test_informal]
print(test_preds[0])



If you are under eighteen you have a I Problem.......................................................
, So what if it is a rebound relationship for both of you?...................................................


In [42]:
# Evaluate TERp score on validation set
val_terp_score = terp(val_preds, val_formal_refs)
print(f"Average TERp Score on Validation Set: {val_terp_score}")
# Evaluate PINC score on validation set
val_pinc_score = pinc(val_preds, val_formal_refs, n=2)
print(f"Average PINC Score on Validation Set (n=2): {val_pinc_score}")

# Evaluate TERp score on test set
test_terp_score = terp(test_preds, test_formal_refs)
print(f"Average TERp Score on Test Set: {test_terp_score}")
# Evaluate PINC score on validation set
test_pinc_score = pinc(test_preds, test_formal_refs, n=2)
print(f"Average PINC Score on Test Set (n=2): {test_pinc_score}")


Average TERp Score on Validation Set: 0.12609746638995742
Average PINC Score on Validation Set (n=2): 0.5607699082789491
Average TERp Score on Test Set: 0.13361001705850603
Average PINC Score on Test Set (n=2): 0.544418614460787


## Implementing BLEU and TER from torchmetrics

In [43]:
!pip install torchmetrics




In [44]:
import torch
import torchmetrics
from torchmetrics.text import BLEUScore
from torchmetrics.text import TranslationEditRate

def calculate_bleu(hypothesis, reference):
    bleu_metric = torchmetrics.BLEUScore()
    return bleu_metric(hypothesis, reference)

def calculate_ter(hypothesis, reference):
    ter_metric = torchmetrics.TranslationEditRate()
    return ter_metric(hypothesis, reference)

# Example usage:
hypothesis_sentence = "This is an example sentence."
reference_sentence = "This is an example sentence."

# Calculate BLEU score on Validation Set
bleu_score = calculate_bleu(val_preds, val_formal_refs)
print(f"Average BLEU Score on Validation Set: {bleu_score}")
# Calculate TER score on Validation Set
ter_score = calculate_ter(val_preds, val_formal_refs)
print(f"Average TER Score on Validation Set: {ter_score}")

# Calculate BLEU score on Test Set
bleu_score = calculate_bleu(test_preds, test_formal_refs)
print(f"Average BLEU Score on Test Set: {bleu_score}")
# Calculate TER score on Test Set
ter_score = calculate_ter(test_preds, test_formal_refs)
print(f"Average TER Score on Test Set: {ter_score}")





Average BLEU Score on Validation Set: 0.4055926501750946




Average TER Score on Validation Set: 0.4402236342430115
Average BLEU Score on Test Set: 0.42151957750320435
Average TER Score on Test Set: 0.4148395359516144
