In [1]:
# Install the third-party packages this notebook imports further down
# (transformers, datasets, nltk, sentence-transformers). No versions are pinned,
# so pip resolves whatever releases are current when the cell is run.
!pip install transformers datasets nltk sentence-transformers

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests (from transformers)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->s

In [2]:
# Upgrade (-U) transformers, with its `torch` extra, and accelerate.
# Run this before the import cell so the upgraded packages are the ones loaded.
!pip install transformers[torch] accelerate -U

Collecting accelerate
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting transformers[torch]
  Downloading transformers-4.43.3-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Downloading accelerate-0.33.0-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.1/315.1 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.43.3-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m107.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers, accelerate
  Attempting uninstall: transformers
    Found existing installation: transformers 4.42.4
    Uninstalling transformers-4.42.4:
      Successfully uninstalled transformers-4.42.4
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.32.1
    Uni

In [3]:
from datasets import load_from_disk
from transformers import T5ForConditionalGeneration, T5Tokenizer
import os
import torch
from sentence_transformers import SentenceTransformer, util
from nltk.translate.bleu_score import sentence_bleu

In [4]:
from google.colab import drive

# Mount Google Drive at /content/drive; the model, dataset and output paths used
# later in this notebook all live under /content/drive/MyDrive.
# force_remount=True is passed so the mount is refreshed on re-run.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
def LoadModelTokenizer(load_directory):
  """Load a T5 model and its tokenizer from one directory.

  Args:
    load_directory: Location passed unchanged to both `from_pretrained` calls.

  Returns:
    A `(model, tokenizer)` tuple built from `T5ForConditionalGeneration` and
    `T5Tokenizer` respectively.
  """
  # Tuple elements are evaluated left to right, so the model is loaded first.
  return (
      T5ForConditionalGeneration.from_pretrained(load_directory),
      T5Tokenizer.from_pretrained(load_directory),
  )

In [6]:
def LoadTestDataset(load_directory):
  """Read back a dataset that was previously saved to disk.

  Args:
    load_directory: Directory handed to `datasets.load_from_disk`.

  Returns:
    The object returned by `load_from_disk` for that directory.
  """
  return load_from_disk(load_directory)

In [7]:
def SaveFile(full_path, text, mode):
    """Write `text` to `full_path` as UTF-8, creating the parent directory if needed.

    Args:
        full_path: Destination file path. A bare file name (no directory part)
            is accepted and is written relative to the current directory.
        text: String to write.
        mode: File mode forwarded to `open`, e.g. "w" to overwrite or "a" to append.
    """
    parent_dir = os.path.dirname(full_path)
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the path actually has a directory component.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The context manager closes the file on exit; no explicit close() is needed.
    with open(full_path, mode=mode, encoding='utf8') as file:
        file.write(text)

In [8]:
def ComputeSemanticSimilarity(model_sbert, sentence1, sentence2):
  """Return the cosine similarity between the embeddings of two sentences.

  Args:
    model_sbert: Encoder exposing `encode(text, convert_to_tensor=True)`.
    sentence1: First text to embed.
    sentence2: Second text to embed.

  Returns:
    The similarity computed by `util.pytorch_cos_sim`, converted to a Python
    scalar with `.item()`.
  """
  # Gradients are never needed for scoring, so everything runs under no_grad.
  with torch.no_grad():
    # The generator is consumed in order: sentence1 is encoded before sentence2.
    first_vec, second_vec = (
        model_sbert.encode(text, convert_to_tensor=True)
        for text in (sentence1, sentence2)
    )
    return util.pytorch_cos_sim(first_vec, second_vec).item()

In [9]:
def ComputeBleu(sentence1, sentence2, tokenizer, smoothing_function=None):
    """Compute a sentence-level BLEU score of `sentence2` against `sentence1`.

    Both sentences are split with `tokenizer.tokenize`, and `sentence1` is used
    as the single reference for NLTK's `sentence_bleu`.

    Args:
        sentence1: Reference sentence.
        sentence2: Hypothesis (generated) sentence.
        tokenizer: Object exposing `tokenize(text)` that returns a token list.
        smoothing_function: Optional smoothing callable forwarded to
            `sentence_bleu`. The default `None` keeps NLTK's unsmoothed
            behaviour, where the score is 0 (with a warning) if any n-gram order
            has no overlap. Pass e.g.
            `nltk.translate.bleu_score.SmoothingFunction().method1` to avoid
            that collapse on short sentences.

    Returns:
        The BLEU score returned by `sentence_bleu`.
    """
    # Tokenize reference and hypothesis with the same tokenizer.
    reference_tokens = tokenizer.tokenize(sentence1)
    hypothesis_tokens = tokenizer.tokenize(sentence2)

    # sentence_bleu expects a list of references, hence the one-element list.
    return sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing_function,
    )

In [10]:
def Translate(model_sbert, test_sentence, model, tokenizer, translated_sentenses_path, mode):
    """Simplify one test sentence, score the result, and log it to a file.

    Args:
        model_sbert: Sentence encoder forwarded to `ComputeSemanticSimilarity`.
        test_sentence: Mapping with an "input_text" entry (the prompt given to
            the model) and a "target_text" entry (the reference simplification).
        model: Generation model; it is moved to the CPU before use.
        tokenizer: Tokenizer used to encode the prompt, decode the output, and
            tokenize for BLEU.
        translated_sentenses_path: File that receives the textual report.
        mode: File mode forwarded to `SaveFile` (e.g. "w" or "a").

    Returns:
        A `(bleu_score, cos_sim)` tuple for this sentence.
    """
    device = 'cpu'
    model.to(device)

    source_text = test_sentence["input_text"]
    # The task prefix is stripped for the report only; the model still sees it.
    source_text_bare = source_text.replace("translate English to Simple English: ", "")
    reference_text = test_sentence["target_text"]

    encoded = tokenizer(source_text, return_tensors='pt').to(device)
    generated_ids = model.generate(
        encoded['input_ids'],
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )
    prediction = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Both metrics compare the model output against the reference simplification.
    bleu = ComputeBleu(reference_text, prediction, tokenizer)
    similarity = ComputeSemanticSimilarity(model_sbert, reference_text, prediction)

    report = "".join([
        f"Original English sentence: {source_text_bare}\n",
        f"Original Simple English sentence: {reference_text}\n",
        f"The Simple English sentence from model: {prediction}\n",
        f"BLEU score: {bleu}\n",
        f"Semantic similarity score (cosine-similarity): {similarity}\n\n",
    ])
    SaveFile(translated_sentenses_path, report, mode)

    return bleu, similarity

In [11]:
def main(model_tokenizer_path, test_dataset_path, translated_sentenses_path):
  """Evaluate the saved T5 model on the test set and print average scores.

  Every test sentence is passed through `Translate`, which appends a
  per-sentence report to `translated_sentenses_path`. The mean BLEU and mean
  cosine similarity over all sentences are printed at the end.

  Args:
    model_tokenizer_path: Directory holding the saved model and tokenizer.
    test_dataset_path: Directory holding the saved test dataset.
    translated_sentenses_path: Output text file for the per-sentence reports.

  Raises:
    ValueError: If the test dataset is empty, since no average can be computed.
  """
  model, tokenizer = LoadModelTokenizer(model_tokenizer_path)
  tokenized_test_dataset = LoadTestDataset(test_dataset_path)

  sentence_count = len(tokenized_test_dataset)
  if sentence_count == 0:
    # Fail with a clear message instead of a ZeroDivisionError after the loop.
    raise ValueError(f"Test dataset at {test_dataset_path!r} is empty; nothing to evaluate.")

  model_sbert = SentenceTransformer('paraphrase-MiniLM-L6-v2')

  bleu_total = 0.0
  cos_sim_total = 0.0
  for index, sentence in enumerate(tokenized_test_dataset):
    # The first sentence truncates the report file; later ones append to it.
    mode = "w" if index == 0 else "a"
    bleu_score, cos_sim = Translate(model_sbert, sentence, model, tokenizer, translated_sentenses_path, mode)
    bleu_total += bleu_score
    cos_sim_total += cos_sim

  avg_bleu = bleu_total / sentence_count
  avg_cos_sim = cos_sim_total / sentence_count

  print(f"Avg BLEU score over all test sentences is: {avg_bleu}")
  print(f"Avg cos-similarity score over all test sentences is: {avg_cos_sim}")

In [12]:
# Inputs: the fine-tuned T5 checkpoint directory and the saved test dataset.
model_tokenizer_path = '/content/drive/MyDrive/NLP_Proj/saving_models_T5/english_simple_T5'
test_dataset_path = '/content/drive/MyDrive/NLP_Proj/saving_datasets_T5'
# Output: text file that receives one report per test sentence.
translated_sentenses_path = '/content/drive/MyDrive/NLP_Proj/translated_test_sentences_T5/translated_by_trained_T5.txt'

# Run the evaluation; the average scores are printed by main().
main(
    model_tokenizer_path=model_tokenizer_path,
    test_dataset_path=test_dataset_path,
    translated_sentenses_path=translated_sentenses_path,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Avg BLEU score over all test sentences is: 0.11316056147065236
Avg cos-similarity score over all test sentences is: 0.7374932295957429
