In [None]:
!pip install datasets torch pandas evaluate rouge_score sacrebleu

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sacrebleu
  Downloading sacrebleu-2.4.0-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.3/106.3 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading mult

In [None]:
import evaluate
from google.colab import drive
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoTokenizer, BartForConditionalGeneration
from tqdm import tqdm

import pandas as pd
import torch
import random

In [None]:
# Use the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# NOTE(review): the paths below are relative to the working directory and point
# into Google Drive; no drive.mount(...) call is visible in these cells, so the
# Drive presumably has to be mounted before this cell runs - confirm.
DATA_DIR = "./drive/MyDrive/Submission/BE Project Group No 31/Data"

# Two runs of ten consecutive CSV shards each: test_data50..59 and
# test_data121..130. The second run was labelled
# "scillm/scientific_papers-archive" in the original cell.
shard_ids = [*range(50, 60), *range(121, 131)]

# Read every shard first and concatenate once: growing a frame with pd.concat
# inside a loop copies all previous rows on every iteration.
# ignore_index=True gives the combined frame a clean 0..n-1 index instead of
# the repeated per-file row numbers.
test_df = pd.concat(
    [pd.read_csv(f"{DATA_DIR}/test_data{shard_id}.csv") for shard_id in shard_ids],
    ignore_index=True,
)

# preserve_index=False keeps the pandas index out of the dataset, so no
# "__index_level_0__" helper column is created next to the real columns.
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)
papers_dataset = DatasetDict({"test": test_dataset})

In [None]:
# Display the assembled DatasetDict to check its splits, columns and row count.
papers_dataset

DatasetDict({
    test: Dataset({
        features: ['article', 'abstract', '__index_level_0__'],
        num_rows: 1050
    })
})

In [None]:
model_path = "./drive/MyDrive/Submission/BE Project Group No 31/Model/iter_trained_model"

# The tokenizer is taken from the public base checkpoint while the weights come
# from the fine-tuned copy on Drive (presumably the fine-tuned model keeps the
# base vocabulary - confirm).
# A tokenizer holds no tensors, so it takes no `device` argument; only the
# model is moved to the selected device.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained(model_path).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def generate_batch_sized_chunks(list_of_elements, batch_size):
    """Lazily slice ``list_of_elements`` into consecutive pieces.

    Each yielded piece holds ``batch_size`` items, except possibly the last
    one, which holds whatever remains. Any object supporting ``len`` and
    slicing can be passed.
    """
    total = len(list_of_elements)
    for start in range(0, total, batch_size):
        stop = start + batch_size
        yield list_of_elements[start:stop]

def calculate_metric_on_test_ds(dataset, metric, model, tokenizer,
                               batch_size=2, device=device,
                               column_text="article",
                               column_summary="highlights"):
    """Generate a summary for every text in ``dataset`` and score it with ``metric``.

    Args:
        dataset: Object indexable by column name; ``dataset[column_text]`` and
            ``dataset[column_summary]`` are paired up by position.
        metric: Object exposing ``add_batch(predictions=..., references=...)``
            and ``compute()``.
        model: Model exposing ``generate(input_ids=..., attention_mask=..., ...)``.
        tokenizer: Callable that encodes a batch of texts into ``input_ids`` and
            ``attention_mask`` tensors, and offers ``decode`` for generated ids.
        batch_size: Number of texts summarised per ``generate`` call.
        device: Device the encoded inputs are moved to before generation.
        column_text: Name of the column holding the texts to summarise.
        column_summary: Name of the column holding the reference summaries.

    Returns:
        Whatever ``metric.compute()`` returns once all batches have been added.
    """
    article_batches = list(generate_batch_sized_chunks(dataset[column_text], batch_size))
    target_batches = list(generate_batch_sized_chunks(dataset[column_summary], batch_size))

    for article_batch, target_batch in tqdm(
        zip(article_batches, target_batches), total=len(article_batches)):

        # Every text is truncated/padded to exactly 1024 tokens so the batch
        # stacks into a single tensor.
        inputs = tokenizer(article_batch, max_length=1024,  truncation=True,
                        padding="max_length", return_tensors="pt")

        # Beam search with 8 beams. max_length caps each summary at 128 tokens;
        # length_penalty tunes how beam scores are normalised by length.
        summaries = model.generate(input_ids=inputs["input_ids"].to(device),
                         attention_mask=inputs["attention_mask"].to(device),
                         length_penalty=0.8, num_beams=8, max_length=128)

        # Turn the generated token ids back into plain text.
        decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,
                                clean_up_tokenization_spaces=True)
               for s in summaries]

        # Strip the literal "<n>" newline marker that some summarisation
        # checkpoints emit. The marker name had been lost from this line
        # (presumably it was "<n>" - confirm), leaving replace("", " "), which
        # inserts a space between every character and wrecks any n-gram metric.
        decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]

        metric.add_batch(predictions=decoded_summaries, references=target_batch)

    # All batches are registered; compute and return the final score.
    score = metric.compute()
    return score

### ROUGE Score

In [None]:
# Score the fine-tuned model on the test split with ROUGE:
# articles are the inputs, abstracts are the reference summaries.
rouge_metric = evaluate.load("rouge")

rouge_score = calculate_metric_on_test_ds(
    papers_dataset["test"],
    rouge_metric,
    model,
    tokenizer,
    batch_size=8,
    column_text="article",
    column_summary="abstract",
)

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

100%|██████████| 132/132 [22:46<00:00, 10.35s/it]


In [None]:
# Render the ROUGE result as a one-row table labelled with the model name.
pd.DataFrame(data=rouge_score, index=["Trained Model"])

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
Trained Model,0.01478,0.000429,0.014318,0.014653


### BLEU Score

In [None]:
# Score the fine-tuned model on the test split with sacreBLEU:
# articles are the inputs, abstracts are the reference summaries.
bleu_metric = evaluate.load("sacrebleu")

bleu_score = calculate_metric_on_test_ds(
    papers_dataset["test"],
    bleu_metric,
    model,
    tokenizer,
    batch_size=8,
    column_text="article",
    column_summary="abstract",
)

Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

100%|██████████| 132/132 [22:46<00:00, 10.35s/it]


In [None]:
# Print only the "score" entry of the result returned for the sacreBLEU metric.
print("Bleu Score : ",bleu_score["score"])

Bleu Score :  0.08355345747385627
