# Evaluating the T5 Summarizer

## 1. Prepare


1.   Mount Drive
2.   Install/import libraries
3.   Set parameters




In [None]:
# Mount Google Drive at /content/drive (prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
!pip install transformers -q
!pip install rouge-score

[K     |████████████████████████████████| 778kB 2.8MB/s 
[K     |████████████████████████████████| 3.0MB 15.9MB/s 
[K     |████████████████████████████████| 1.1MB 28.9MB/s 
[K     |████████████████████████████████| 890kB 44.5MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
Collecting rouge-score
  Downloading https://files.pythonhosted.org/packages/1f/56/a81022436c08b9405a5247b71635394d44fe7e1dbedc4b28c740e09c2840/rouge_score-0.0.4-py2.py3-none-any.whl
Installing collected packages: rouge-score
Successfully installed rouge-score-0.0.4


In [None]:
from os.path import isfile, join
import shutil
import numpy as np
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
from torch import cuda
from rouge_score import rouge_scorer
import os

In [None]:
# Seed every random number generator this notebook has imported, so that the
# sampled evaluation subset and any torch-side randomness are repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Navigate to the model's parent directory
%cd /content/drive/My Drive/summarizer/nub-training-evaluation/
# set the model directory
MODEL_DIR = './model'
# create a result directory if needed
OUTPUT_DIR = './result'
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

# pick a name for the outout file
FILENAME = 'eval_results'
# Set the test size
TEST_SIZE = 100

## 2. Create Validation Dataset


In [None]:
def _read_stripped_lines(path):
    """Return the lines of the text file at ``path`` with surrounding whitespace removed."""
    # Explicit encoding: the default used by open() depends on the platform locale.
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f]


# One-column DataFrames (column label 0), one row per line of the file.
test_source = pd.DataFrame(_read_stripped_lines('cnn_dm/test.source'))
test_target = pd.DataFrame(_read_stripped_lines('cnn_dm/test.target'))

In [None]:
# Source and target rows are paired by position below, so both frames must
# have the same number of rows.
assert len(test_source) == len(test_target), "source/target line counts differ"
# Draw TEST_SIZE distinct row positions. len() counts rows, whereas
# DataFrame.size counts cells and only equals the row count for a single column.
sample_ids = np.random.choice(len(test_source), replace=False, size=TEST_SIZE)
# Same positions from both frames, placed side by side as two columns.
cnn_dm_val = pd.concat([test_source.iloc[sample_ids], test_target.iloc[sample_ids]], axis=1)
cnn_dm_val.columns = ['full_text', 'summary']

## 3. Run Eval function

In [None]:
def eval_t5(val_dataset, model_dir, output_dir, filename):
    """Summarize every row of ``val_dataset`` with a T5 model and save ROUGE scores.

    Args:
        val_dataset: DataFrame with a ``full_text`` column (input to the
            summarizer) and a ``summary`` column (reference summary).
        model_dir: Model name or path passed to ``from_pretrained`` for both
            the model and the tokenizer.
        output_dir: Directory the CSV file is written into.
        filename: Name of the CSV file, without the ``.csv`` extension.

    Returns:
        None. The result is written to ``<output_dir>/<filename>.csv`` and
        holds the columns of ``val_dataset`` plus ``system_summary``,
        ``rouge1_f``, ``rouge2_f`` and ``rougeL_f`` (per-row F-measures).
    """
    model = T5ForConditionalGeneration.from_pretrained(model_dir)
    tokenizer = T5Tokenizer.from_pretrained(model_dir)
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    # Per-row results are collected in lists and attached as columns afterwards.
    # Writing to the row objects yielded by DataFrame.iterrows() does not update
    # the DataFrame, which previously left the saved system_summary column empty.
    system_summaries = []
    rouge_1_fs = []
    rouge_2_fs = []
    rouge_l_fs = []
    for full_text, reference in zip(val_dataset['full_text'], val_dataset['summary']):
        system_summary = summarizer(full_text)[0]['summary_text']
        # Reference summary first, generated summary second.
        scores = scorer.score(reference, system_summary)
        system_summaries.append(system_summary)
        rouge_1_fs.append(scores['rouge1'].fmeasure)
        rouge_2_fs.append(scores['rouge2'].fmeasure)
        rouge_l_fs.append(scores['rougeL'].fmeasure)

    # Work on a copy so the caller's DataFrame is left untouched.
    eval_results = val_dataset.copy()
    eval_results['system_summary'] = system_summaries
    eval_results['rouge1_f'] = np.array(rouge_1_fs)
    eval_results['rouge2_f'] = np.array(rouge_2_fs)
    eval_results['rougeL_f'] = np.array(rouge_l_fs)

    output_path = join(output_dir, filename + '.csv')
    eval_results.to_csv(output_path)

In [None]:
# Evaluate the model stored in MODEL_DIR; results are written to eval_results_nub.csv in OUTPUT_DIR.
eval_t5(cnn_dm_val, MODEL_DIR, OUTPUT_DIR, filename='eval_results_nub')

In [None]:
# Evaluate the "t5-base" model on the same sample; results are written to eval_results_t5_base.csv in OUTPUT_DIR.
eval_t5(cnn_dm_val, "t5-base", OUTPUT_DIR, filename='eval_results_t5_base')