In [2]:
import json
import os

import nltk
import pandas as pd
from evaluate import load

from BARTScore.bart_score import BARTScorer

In [27]:
# Cache directory handed to the metric loaders and added to NLTK's search path.
# Set the EVAL_CACHE_DIR environment variable to relocate it on another
# machine; when unset, the original location is used.
CACHE_DIR = os.environ.get('EVAL_CACHE_DIR', '/scratch/adyansh/cache')
# Model under evaluation. The same string is used as the prediction column
# name and as the stem of the results CSV/JSON file names.
MODEL_NAME = 't5-base'
# Guarded so that re-running this cell does not keep appending duplicates.
if CACHE_DIR not in nltk.data.path:
    nltk.data.path.append(CACHE_DIR)

In [22]:
# Device used by the BART scorer. The default keeps the original value;
# set BART_DEVICE (for example 'cuda:0' or 'cpu') on hosts without a second GPU.
BART_DEVICE = os.environ.get('BART_DEVICE', 'cuda:1')

# Metrics loaded through `evaluate`, all sharing the same cache directory.
rouge = load('rouge', cache_dir=CACHE_DIR)
bleu = load('bleu', cache_dir=CACHE_DIR)
meteor = load('meteor', cache_dir=CACHE_DIR)
bertscore = load("bertscore", cache_dir=CACHE_DIR)
# BARTScorer comes from the local BARTScore package.
# NOTE(review): assumes this copy of BARTScorer accepts `cache_dir` — confirm.
bartscore = BARTScorer(device=BART_DEVICE, cache_dir=CACHE_DIR)

[nltk_data] Downloading package wordnet to /home2/adyansh/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home2/adyansh/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home2/adyansh/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [28]:
# Read the predictions file for the configured model. The CSV is expected to
# hold the reference text in 'Decision' and the model output in a column
# named after MODEL_NAME.
predictions_csv = f'../results/{MODEL_NAME}.csv'
data = pd.read_csv(predictions_csv)

# Report the row count, then preview the first rows.
print(len(data))
data.head()

5262


Unnamed: 0,Context,Decision,t5-base
0,## Context\nThe PriveXMl results generated by ...,We increase complexity during insertion by inc...,tenant + position + transaction. ## Contextn #...
1,## Context\nWe currently have certain specific...,"Due to limited resources and time constraints,...",## Contextn ## Contextn ## Decision Record. ##...
2,"## Context\nCurrently, we move files around in...",- ETL will have a dedicated database tables wh...,"## ContextnCurrently, we move files around in ..."
3,## Context\nMonolith will undergo segmentation...,Create a specific services (ETL and CDH) where...,the tenant data hub will remain the same.n ## ...
4,## Context\nWe wish to segment into separate p...,We have chosen an event based communication wh...,## Contextn ETL service is responsible for pro...


In [29]:
# Keep only rows that have both a model prediction and a reference decision.
# pandas reads an empty CSV cell as NaN, so a missing value in either column
# would otherwise be passed to the metrics as a float instead of a string.
data = data.dropna(subset=[MODEL_NAME, 'Decision'])
len(data)

4950

In [30]:
results = {}

# Materialise both columns as plain Python lists once and reuse them for every
# metric. After dropna the Series index has gaps, and `series[0]` is a label
# lookup that raises KeyError if row 0 was dropped; lists avoid that and match
# what the BARTScore call already did.
predictions = data[MODEL_NAME].tolist()
references = data['Decision'].tolist()

results['rouge'] = rouge.compute(predictions=predictions, references=references)
print('Rouge Done')
results['bleu'] = bleu.compute(predictions=predictions, references=references)
print('Bleu Done')
results['meteor'] = meteor.compute(predictions=predictions, references=references)
print('Meteor Done')
results['bertscore'] = bertscore.compute(predictions=predictions, references=references, lang='en', batch_size=64)
print('BertScore Done')
# The BART scorer is called with predictions first and references second.
results['bartscore'] = bartscore.score(predictions, references, batch_size=4)

Rouge Done
Bleu Done
Meteor Done
BertScore Done


In [31]:
# BERTScore fields that are averaged into a single number each.
cols = ['precision', 'recall', 'f1']

# Replace each listed BERTScore entry with its mean, in place.
bert_scores = results['bertscore']
bert_scores.update({c: pd.Series(bert_scores[c]).mean() for c in cols})

# Collapse the BARTScore values to their mean in the same way.
results['bartscore'] = pd.Series(results['bartscore']).mean()

In [32]:
# Display the metrics dictionary as the cell output.
results

{'rouge': {'rouge1': 0.11256309388111059,
  'rouge2': 0.022798009183065654,
  'rougeL': 0.08996747124841085,
  'rougeLsum': 0.08988464874923778},
 'bleu': {'bleu': 0.01845807428774771,
  'precisions': [0.10896937737791365,
   0.02655075593613505,
   0.01331966726930213,
   0.008995853585827267],
  'brevity_penalty': 0.7606889558171072,
  'length_ratio': 0.785218583344645,
  'translation_length': 642107,
  'reference_length': 817743},
 'meteor': {'meteor': 0.07285248861542418},
 'bertscore': {'precision': 0.731808079204174,
  'recall': 0.7996872530320678,
  'f1': 0.7630425258116289,
  'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.40.0)'},
 'bartscore': -4.782863617545426}

In [26]:
# Write the metrics dictionary to a JSON file named after the model, in the
# same ../results directory the predictions CSV is read from.
results_json = f'../results/{MODEL_NAME}.json'
with open(results_json, 'w') as f:
    json.dump(results, f)