In [1]:
import pandas as pd
import numpy as np
import warnings
# warnings.filterwarnings("ignore")

In [2]:
from rouge_score import rouge_scorer 
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rouge3', 'rougeL', 'rougeLsum'], use_stemmer=True)

def get_rougue_score(text, highlights, metric="rougeL"):
    score =  scorer.score(text, highlights)[metric].fmeasure
    return score

## Prediction extractive _12EPOCH

In [43]:
df = pd.read_csv("predictions_extractive_12epoch.csv")
df['Rouge1'] = df.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge1"), axis=1)
df['Rouge2'] = df.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge2"), axis=1)
df['Rouge3'] = df.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge3"), axis=1)
df['RougeL'] = df.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeL"), axis=1)
df['RougeLsum'] = df.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeLsum"), axis=1)

In [44]:
df.to_csv("predictions_extractive_12epoch_with_rouge.csv", sep=',', encoding='utf-8', index=False)

In [45]:
print(df['Rouge1'].mean())
print(df['Rouge2'].mean())
print(df['Rouge3'].mean())
print(df['RougeL'].mean())
print(df['RougeLsum'].mean())

0.3420935804294753
0.07107610925044244
0.022872501832466308
0.20376950223567852
0.20376950223567852


## Prediction extractive my_12EPOCH

In [46]:
dfa = pd.read_csv("predictions_extractive_my_12epoch.csv")
dfa['Rouge1'] = dfa.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge1"), axis=1)
dfa['Rouge2'] = dfa.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge2"), axis=1)
dfa['Rouge3'] = dfa.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge3"), axis=1)
dfa['RougeL'] = dfa.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeL"), axis=1)
dfa['RougeLsum'] = dfa.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeLsum"), axis=1)

In [47]:
print(dfa['Rouge1'].mean())
print(dfa['Rouge2'].mean())
print(dfa['Rouge3'].mean())
print(dfa['RougeL'].mean())
print(dfa['RougeLsum'].mean())

0.3319439854099592
0.06875151021147312
0.020799614376231502
0.2054400595934515
0.2054400595934515


In [48]:
dfa.to_csv("predictions_extractive_my_12epoch_with_rouge.csv", sep=',', encoding='utf-8', index=False)

# Run below for generating the bert score of the abstract model per column
## check the cummulative mean bert_score at the end

In [9]:
from datasets import list_metrics, load_metric
from statistics import mean
metrics_list = list_metrics()

In [14]:
df_abstract = pd.read_csv("yale_bart_predictions.csv")

In [10]:
bertscore = load_metric("bertscore")
def bert_scorer(generated_sums, true_sums, metric):
    return bertscore.compute(predictions=generated_sums, references=true_sums, lang="en", verbose=False)[metric]


In [38]:
df_abstract['Rouge1'] = df_abstract.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge1"), axis=1)
df_abstract['Rouge2'] = df_abstract.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge2"), axis=1)
df_abstract['Rouge3'] = df_abstract.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rouge3"), axis=1)
df_abstract['RougeL'] = df_abstract.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeL"), axis=1)
df_abstract['RougeLsum'] = df_abstract.apply(lambda x: get_rougue_score(x['Actual Text'], x['Generated Text'], metric="rougeLsum"), axis=1)

In [3]:
df = pd.read_csv("predictions_extractive_12epoch.csv")

In [14]:
df2 = pd.read_csv("predictions_extractive_my_12epoch.csv")

In [15]:
df2["bertscore_f1"] = bert_scorer(df2['Actual Text'],df2['Generated Text'], "f1")


df2["bertscore_precision"] = bert_scorer(df2['Actual Text'],df2['Generated Text'], "precision")


df2["bertscore_recall"] = bert_scorer(df2['Actual Text'],df2['Generated Text'], "recall")

In [22]:
df_abstract["bertscore_f1"] = bert_scorer(df_abstract['Actual Text'],df_abstract['Generated Text'], "f1")


df_abstract["bertscore_recall"] = bert_scorer(df_abstract['Actual Text'],df_abstract['Generated Text'], "precision")


df_abstract["bertscore_precision"] = bert_scorer(df_abstract['Actual Text'],df_abstract['Generated Text'], "recall")


In [23]:
# df_abstract["bertscore_f1"]

0      0.850033
1      0.853063
2      0.846542
3      0.851886
4      0.866587
         ...   
198    0.860166
199    0.859972
200    0.841402
201    0.833093
202    0.836516
Name: bertscore_f1, Length: 203, dtype: float64

In [29]:
df_abstract["bertscore_precision"]

0      0.853757
1      0.852930
2      0.851437
3      0.850306
4      0.870103
         ...   
198    0.863403
199    0.860387
200    0.844228
201    0.840196
202    0.837589
Name: bertscore_precision, Length: 203, dtype: float64

In [24]:
cummulative_bertscore = df_abstract["bertscore_f1"].mean()
print("Cummulative Bert Score F1:",cummulative_bertscore)

Cummulative Bert Score F1: 0.8524506658756087


In [13]:
print(df_abstract['Rouge1'].mean())
print(df_abstract['Rouge2'].mean())
print(df_abstract['Rouge3'].mean())
print(df_abstract['RougeL'].mean())
print(df_abstract['RougeLsum'].mean())

0.3383724122981066
0.07474964717459603
0.021157335557296413
0.20339260055258954
0.20339260055258954


In [30]:
df_abstract.to_csv("yale_bart_predictions_bert_rouge.csv", sep=',', encoding='utf-8', index=False)

In [4]:
df_abstract = pd.read_csv("yale_bart_predictions_bert_rouge.csv", sep=',', encoding='utf-8')

In [6]:
print(df_abstract["bertscore_f1"].mean())
print(df_abstract["bertscore_precision"].mean())
print(df_abstract["bertscore_recall"].mean())

0.8524506658756087
0.8505914777957747
0.8544527682764776


In [12]:
print(df["bertscore_f1"].mean())
print(df["bertscore_precision"].mean())
print(df["bertscore_recall"].mean())

0.8541504021348625
0.8527764460723388
0.8557632894351564


In [16]:
print(df2["bertscore_f1"].mean())
print(df2["bertscore_precision"].mean())
print(df2["bertscore_recall"].mean())

0.8526606627285774
0.851575328505098
0.8539447851956184
