# Import Libraries & Dataset, and Instantiate Constant Variables

In [1]:
# Import libraries ****************************************************************************

# Preemptive Packages ----------------------------------------
import pandas as pd
import numpy as np
import torch

from rouge import Rouge

from torchtext.data.metrics import bleu_score

from bert_score import score

import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import string
from nltk.translate.meteor_score import meteor_score

# Constant Variables **********************************************************************************************
rawdata_filename = "RawData.xlsx";
summarydata_filename = "SummaryData.xlsx";
evaluated_data_filename = "EvaluatedData.xlsx";

# Import both raw & summary datasets, and show information ****************************************************************************************
df_raw = pd.read_excel(rawdata_filename);
print("Raw Dataset:");
print(str(df_raw.info()) + "\n");

df_summary = pd.read_excel(summarydata_filename);
print("Summary Dataset:"); 
print(str(df_summary.info()));

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kunal\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\kunal\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kunal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Raw Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Section         7 non-null      object
 1   Start Page      7 non-null      int64 
 2   End Page        7 non-null      int64 
 3   Extracted Text  7 non-null      object
dtypes: int64(2), object(2)
memory usage: 352.0+ bytes
None

Summary Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Section          49 non-null     object
 1   Summarization    49 non-null     object
 2   Technique        49 non-null     object
 3   Summarized Text  49 non-null     object
dtypes: object(4)
memory usage: 1.7+ KB
None


# Define Functions

In [2]:
# Calculate Rouge Score and output evaluation scores in a dataframe
def calculate_rougeScore(raw, summary):
    variation = ["rouge-1","rouge-2","rouge-l"];
    
    rouge = Rouge();
    scores = rouge.get_scores(raw, summary);
    data = [];
    
    for var in variation:
        for measure in ["r","p","f"]:
                data.append(scores[0].get(var).get(measure));
    
    col_list = [];
    for var in variation:
         for measure in ["Recall","Precision","F1 Score"]:
            col_list.append(var + " " + measure);
    
    df_eval = pd.DataFrame([data], columns=col_list);
    return df_eval;

In [3]:
# Calculate BERT Score and output evaluation scores in a dataframe
def calculate_bertScore(raw, summary):
    bert_scores = score([summary], [raw], model_type='bert-base-uncased');
    data = [];
    for bertScore in [bert_scores][0]:
        cleaned_score = str(bertScore).split('tensor([')[1][:-2];
        data.append(cleaned_score);
    df_eval = pd.DataFrame([data], columns=["BERT Precision","BERT Recall", "BERT F1 Score"])
    return df_eval;

In [4]:
# Calculate METEOR Score and output evaluation scores in a dataframe
def calculate_meteorScore(raw, summary, remove_punctuation):
    raw_tokenized = word_tokenize(raw.replace("\n"," ").strip());
    summary_tokenized = word_tokenize(summary.replace("\n"," ").strip());
    
    raw_tokenized_cleaned = [];
    summary_tokenized_cleaned = [];

    if remove_punctuation == True:
        for word in raw_tokenized:
            if word not in string.punctuation:
                raw_tokenized_cleaned.append(word);
        
        for word in summary_tokenized:
            if word not in string.punctuation:
                summary_tokenized_cleaned.append(word);
    else:
        raw_tokenized_cleaned = raw_tokenized;
        summary_tokenized_cleaned = summary_tokenized;
    
    score = meteor_score([raw_tokenized_cleaned], summary_tokenized_cleaned);
    df_eval = pd.DataFrame([score], columns=["METEOR Score"]);

    return df_eval;

In [5]:
# Save evaluation score/s to a dataframe based on a pre-specified evaluation metric
def save_scores(raw_data, summary_data, metric):
    col_list = [];
    # Rouge Data Columns ---------------------------------------------------
    if metric == "Rouge":
        col_list = [
            "Section","Summarization","Technique","Summarized Text",
            "rouge-1 Recall","rouge-1 Precision","rouge-1 F1 Score",
            "rouge-2 Recall","rouge-2 Precision","rouge-2 F1 Score",
            "rouge-l Recall","rouge-l Precision","rouge-l F1 Score"
        ];
    # BERT Data Columns ----------------------------------------------------
    elif metric == "BERT":
        col_list = [
            "Section","Summarization","Technique","Summarized Text",
            "BERT Recall","BERT Precision","BERT F1 Score"
        ];
    # METEOR Data Columns ----------------------------------------------------
    elif metric == "METEOR":
        col_list = [
            "Section","Summarization","Technique","Summarized Text",
            "METEOR Score"
        ];
    # --------------------------------------------------------------------
    
    data = [];
    for i in raw_data.index:
        for j in summary_data.index:
            if (raw_data.loc[i, "Section"] == summary_data.loc[j, "Section"]):
                row = [
                    raw_data.loc[i, "Section"],
                    summary_data.loc[j, "Summarization"],
                    summary_data.loc[j, "Technique"],
                    summary_data.loc[j, "Summarized Text"]
                ];
                eval_data = "";
                # Rouge Scores ---------------------------------------------------
                if metric == "Rouge":
                    eval_data = calculate_rougeScore(
                        raw=raw_data.loc[i, "Extracted Text"], 
                        summary=summary_data.loc[j, "Summarized Text"]
                    );
                # BERT Scores ---------------------------------------------------
                elif metric == "BERT":
                    eval_data = calculate_bertScore(
                        raw=raw_data.loc[i, "Extracted Text"], 
                        summary=summary_data.loc[j, "Summarized Text"]
                    );
                # METEOR Scores ---------------------------------------------------
                elif metric == "METEOR":
                    eval_data = calculate_meteorScore(
                        raw=raw_data.loc[i, "Extracted Text"], 
                        summary=summary_data.loc[j, "Summarized Text"], 
                        remove_punctuation=True
                    );
                # --------------------------------------------------------------- 
                for col in eval_data.columns.tolist():
                    row.append(eval_data.loc[0, col]);
                # ---------------------------------------------------------------

                # print(f'''
                #     Raw Dataset section: {raw_data.loc[i, "Section"]}
                #     Summary Dataset section: {summary_data.loc[j, "Section"]}
                #     Summarization: {summary_data.loc[j, "Summarization"]}
                #     Technique: {summary_data.loc[j, "Technique"]}
                # ''');
                
                data.append(row);

    return pd.DataFrame(data, columns=col_list);

# Evaluate Summaries and Save Results

In [6]:
# Evaluate summaries and save results to dataframe based on Rouge Score
df_rouge = save_scores(df_raw, df_summary, "Rouge");
print(df_rouge.info());

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Section            49 non-null     object 
 1   Summarization      49 non-null     object 
 2   Technique          49 non-null     object 
 3   Summarized Text    49 non-null     object 
 4   rouge-1 Recall     49 non-null     float64
 5   rouge-1 Precision  49 non-null     float64
 6   rouge-1 F1 Score   49 non-null     float64
 7   rouge-2 Recall     49 non-null     float64
 8   rouge-2 Precision  49 non-null     float64
 9   rouge-2 F1 Score   49 non-null     float64
 10  rouge-l Recall     49 non-null     float64
 11  rouge-l Precision  49 non-null     float64
 12  rouge-l F1 Score   49 non-null     float64
dtypes: float64(9), object(4)
memory usage: 5.1+ KB
None


In [7]:
# Evaluate summaries and save results to dataframe based on BERT Score
df_bert = save_scores(df_raw, df_summary, "BERT");
print(df_bert.info());

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Section          49 non-null     object
 1   Summarization    49 non-null     object
 2   Technique        49 non-null     object
 3   Summarized Text  49 non-null     object
 4   BERT Recall      49 non-null     object
 5   BERT Precision   49 non-null     object
 6   BERT F1 Score    49 non-null     object
dtypes: object(7)
memory usage: 2.8+ KB
None


In [8]:
# Evaluate summaries and save results to dataframe based on METEOR Score
df_meteor = save_scores(df_raw, df_summary, "METEOR");
print(df_meteor.info());

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Section          49 non-null     object 
 1   Summarization    49 non-null     object 
 2   Technique        49 non-null     object 
 3   Summarized Text  49 non-null     object 
 4   METEOR Score     49 non-null     float64
dtypes: float64(1), object(4)
memory usage: 2.0+ KB
None


In [9]:
# Save the rouge score, BERT score, and METEOR score dataframes in an Excel workbook, whereby each dataframe populates a single sheet
with pd.ExcelWriter(evaluated_data_filename) as writer:
    df_rouge.to_excel(writer, sheet_name="Rouge", index=False);
    df_bert.to_excel(writer, sheet_name="BERT", index=False);
    df_meteor.to_excel(writer, sheet_name="METEOR", index=False);