### Step 10.  Accuracy Measurement

This notebook measures summary accuracy with ROUGE. It is not included in the dissertation and is not required for reproducing the results.

#### Import required libraries, load submissions and comments with summary

In [1]:
import torch
import pandas as pd
from rouge import Rouge


In [None]:


def extract_nan_rows(df, name):
    """Return the rows of ``df`` whose 'summary' value is missing, labelled with ``name``.

    Args:
        df: DataFrame that has a 'summary' column.
        name: Label written into the 'source' column of every returned row.

    Returns:
        A new DataFrame holding only the rows where 'summary' is NaN, with an
        added 'source' column. The original index labels are kept and ``df``
        itself is left unmodified.
    """
    # Take an explicit copy: assigning a column onto a boolean-mask slice is
    # chained assignment and triggers SettingWithCopyWarning in pandas.
    nan_rows = df[df['summary'].isna()].copy()
    nan_rows['source'] = name
    return nan_rows

# Load DataFrames
df = pd.read_csv('tfcc_submissions_top20_with_sentiment_including_comment_sentiment_and_summaries.csv')
comments_df = pd.read_csv('tfcc_top_comments_summarized.csv')
pegasus_df = pd.read_csv('tfcc_submissions_top20_pegasus_summaries.csv')
pegasus_comments_df = pd.read_csv('tfcc_top_comments_pegasus_summarized.csv')
cohere_df = pd.read_csv('tfcc_submissions_top20_cohere_summaries.csv')
cohere_comments_df = pd.read_csv('tfcc_top_comments_cohere_summarized.csv')

# Each loaded frame paired with the label (CSV name without extension) that is
# recorded in the 'source' column of the error report.
summary_sources = [
    ("tfcc_submissions_top20_with_sentiment_including_comment_sentiment_and_summaries", df),
    ("tfcc_top_comments_summarized", comments_df),
    ("tfcc_submissions_top20_pegasus_summaries", pegasus_df),
    ("tfcc_top_comments_pegasus_summarized", pegasus_comments_df),
    ("tfcc_submissions_top20_cohere_summaries", cohere_df),
    ("tfcc_top_comments_cohere_summarized", cohere_comments_df),
]

# Extract rows with NaN values in the 'summary' column.
# DataFrame.append was removed in pandas 2.0, so the per-source pieces are
# combined with a single pd.concat (original index labels are preserved).
nan_rows_df = pd.concat(
    [extract_nan_rows(frame, source_name) for source_name, frame in summary_sources]
)

# Remove rows with NaN values from the original DataFrames
df = df.dropna(subset=['summary'])
comments_df = comments_df.dropna(subset=['summary'])
pegasus_df = pegasus_df.dropna(subset=['summary'])
pegasus_comments_df = pegasus_comments_df.dropna(subset=['summary'])
cohere_df = cohere_df.dropna(subset=['summary'])
cohere_comments_df = cohere_comments_df.dropna(subset=['summary'])

# Save the rows with NaN values in the 'summary' column for later inspection
nan_rows_df.to_csv('summary_errors.csv', index=False)



In [3]:
# Display the collected rows whose 'summary' is missing; the 'source' column
# identifies which input file each row came from.
nan_rows_df

Unnamed: 0,id,title,selftext,author,score,num_comments,created_date,selftext_length,topic,pos_sentiment,neg_sentiment,comments_pos_sentiment,comments_neg_sentiment,summary,source
1187,2rfobe,Almost 15 years in callcenter,..and i was unemployed for 3 years. last augus...,reddandy73,1.0,4.0,2015-01-05 20:28:47,120.0,9.0,0.194657,0.805343,0.999371,0.000629,,tfcc_submissions_top20_pegasus_summaries
3,e01rb8,One of my agents actually said what everyone t...,this happened a couple of weeks ago and is bot...,wirwarennamenlos,1553.0,121.0,2019-11-22 14:33:25,138.0,0.0,0.498418,0.501582,0.250270,0.749730,,tfcc_submissions_top20_cohere_summaries
12,b4w6n6,Hung up on a customer today,ill give a little bit of background before i g...,forever_a10ne,1128.0,86.0,2019-03-24 13:11:29,395.0,0.0,0.235142,0.764858,0.131666,0.868334,,tfcc_submissions_top20_cohere_summaries
21,cf0t8g,Perv masturbates loudly and the rep documents it,"so, i wasn't sure i wanted to put this one her...",TaraJo,920.0,74.0,2019-07-19 00:48:39,446.0,0.0,0.199540,0.800460,0.392115,0.607885,,tfcc_submissions_top20_cohere_summaries
35,bqz64k,I Love Karma...,this happened to me a few years ago but it sti...,David-Arroyo,756.0,58.0,2019-05-20 18:17:20,221.0,0.0,0.645398,0.354602,0.269660,0.730340,,tfcc_submissions_top20_cohere_summaries
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,,,I get calls like that all of the time....and f...,,,,,,8.0,,,,,,tfcc_top_comments_cohere_summarized
193,,,I also worked for a military affiliated credit...,,,,,,8.0,,,,,,tfcc_top_comments_cohere_summarized
223,,,I could see a legitimate scenario where someon...,,,,,,11.0,,,,,,tfcc_top_comments_cohere_summarized
270,,,I once had an elderly couple calling about the...,,,,,,17.0,,,,,,tfcc_top_comments_cohere_summarized


### Measure ROUGE

In [4]:
# Count the rows left in each submissions frame (OpenAI, Pegasus, Cohere)
# once the rows without a summary have been removed.
start_num_rows, pegasus_start_num_rows, cohere_start_num_rows = (
    len(frame.index) for frame in (df, pegasus_df, cohere_df)
)

In [5]:
# Report how many rows each model's ROUGE measurement will cover.
for model_prefix, row_total in (
    ("OpenAI - Measuring accuracy on ", start_num_rows),
    ("Pegasus - Measuring accuracy on ", pegasus_start_num_rows),
    ("COhere - Measuring accuracy on ", cohere_start_num_rows),
):
    print(model_prefix + str(row_total) + " rows.")


OpenAI - Measuring accuracy on 1964 rows.
Pegasus - Measuring accuracy on 1963 rows.
COhere - Measuring accuracy on 1883 rows.


In [9]:
# Create one shared Rouge scorer; calculate_rouge1_score reads this
# module-level instance every time it scores a row.
rouge = Rouge()

# Function to calculate ROUGE scores
def calculate_rouge1_score(row):
    """Score one row's 'summary' against its 'selftext' and return the ROUGE-1 entry.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) providing 'summary' and
            'selftext'.

    Returns:
        The 'rouge-1' entry of the first result from ``rouge.get_scores``,
        or None if anything goes wrong while scoring (the error is printed).
    """
    try:
        # The summary is passed first and the original text second.
        first_result = rouge.get_scores(row['summary'], row['selftext'])[0]
        rouge1 = first_result['rouge-1']
    except Exception as e:
        # Best-effort: report the problem and let the caller carry on.
        print(f"Error calculating ROUGE-1 score: {e}")
        return None
    return rouge1

def extract_f1_score(row):
    """Return the F1 component ('f') of a row's ROUGE-1 result.

    Args:
        row: Mapping-like row whose 'rouge1_scores' entry is the value
            produced by ``calculate_rouge1_score``.

    Returns:
        The value stored under 'f', or NaN when the row has no usable score.
    """
    scores = row['rouge1_scores']
    # calculate_rouge1_score returns None when scoring failed, and pandas may
    # represent a missing entry as a float NaN. Subscripting either with 'f'
    # would raise TypeError and abort the whole apply, so map them to NaN,
    # which Series.mean() ignores.
    if scores is None or isinstance(scores, float):
        return float('nan')
    return scores['f']



#### Calculate ROUGE-1 for OpenAI submissions

In [11]:
# Score every OpenAI summary against its source post, one ROUGE-1 result per row
openai_rouge1_results = df.apply(calculate_rouge1_score, axis=1)
df['rouge1_scores'] = openai_rouge1_results

# Pull the F1 component of each result into its own 'rouge1_f1_score' column
openai_rouge1_f1 = df.apply(extract_f1_score, axis=1)
df['rouge1_f1_score'] = openai_rouge1_f1

# Average the per-row F1 values and report the result
average_rouge1_f1 = df['rouge1_f1_score'].mean()
print(f"Average ROUGE-1 F1-score: {average_rouge1_f1}")




Average ROUGE-1 F1-score: 0.29108867246853465


#### Calculate ROUGE-1 for Pegasus submissions

In [12]:
# Score every Pegasus summary against its source post, one ROUGE-1 result per row
pegasus_rouge1_results = pegasus_df.apply(calculate_rouge1_score, axis=1)
pegasus_df['rouge1_scores'] = pegasus_rouge1_results

# Pull the F1 component of each result into its own 'rouge1_f1_score' column
pegasus_rouge1_f1 = pegasus_df.apply(extract_f1_score, axis=1)
pegasus_df['rouge1_f1_score'] = pegasus_rouge1_f1

# Average the per-row F1 values and report the result
average_rouge1_f1 = pegasus_df['rouge1_f1_score'].mean()
print(f"Average ROUGE-1 F1-score: {average_rouge1_f1}")


Average ROUGE-1 F1-score: 0.3557169809484121


#### Calculate ROUGE-1 for Cohere submissions

In [13]:
# Score every Cohere summary against its source post, one ROUGE-1 result per row
cohere_rouge1_results = cohere_df.apply(calculate_rouge1_score, axis=1)
cohere_df['rouge1_scores'] = cohere_rouge1_results

# Pull the F1 component of each result into its own 'rouge1_f1_score' column
cohere_rouge1_f1 = cohere_df.apply(extract_f1_score, axis=1)
cohere_df['rouge1_f1_score'] = cohere_rouge1_f1

# Average the per-row F1 values and report the result
average_rouge1_f1 = cohere_df['rouge1_f1_score'].mean()
print(f"Average ROUGE-1 F1-score: {average_rouge1_f1}")

Average ROUGE-1 F1-score: 0.41162155878564466
