In [1]:
!pip install rouge




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
from rouge import Rouge

# Scorer object from the `rouge` package; its get_scores() is called once per
# summary pair further down.
rouge = Rouge()

# Directories (relative to the current working directory) holding the summaries
# to compare. Files are paired by identical file name; the texts read from
# `old_summary_folder` are used as the reference summaries.
new_summary_folder = 'New_summary'
old_summary_folder = 'Old_summary'

def read_summary(file_path, encoding='utf-8'):
    """Return the full text content of a summary file.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding
            (for example cp1252 on Windows).

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

# Directory listings: the new-summary names drive the loop below.
new_files = os.listdir(new_summary_folder)
old_files = os.listdir(old_summary_folder)

# Per-variant accumulators; each list gains one value per scored file pair.
rouge_1_scores = dict(precision=[], recall=[], f1=[])
rouge_2_scores = dict(precision=[], recall=[], f1=[])
rouge_l_scores = dict(precision=[], recall=[], f1=[])

for new_file in new_files:
    corresponding_old_file = os.path.join(old_summary_folder, new_file)
    # Only score names that also exist as a regular file in the old folder.
    if not os.path.isfile(corresponding_old_file):
        continue

    llm_summary = read_summary(os.path.join(new_summary_folder, new_file))
    reference_summary = read_summary(corresponding_old_file)

    # get_scores returns a list; the first entry holds the metrics for this pair.
    scores = rouge.get_scores(llm_summary, reference_summary)

    for metric_key, bucket in (
        ('rouge-1', rouge_1_scores),
        ('rouge-2', rouge_2_scores),
        ('rouge-l', rouge_l_scores),
    ):
        for stat_name, stat_key in (('precision', 'p'), ('recall', 'r'), ('f1', 'f')):
            bucket[stat_name].append(scores[0][metric_key][stat_key])

def calculate_average(scores):
    """Return the arithmetic mean of a collection of numeric scores.

    Args:
        scores: Sized collection of numbers (for example a list of floats).

    Returns:
        The mean of ``scores``. When ``scores`` is empty the mean is undefined,
        so ``nan`` is returned instead of raising ``ZeroDivisionError``; it
        formats as "nan", making a run with no scored files easy to spot.
    """
    if len(scores) == 0:
        return float('nan')
    return sum(scores) / len(scores)

# Mean precision / recall / F1 across all scored files, one triple per variant.
avg_rouge_1_precision, avg_rouge_1_recall, avg_rouge_1_f1 = (
    calculate_average(rouge_1_scores[stat]) for stat in ('precision', 'recall', 'f1')
)

avg_rouge_2_precision, avg_rouge_2_recall, avg_rouge_2_f1 = (
    calculate_average(rouge_2_scores[stat]) for stat in ('precision', 'recall', 'f1')
)

avg_rouge_l_precision, avg_rouge_l_recall, avg_rouge_l_f1 = (
    calculate_average(rouge_l_scores[stat]) for stat in ('precision', 'recall', 'f1')
)

# One group of three lines per variant; groups are separated by a blank line.
report_sections = [
    [
        "ROUGE-1 Precision: {:.4f}".format(avg_rouge_1_precision),
        "ROUGE-1 Recall: {:.4f}".format(avg_rouge_1_recall),
        "ROUGE-1 F1 Score: {:.4f}".format(avg_rouge_1_f1),
    ],
    [
        "ROUGE-2 Precision: {:.4f}".format(avg_rouge_2_precision),
        "ROUGE-2 Recall: {:.4f}".format(avg_rouge_2_recall),
        "ROUGE-2 F1 Score: {:.4f}".format(avg_rouge_2_f1),
    ],
    [
        "ROUGE-L Precision: {:.4f}".format(avg_rouge_l_precision),
        "ROUGE-L Recall: {:.4f}".format(avg_rouge_l_recall),
        "ROUGE-L F1 Score: {:.4f}".format(avg_rouge_l_f1),
    ],
]
print("\n\n".join("\n".join(section) for section in report_sections))


ROUGE-1 Precision: 0.2526
ROUGE-1 Recall: 0.6618
ROUGE-1 F1 Score: 0.3574

ROUGE-2 Precision: 0.1236
ROUGE-2 Recall: 0.4049
ROUGE-2 F1 Score: 0.1836

ROUGE-L Precision: 0.2328
ROUGE-L Recall: 0.6112
ROUGE-L F1 Score: 0.3294
