In [1]:
import pandas as pd
from rouge import Rouge
import math
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the csv file
df = pd.read_csv("summaries.csv")

# Initialize the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")  
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")

# Initialize the Rouge scorer
rouge = Rouge()

# Loop over every row in the csv file
for index, row in df.iterrows():
    # Get the text from the first column
    text = row['Text']
    # Calculate the length of the text
    text_length = len(text)
    # Calculate the start and end positions for slicing
    start = math.floor(0.15 * text_length)
    end = math.floor(0.85 * text_length)
    # Slice the text to get the first 15% from the start and last 35% from the end
    text = text[start:end]
    # Tokenize the input text
    input_tokenized = tokenizer.encode(text, return_tensors='pt', max_length=1024, truncation=True)
    # Generate the summary using the model
    summary_ids = model.generate(input_tokenized,
                                      num_beams=9,
                                      no_repeat_ngram_size=3,
                                      length_penalty=2.0,
                                      min_length=150,
                                      max_length=250,
                                      early_stopping=True)
    # Decode the generated summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    # Add the generated summary to the Predicted Summary column in the csv
    df.at[index, 'Predicted Summary'] = summary
    # Get the original summary from the csv
    original_summary = row['Original Summary']
    # Calculate the Rouge score
    scores = rouge.get_scores(summary, original_summary)
    # Add the Rouge R1, R2, and RL scores to the respective columns in the csv
    df.at[index, 'R1'] = scores[0]['rouge-1']['f']
    df.at[index, 'R2'] = scores[0]['rouge-2']['f']
    df.at[index, 'Rl'] = scores[0]['rouge-l']['f']

# Save the updated csv
df.to_csv("Output_DistilBart_With_Position_45Doc.csv", index=False)


# Rouge Score

In [2]:
# Calculate the average scores
r1_avg = df['R1'].mean()
r2_avg = df['R2'].mean()
rl_avg = df['Rl'].mean()

# Print the average scores
print(f"Average R1 Score: {r1_avg}")
print(f"Average R2 Score: {r2_avg}")
print(f"Average Rl Score: {rl_avg}")

Average R1 Score: 0.21053302983428573
Average R2 Score: 0.0918587108739669
Average Rl Score: 0.1990307860267124


# Precision Recall

In [3]:
import pandas as pd
import nltk
from nltk import word_tokenize
from nltk.metrics import precision, recall, f_measure

df = pd.read_csv('Output_DistilBart_With_Position_45Doc.csv')

for index, row in df.iterrows():
    predicted_summary = row['Predicted Summary']
    original_summary = row['Original Summary']
    
    predicted_tokens = set(word_tokenize(predicted_summary))
    original_tokens = set(word_tokenize(original_summary))

    precision_score = precision(original_tokens, predicted_tokens)
    recall_score = recall(original_tokens, predicted_tokens)
    f1_score = f_measure(original_tokens, predicted_tokens)
    
    df.at[index, 'Precision'] = precision_score
    df.at[index, 'Recall'] = recall_score
    df.at[index, 'F1 Score'] = f1_score

df.to_csv('Output_DistilBart_With_Position_45Doc.csv', index=False)


In [4]:
import pandas as pd

df = pd.read_csv('Output_DistilBart_With_Position_45Doc.csv')

precision_mean = df['Precision'].mean()
recall_mean = df['Recall'].mean()
f1_score_mean = df['F1 Score'].mean()

print("Precision mean:", precision_mean)
print("Recall mean:", recall_mean)
print("F1 Score mean:", f1_score_mean)


Precision mean: 0.5571438806482686
Recall mean: 0.1495251848369466
F1 Score mean: 0.23180158233099135


# BLEU Score

In [8]:
import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize

df = pd.read_csv('Output_DistilBart_With_Position_45Doc.csv')

for index, row in df.iterrows():
    predicted_summary = row['Predicted Summary']
    original_summary = row['Original Summary']
    
    predicted_tokens = word_tokenize(predicted_summary)
    original_tokens = word_tokenize(original_summary)

    bleu_score = sentence_bleu([original_tokens], predicted_tokens)
    
    df.at[index, 'BLEU Score'] = bleu_score

df.to_csv('Output_DistilBart_With_Position_45Doc.csv', index=False)


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [9]:
import pandas as pd

df = pd.read_csv('Output_DistilBart_With_Position_45Doc.csv')

bleu_score_mean = df['BLEU Score'].mean()

print("BLEU Score mean:", bleu_score_mean)

BLEU Score mean: 0.004649022296394179
