# In [None]:
import csv
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face checkpoint used for both the tokenizer and the seq2seq model
model_checkpoint = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Read the input rows from new_summaries.csv into a dataframe
df = pd.read_csv("new_summaries.csv")

# Generation settings are identical for every document, so spell them out once
generation_kwargs = dict(
    num_beams=9,
    no_repeat_ngram_size=3,
    length_penalty=2.0,
    min_length=150,
    max_length=500,
    early_stopping=True,
)

# Summarise every entry of the 'Text' column, in row order
predicted_summaries = []
for text in df['Text']:
    # Tokenize the input text, truncating it to at most 1024 tokens
    input_tokenized = tokenizer.encode(text, return_tensors='pt', max_length=1024, truncation=True)
    # Generate the summary using the pre-trained model
    summary_ids = model.generate(input_tokenized, **generation_kwargs)
    # Only the first returned sequence is kept, so decode just that one
    # instead of decoding every sequence and discarding the rest
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    predicted_summaries.append(predicted_summary)

# Assign the whole column in one step rather than growing it cell by cell
# with df.at; the list is in the same order as the dataframe rows
df['Predicted Summary'] = predicted_summaries

from rouge import Rouge

# The scorer takes no per-row arguments, so build it once for the whole loop
rouge = Rouge()

# Maps each ROUGE variant reported by get_scores to its dataframe column
rouge_columns = {'rouge-1': 'R1', 'rouge-2': 'R2', 'rouge-l': 'Rl'}

# Calculate the Rouge scores for each row
for index, row in df.iterrows():
    # Arguments are (hypothesis, reference): predicted summary first
    scores = rouge.get_scores(row["Predicted Summary"], row["Original Summary"])
    # Store the F1 component of each variant; scores[0] holds the result
    # for the single pair passed above
    for metric, column in rouge_columns.items():
        df.at[index, column] = scores[0][metric]['f']

# Save the dataframe with predicted summaries and Rouge scores to a CSV file
df.to_csv('Output_DistilBart_Original_100doc.csv', index=False)

# Average each ROUGE F1 column over all rows of the dataframe
r1_avg = df['R1'].mean()
r2_avg = df['R2'].mean()
rl_avg = df['Rl'].mean()

# Print the average scores
print(f"Average R1 Score: {r1_avg}")
print(f"Average R2 Score: {r2_avg}")
print(f"Average Rl Score: {rl_avg}")

# NOTE: no second to_csv here. The dataframe is written to
# 'Output_DistilBart_Original_100doc.csv' right after the ROUGE loop and
# nothing in this section modifies it, so rewriting the file is redundant.

import pandas as pd
import nltk
from nltk import word_tokenize
from nltk.metrics import precision, recall, f_measure

# Reload the results file so this step works from what is stored on disk
df = pd.read_csv('Output_DistilBart_Original_100doc.csv')

# nltk.metrics scorers keyed by the dataframe column each one fills;
# every scorer is called as scorer(reference_set, test_set)
overlap_scorers = {
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f_measure,
}

# Score the token overlap between predicted and original summary, row by row
for row_label, record in df.iterrows():
    # Sets of distinct tokens: duplicates and word order are ignored
    candidate_vocab = set(word_tokenize(record['Predicted Summary']))
    reference_vocab = set(word_tokenize(record['Original Summary']))

    # Compute all three scores before writing any of them back
    row_scores = {
        column: scorer(reference_vocab, candidate_vocab)
        for column, scorer in overlap_scorers.items()
    }
    for column, value in row_scores.items():
        df.at[row_label, column] = value

# Persist the dataframe, now including the three overlap columns
df.to_csv('Output_DistilBart_Original_100doc.csv', index=False)

import pandas as pd

# Reload the results file that holds the per-row overlap scores
df = pd.read_csv('Output_DistilBart_Original_100doc.csv')

# Column-wise means of precision, recall and F1, taken one column at a time
precision_mean, recall_mean, f1_score_mean = (
    df[column].mean() for column in ('Precision', 'Recall', 'F1 Score')
)

# Report each mean next to its label
for label, value in (
    ("Precision mean:", precision_mean),
    ("Recall mean:", recall_mean),
    ("F1 Score mean:", f1_score_mean),
):
    print(label, value)

import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu

# Reload the results file so this step works from what is stored on disk
df = pd.read_csv('Output_DistilBart_Original_100doc.csv')

# Sentence-level BLEU of each predicted summary against its original summary
for row_label, record in df.iterrows():
    # Token lists (order preserved); the prediction is the hypothesis
    hypothesis = word_tokenize(record['Predicted Summary'])
    # sentence_bleu expects a list of references; there is exactly one here
    references = [word_tokenize(record['Original Summary'])]

    df.at[row_label, 'BLEU Score'] = sentence_bleu(references, hypothesis)

# Persist the dataframe, now including the BLEU column
df.to_csv('Output_DistilBart_Original_100doc.csv', index=False)

import pandas as pd

# Reload the results file that holds the per-row BLEU scores
df = pd.read_csv('Output_DistilBart_Original_100doc.csv')

# Mean of the 'BLEU Score' column over all rows
bleu_column = df['BLEU Score']
bleu_score_mean = bleu_column.mean()

# Report the mean BLEU score
print("BLEU Score mean:", bleu_score_mean)
