In [1]:
import pandas as pd
from rouge import Rouge
from sentence_transformers import SentenceTransformer
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.metrics.pairwise import cosine_similarity
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

nltk.download('punkt')
rouge = Rouge()
# Create a sentence embedding model
sent_model = SentenceTransformer('bert-base-nli-mean-tokens')


def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s.,""]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text

# Load the CSV file
df = pd.read_csv("new_summaries.csv")

# Loop through each row in the CSV file
for index, row in df.iterrows():
    text = row['Text']

    # Preprocess the text
    text = preprocess_text(text)

    # Tokenize the text into sentences
    sentences = sent_tokenize(text)

    # Obtain sentence embeddings for each sentence
    sentence_embeddings = sent_model.encode(sentences)

    # Define the threshold
    threshold = 0.4

    # Initialize a dictionary to store the results
    results = {}

    # Loop through each sentence
    for i, sent1 in enumerate(sentences):
        count = 0
        # Loop through each sentence again
        for j, sent2 in enumerate(sentences):
            if i != j:
                # Compute cosine similarity between sentence embeddings
                similarity = cosine_similarity(sentence_embeddings[i].reshape(1, -1), sentence_embeddings[j].reshape(1, -1))[0][0]
                # If similarity is greater than threshold, increment count
                if similarity > threshold:
                    count += 1
        # Add sentence and count to dictionary
        results[sent1] = count

    # Sort the dictionary in descending order of values
    sorted_results = dict(sorted(results.items(), key=lambda item: item[1], reverse=True))

    # Extract only the top 40% of sentences
    num_sentences = int(0.4 * len(sorted_results))
    top_sentences = list(sorted_results.keys())[:num_sentences]

    # Sort the top sentences based on their original order in the text
    sorted_top_sentences = sorted(top_sentences, key=lambda x: sentences.index(x))

    # Join the top sentences into a text string
    text_after_similarity = ' '.join(sorted_top_sentences)
    # Initialize the tokenizer and the model
    tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")  
    model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
    input_tokenized = tokenizer.encode(text_after_similarity, return_tensors='pt', max_length=1024, truncation=True)
    # Generate the summary using the model
    summary_ids = model.generate(input_tokenized,
                                      num_beams=9,
                                      no_repeat_ngram_size=3,
                                      length_penalty=2.0,
                                      min_length=150,
                                      max_length=500,
                                      early_stopping=True)
    # Decode the generated summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    # Add the generated summary to the Predicted Summary column in the csv
    df.at[index, 'Predicted Summary'] = summary
    # Get the original summary from the csv
    original_summary = row['Original Summary']
    # Calculate the Rouge score
    scores = rouge.get_scores(summary, original_summary)
    # Add the Rouge R1, R2, and RL scores to the respective columns in the csv
    df.at[index, 'R1'] = scores[0]['rouge-1']['f']
    df.at[index, 'R2'] = scores[0]['rouge-2']['f']
    df.at[index, 'Rl'] = scores[0]['rouge-l']['f']

# Save the updated csv
df.to_csv("Output_DistilBart_Sentence_Similarity_100doc.csv", index=False)



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Pranav\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Rouge Score

In [2]:
# Calculate the average scores
r1_avg = df['R1'].mean()
r2_avg = df['R2'].mean()
rl_avg = df['Rl'].mean()

# Print the average scores
print(f"Average R1 Score: {r1_avg}")
print(f"Average R2 Score: {r2_avg}")
print(f"Average Rl Score: {rl_avg}")

Average R1 Score: 0.21568013462413618
Average R2 Score: 0.07617523410053513
Average Rl Score: 0.1924606707904992


# Precision, Recall and F1 Score

In [3]:
import pandas as pd
import nltk
from nltk import word_tokenize
from nltk.metrics import precision, recall, f_measure

df = pd.read_csv('Output_DistilBart_Sentence_Similarity_100doc.csv')

for index, row in df.iterrows():
    predicted_summary = row['Predicted Summary']
    original_summary = row['Original Summary']
    
    predicted_tokens = set(word_tokenize(predicted_summary))
    original_tokens = set(word_tokenize(original_summary))

    precision_score = precision(original_tokens, predicted_tokens)
    recall_score = recall(original_tokens, predicted_tokens)
    f1_score = f_measure(original_tokens, predicted_tokens)
    
    df.at[index, 'Precision'] = precision_score
    df.at[index, 'Recall'] = recall_score
    df.at[index, 'F1 Score'] = f1_score

df.to_csv('Output_DistilBart_Sentence_Similarity_100doc.csv', index=False)


In [4]:
import pandas as pd

df = pd.read_csv('Output_DistilBart_Sentence_Similarity_100doc.csv')

precision_mean = df['Precision'].mean()
recall_mean = df['Recall'].mean()
f1_score_mean = df['F1 Score'].mean()

print("Precision mean:", precision_mean)
print("Recall mean:", recall_mean)
print("F1 Score mean:", f1_score_mean)


Precision mean: 0.45441047612790053
Recall mean: 0.17369382611180786
F1 Score mean: 0.23511882773966866


# BLEU Score

In [5]:
import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize

df = pd.read_csv('Output_DistilBart_Sentence_Similarity_100doc.csv')

for index, row in df.iterrows():
    predicted_summary = row['Predicted Summary']
    original_summary = row['Original Summary']
    
    predicted_tokens = word_tokenize(predicted_summary)
    original_tokens = word_tokenize(original_summary)

    bleu_score = sentence_bleu([original_tokens], predicted_tokens)
    
    df.at[index, 'BLEU Score'] = bleu_score

df.to_csv('Output_DistilBart_Sentence_Similarity_100doc.csv', index=False)


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [6]:
import pandas as pd

df = pd.read_csv('Output_DistilBart_Sentence_Similarity_100doc.csv')

bleu_score_mean = df['BLEU Score'].mean()

print("BLEU Score mean:", bleu_score_mean)

BLEU Score mean: 0.006713529045199813
