In [None]:
!pip install --quiet nltk datasets evaluate openpyxl pandas sumy

from datasets import load_dataset
import pandas as pd
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import evaluate
import nltk
import string
import os

# Fetch the NLTK resources used in this notebook: tokenizer models and the stopword list
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

# Load the ROUGE metric once; the evaluation cells call rouge.compute(...)
rouge = evaluate.load('rouge')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


**EXTRACTIVE METHODS**

**FREQUENCY BASED METHOD**

In [None]:
# Frequency-based summarization function
def frequency_based_summarization(text, sentence_count=3):
    """Build an extractive summary from normalized word frequencies.

    Every non-stopword, non-punctuation token of ``text`` is counted, counts are
    divided by the largest count, and each sentence is scored as the sum of the
    normalized frequencies of its tokens. The ``sentence_count`` best-scoring
    sentences are joined with single spaces, highest score first.

    Args:
        text: Document to summarize.
        sentence_count: Maximum number of sentences to return (default 3).

    Returns:
        The summary string, or '' when the text has no scorable words
        (empty text, or only stopwords/punctuation).
    """
    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words("english"))
    punctuation_chars = set(string.punctuation)

    word_freq = {}
    for word in word_tokenize(text.lower()):
        if word in stop_words:
            continue
        # Drop tokens made only of punctuation, including multi-character ones
        # such as "``", "''" or "--" that a substring test against
        # string.punctuation would let through.
        if all(ch in punctuation_chars for ch in word):
            continue
        word_freq[word] = word_freq.get(word, 0) + 1

    # Without this guard max() raises ValueError on an empty frequency table.
    if not word_freq:
        return ''

    max_freq = max(word_freq.values())
    word_freq = {word: freq / max_freq for word, freq in word_freq.items()}

    sentence_scores = {}
    for sentence in sentences:
        score = sum(word_freq[word] for word in word_tokenize(sentence.lower()) if word in word_freq)
        # Assign rather than accumulate so a sentence that appears twice in the
        # text is not scored twice; sentences with no scorable word are skipped.
        if score:
            sentence_scores[sentence] = score

    sorted_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)
    return ' '.join(sorted_sentences[:sentence_count])

# First five test articles of CNN/DailyMail 3.0.0
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')
results = []

# BLEU column name -> n-gram weights passed to sentence_bleu
bleu_weights = {
    '2-gram BLEU': (0.5, 0.5, 0, 0),
    '3-gram BLEU': (0.33, 0.33, 0.34, 0),
    '4-gram BLEU': (0.25, 0.25, 0.25, 0.25),
}
smoothing = SmoothingFunction().method1

# Summarize each article and score the summary against the reference highlights
for article_id, data in enumerate(cnn_dataset, start=1):
    source_text, reference_summary = data['article'], data['highlights']
    generated_summary = frequency_based_summarization(source_text)

    # sentence_bleu works on token lists
    tokenized_generated = word_tokenize(generated_summary)
    tokenized_reference = word_tokenize(reference_summary)

    # ROUGE for this single prediction/reference pair
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[reference_summary])

    row = {
        'id': article_id,
        'source_text': source_text,
        'reference_summary': reference_summary,
        'generated_summary': generated_summary,
        'rouge1': rouge_scores['rouge1'],
        'rouge2': rouge_scores['rouge2'],
        'rougel': rouge_scores['rougeL'],
    }
    for column, weights in bleu_weights.items():
        row[column] = sentence_bleu(
            [tokenized_reference],
            tokenized_generated,
            weights=weights,
            smoothing_function=smoothing,
        )
    results.append(row)

# One row per article
df = pd.DataFrame(results)

# mode='w' creates (or overwrites) the workbook with the 'Frequency' sheet
file_name = 'Bleu.xlsx'
sheet_name = 'Frequency'
with pd.ExcelWriter(file_name, engine='openpyxl', mode='w') as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)
print("Success")


Success


**LSA METHOD**

In [None]:
from sumy.summarizers.lsa import LsaSummarizer

# LSA summarization via sumy
def lsa_summarization(text, sentence_count=3):
    """Summarize ``text`` with sumy's LsaSummarizer.

    The text is parsed with an English tokenizer, the summarizer is asked for
    ``sentence_count`` sentences, and the selected sentences are converted to
    strings and joined with single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    selected = LsaSummarizer()(document, sentence_count)
    return ' '.join(map(str, selected))


# Load the CNN/DailyMail dataset
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')  # Use only first 5 articles

# BLEU settings are identical for every article, so build them once
weights_2gram = (0.5, 0.5, 0, 0)
weights_3gram = (0.33, 0.33, 0.34, 0)
weights_4gram = (0.25, 0.25, 0.25, 0.25)
smoothing = SmoothingFunction().method1

# Process each article
results = []
for i, data in enumerate(cnn_dataset):
    article_id = i + 1
    source_text = data['article']
    reference_summary = data['highlights']

    # Generate summary using LSA method
    generated_summary = lsa_summarization(source_text)

    # Tokenize summaries for BLEU calculation
    tokenized_generated = word_tokenize(generated_summary)
    tokenized_reference = word_tokenize(reference_summary)

    # Calculate BLEU scores for n-grams
    bleu_2gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_2gram, smoothing_function=smoothing)
    bleu_3gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_3gram, smoothing_function=smoothing)
    bleu_4gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_4gram, smoothing_function=smoothing)

    # Calculate ROUGE score for this single prediction/reference pair
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[reference_summary])

    # Store results for each article in a dictionary
    results.append({
        'id': article_id,
        'source_text': source_text,
        'reference_summary': reference_summary,
        'generated_summary': generated_summary,
        'rouge1': rouge_scores['rouge1'],
        'rouge2': rouge_scores['rouge2'],
        'rougel': rouge_scores['rougeL'],
        '2-gram BLEU': bleu_2gram,
        '3-gram BLEU': bleu_3gram,
        '4-gram BLEU': bleu_4gram
    })

# Convert results to a DataFrame
df = pd.DataFrame(results)

# Define the Excel file name and the sheet name
file_name = 'Bleu.xlsx'
sheet_name = 'LSA'

# Plain mode='a' fails when the workbook is missing and raises when the sheet
# already exists (i.e. when this cell is re-run). Append-and-replace when the
# file exists, otherwise create it. if_sheet_exists is only accepted in append mode.
if os.path.exists(file_name):
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    writer_options = {'mode': 'w'}
with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)
print("Success")


Success


**LUHN METHOD**

In [None]:
from sumy.summarizers.luhn import LuhnSummarizer

# Luhn summarization via sumy
def luhn_method(text, sentence_count=3):
    """Summarize ``text`` with sumy's LuhnSummarizer.

    Parses the text using an English tokenizer, requests ``sentence_count``
    sentences from the summarizer and returns them, stringified, as one
    space-separated string.
    """
    english_tokenizer = Tokenizer("english")
    parsed = PlaintextParser.from_string(text, english_tokenizer)
    pick_sentences = LuhnSummarizer()
    chosen = pick_sentences(parsed.document, sentence_count)
    return ' '.join(str(sentence) for sentence in chosen)

# Load the CNN/DailyMail dataset
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')  # Use only first 5 articles

# BLEU settings are identical for every article, so build them once
weights_2gram = (0.5, 0.5, 0, 0)
weights_3gram = (0.33, 0.33, 0.34, 0)
weights_4gram = (0.25, 0.25, 0.25, 0.25)
smoothing = SmoothingFunction().method1

# Prepare list to store results
results = []

# Process each article
for i, data in enumerate(cnn_dataset):
    article_id = i + 1
    source_text = data['article']
    reference_summary = data['highlights']

    # Generate summary using Luhn method
    generated_summary = luhn_method(source_text)

    # Tokenize summaries for BLEU calculation
    tokenized_generated = word_tokenize(generated_summary)
    tokenized_reference = word_tokenize(reference_summary)

    # Calculate BLEU scores for n-grams
    bleu_2gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_2gram, smoothing_function=smoothing)
    bleu_3gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_3gram, smoothing_function=smoothing)
    bleu_4gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_4gram, smoothing_function=smoothing)

    # Calculate ROUGE score for this single prediction/reference pair
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[reference_summary])

    # Store results for each article in a dictionary
    results.append({
        'id': article_id,
        'source_text': source_text,
        'reference_summary': reference_summary,
        'generated_summary': generated_summary,
        'rouge1': rouge_scores['rouge1'],
        'rouge2': rouge_scores['rouge2'],
        'rougel': rouge_scores['rougeL'],
        '2-gram BLEU': bleu_2gram,
        '3-gram BLEU': bleu_3gram,
        '4-gram BLEU': bleu_4gram
    })

# Convert results to a DataFrame
df = pd.DataFrame(results)

# Define the Excel file name and the sheet name
file_name = 'Bleu.xlsx'
sheet_name = 'Luhn'

# Plain mode='a' fails when the workbook is missing and raises when the sheet
# already exists (i.e. when this cell is re-run). Append-and-replace when the
# file exists, otherwise create it. if_sheet_exists is only accepted in append mode.
if os.path.exists(file_name):
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    writer_options = {'mode': 'w'}
with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)
print("Success")


Success


**LEXRANK METHOD**

In [None]:
from sumy.summarizers.lex_rank import LexRankSummarizer

# LexRank summarization via sumy
def lexrank_method(text, sentence_count=3):
    """Summarize ``text`` with sumy's LexRankSummarizer.

    The text is parsed with an English tokenizer; the ``sentence_count``
    sentences returned by the summarizer are turned into strings and joined
    with single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lexrank = LexRankSummarizer()
    pieces = []
    for picked_sentence in lexrank(document, sentence_count):
        pieces.append(str(picked_sentence))
    return ' '.join(pieces)

# BLEU settings are identical for every article, so build them once
weights_2gram = (0.5, 0.5, 0, 0)
weights_3gram = (0.33, 0.33, 0.34, 0)
weights_4gram = (0.25, 0.25, 0.25, 0.25)
smoothing = SmoothingFunction().method1

# Process each article (cnn_dataset is the 5-article subset loaded by an earlier cell)
results = []
for i, data in enumerate(cnn_dataset):
    article_id = i + 1
    source_text = data['article']
    reference_summary = data['highlights']

    # Generate summary using LexRank method
    generated_summary = lexrank_method(source_text)

    # Tokenize summaries for BLEU calculation
    tokenized_generated = word_tokenize(generated_summary)
    tokenized_reference = word_tokenize(reference_summary)

    # Calculate BLEU scores for n-grams
    bleu_2gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_2gram, smoothing_function=smoothing)
    bleu_3gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_3gram, smoothing_function=smoothing)
    bleu_4gram = sentence_bleu([tokenized_reference], tokenized_generated, weights=weights_4gram, smoothing_function=smoothing)

    # Calculate ROUGE score for this single prediction/reference pair
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[reference_summary])

    # Store results for each article in a dictionary
    results.append({
        'id': article_id,
        'source_text': source_text,
        'reference_summary': reference_summary,
        'generated_summary': generated_summary,
        'rouge1': rouge_scores['rouge1'],
        'rouge2': rouge_scores['rouge2'],
        'rougel': rouge_scores['rougeL'],
        '2-gram BLEU': bleu_2gram,
        '3-gram BLEU': bleu_3gram,
        '4-gram BLEU': bleu_4gram
    })

# Convert results to a DataFrame
df = pd.DataFrame(results)

# Define the Excel file name and the sheet name
file_name = 'Bleu.xlsx'
sheet_name = 'LexRank'

# Plain mode='a' fails when the workbook is missing and raises when the sheet
# already exists (i.e. when this cell is re-run). Append-and-replace when the
# file exists, otherwise create it. if_sheet_exists is only accepted in append mode.
if os.path.exists(file_name):
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    writer_options = {'mode': 'w'}
with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)
print("Success")


Success


**ABSTRACTIVE METHODS**

**T5 METHOD**

In [None]:
!pip install --quiet transformers datasets evaluate rouge-score openpyxl pandas sumy nltk

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
import evaluate
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import nltk
import os

# Tokenizer models required by word_tokenize
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

# ROUGE metric handle, plus the torch device used for generation (GPU when available)
rouge = evaluate.load('rouge')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ROUGE over a batch of predictions
def compute_rouge(predictions, references):
    """Return the result of ``rouge.compute`` for the given predictions and references, with stemming enabled."""
    return rouge.compute(predictions=predictions, references=references, use_stemmer=True)

# Function to calculate BLEU scores for different n-grams
def compute_bleu(prediction, reference):
    """Score one prediction against one reference with smoothed sentence-level BLEU.

    Args:
        prediction: Generated summary text.
        reference: Reference summary text.

    Returns:
        A 3-tuple ``(bleu_2gram, bleu_3gram, bleu_4gram)`` computed with the
        weights (0.5, 0.5, 0, 0), (0.33, 0.33, 0.34, 0) and
        (0.25, 0.25, 0.25, 0.25) and SmoothingFunction().method1.
    """
    smoothing_function = SmoothingFunction().method1

    # Tokenize each string once and reuse the tokens for all three n-gram orders.
    reference_tokens = [word_tokenize(reference)]
    prediction_tokens = word_tokenize(prediction)

    weight_sets = (
        (0.5, 0.5, 0, 0),
        (0.33, 0.33, 0.34, 0),
        (0.25, 0.25, 0.25, 0.25),
    )
    bleu_2gram, bleu_3gram, bleu_4gram = (
        sentence_bleu(reference_tokens, prediction_tokens, weights=weights, smoothing_function=smoothing_function)
        for weights in weight_sets
    )
    return bleu_2gram, bleu_3gram, bleu_4gram

# Summarization with T5
def summarize_t5(texts, max_input_length=512, max_target_length=150, model_name='t5-small', prefix='summarize: '):
    """Generate one summary per input text with a T5 checkpoint.

    Args:
        texts: A single string or an iterable of strings to summarize.
        max_input_length: Inputs are truncated to this many tokens.
        max_target_length: ``max_length`` passed to ``generate``.
        model_name: Checkpoint loaded for both tokenizer and model.
        prefix: Task prefix prepended to every input; T5 checkpoints select the
            task from this prefix.

    Returns:
        A list of decoded summaries, one per input text ([] for empty input).
    """
    # Accept a single string or any iterable of strings (e.g. a dataset column).
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return []

    t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
    t5_model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

    prompts = [prefix + text for text in texts]
    inputs = t5_tokenizer(prompts, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length).to(device)

    # The batch is padded, so the attention mask must accompany input_ids;
    # otherwise padding positions are attended to during generation.
    summary_ids = t5_model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=max_target_length,
        num_beams=4,
        early_stopping=True,
    )

    return [t5_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]

# First five test articles of CNN/DailyMail 3.0.0, split into inputs and reference highlights
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')
source_texts, reference_summaries = cnn_dataset['article'], cnn_dataset['highlights']

# T5 summaries for all articles in one batch
t5_summaries = summarize_t5(source_texts)

# ROUGE over the whole batch
t5_rouge = compute_rouge(t5_summaries, reference_summaries)

# BLEU per (summary, reference) pair
t5_bleu = list(map(compute_bleu, t5_summaries, reference_summaries))

# Store results in Excel
def save_t5_results_to_excel(source_texts, reference_summaries, t5_summaries, t5_rouge, t5_bleu):
    """Write one row per article to the "T5" sheet of Bleu.xlsx.

    Args:
        source_texts: Source articles; their count determines the number of rows.
        reference_summaries: Reference summaries, indexed like ``source_texts``.
        t5_summaries: Generated summaries, indexed like ``source_texts``.
        t5_rouge: Mapping with 'rouge1', 'rouge2' and 'rougeL' entries; the same
            values are written on every row.
        t5_bleu: Per-article ``(2-gram, 3-gram, 4-gram)`` BLEU scores.
    """
    # Prepare T5 results
    t5_results = []
    for i in range(len(source_texts)):
        t5_results.append({
            "Source Text": source_texts[i],
            "Reference Summary": reference_summaries[i],
            "T5 Summary": t5_summaries[i],
            "T5 ROUGE-1": t5_rouge['rouge1'],
            "T5 ROUGE-2": t5_rouge['rouge2'],
            "T5 ROUGE-L": t5_rouge['rougeL'],
            "T5 BLEU 2-gram": t5_bleu[i][0],
            "T5 BLEU 3-gram": t5_bleu[i][1],
            "T5 BLEU 4-gram": t5_bleu[i][2]
        })

    df_t5_summaries = pd.DataFrame(t5_results)

    # Save to Excel file. In append mode an existing "T5" sheet (left by a
    # previous run) is replaced instead of raising; if_sheet_exists is only
    # accepted together with mode='a'.
    file_name = 'Bleu.xlsx'
    if os.path.exists(file_name):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_options = {'mode': 'w'}
    with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
        df_t5_summaries.to_excel(writer, sheet_name="T5", index=False)

# Write the T5 summaries and their scores to the "T5" sheet of Bleu.xlsx
save_t5_results_to_excel(source_texts, reference_summaries, t5_summaries, t5_rouge, t5_bleu)

print("Success")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Success


**BART Model**

In [None]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration
from datasets import load_dataset
import evaluate
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import nltk
import os

# Tokenizer models required by word_tokenize
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

# ROUGE metric handle, plus the torch device used for generation (GPU when available)
rouge = evaluate.load('rouge')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ROUGE over a batch of predictions
def compute_rouge(predictions, references):
    """Run ``rouge.compute`` with stemming on the given predictions/references and return its output unchanged."""
    scoring_arguments = {
        'predictions': predictions,
        'references': references,
        'use_stemmer': True,
    }
    return rouge.compute(**scoring_arguments)

# Function to calculate BLEU scores for different n-grams
def compute_bleu(prediction, reference):
    """Score one prediction against one reference with smoothed sentence-level BLEU.

    Args:
        prediction: Generated summary text.
        reference: Reference summary text.

    Returns:
        A 3-tuple ``(bleu_2gram, bleu_3gram, bleu_4gram)`` computed with the
        weights (0.5, 0.5, 0, 0), (0.33, 0.33, 0.34, 0) and
        (0.25, 0.25, 0.25, 0.25) and SmoothingFunction().method1.
    """
    smoothing_function = SmoothingFunction().method1

    # Tokenize each string once and reuse the tokens for all three n-gram orders.
    reference_tokens = [word_tokenize(reference)]
    prediction_tokens = word_tokenize(prediction)

    weight_sets = (
        (0.5, 0.5, 0, 0),
        (0.33, 0.33, 0.34, 0),
        (0.25, 0.25, 0.25, 0.25),
    )
    bleu_2gram, bleu_3gram, bleu_4gram = (
        sentence_bleu(reference_tokens, prediction_tokens, weights=weights, smoothing_function=smoothing_function)
        for weights in weight_sets
    )
    return bleu_2gram, bleu_3gram, bleu_4gram

# Summarization with BART
def summarize_bart(texts, max_input_length=512, max_target_length=150, model_name='facebook/bart-base'):
    """Generate one summary per input text with a BART checkpoint.

    Args:
        texts: A single string or an iterable of strings to summarize.
        max_input_length: Inputs are truncated to this many tokens.
        max_target_length: ``max_length`` passed to ``generate``.
        model_name: Checkpoint loaded for both tokenizer and model.

    Returns:
        A list of decoded summaries, one per input text ([] for empty input).
    """
    # NOTE(review): 'facebook/bart-base' appears to be the pretrained-only
    # checkpoint; confirm whether a summarization fine-tuned checkpoint
    # (e.g. 'facebook/bart-large-cnn') is what this comparison intends.

    # Accept a single string or any iterable of strings (e.g. a dataset column).
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return []

    bart_tokenizer = BartTokenizer.from_pretrained(model_name)
    bart_model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

    inputs = bart_tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length).to(device)

    # The batch is padded, so the attention mask must accompany input_ids;
    # otherwise padding positions are attended to during generation.
    summary_ids = bart_model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=max_target_length,
        num_beams=4,
        early_stopping=True,
    )

    return [bart_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]

# First five test articles of CNN/DailyMail 3.0.0, split into inputs and reference highlights
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')
source_texts, reference_summaries = cnn_dataset['article'], cnn_dataset['highlights']

# BART summaries for all articles in one batch
bart_summaries = summarize_bart(source_texts)

# ROUGE over the whole batch
bart_rouge = compute_rouge(bart_summaries, reference_summaries)

# BLEU per (summary, reference) pair
bart_bleu = list(map(compute_bleu, bart_summaries, reference_summaries))

# Store results in Excel
def save_bart_results_to_excel(source_texts, reference_summaries, bart_summaries, bart_rouge, bart_bleu):
    """Write one row per article to the "BART" sheet of Bleu.xlsx.

    Args:
        source_texts: Source articles; their count determines the number of rows.
        reference_summaries: Reference summaries, indexed like ``source_texts``.
        bart_summaries: Generated summaries, indexed like ``source_texts``.
        bart_rouge: Mapping with 'rouge1', 'rouge2' and 'rougeL' entries; the
            same values are written on every row.
        bart_bleu: Per-article ``(2-gram, 3-gram, 4-gram)`` BLEU scores.
    """
    # Prepare BART results
    bart_results = []
    for i in range(len(source_texts)):
        bart_results.append({
            "Source Text": source_texts[i],
            "Reference Summary": reference_summaries[i],
            "BART Summary": bart_summaries[i],
            "BART ROUGE-1": bart_rouge['rouge1'],
            "BART ROUGE-2": bart_rouge['rouge2'],
            "BART ROUGE-L": bart_rouge['rougeL'],
            "BART BLEU 2-gram": bart_bleu[i][0],
            "BART BLEU 3-gram": bart_bleu[i][1],
            "BART BLEU 4-gram": bart_bleu[i][2]
        })

    df_bart_summaries = pd.DataFrame(bart_results)

    # Save to Excel file. In append mode an existing "BART" sheet (left by a
    # previous run) is replaced instead of raising; if_sheet_exists is only
    # accepted together with mode='a'.
    file_name = 'Bleu.xlsx'
    if os.path.exists(file_name):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_options = {'mode': 'w'}
    with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
        df_bart_summaries.to_excel(writer, sheet_name="BART", index=False)

# Write the BART summaries and their scores to the "BART" sheet of Bleu.xlsx
save_bart_results_to_excel(source_texts, reference_summaries, bart_summaries, bart_rouge, bart_bleu)

print("Success")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Success


**LLMs (Large Language Models)**

In [None]:
from google.colab import userdata
import os
# Copy the GOOGLE_API_KEY secret from Colab's userdata store into the process environment
# (presumably read by the Gemini client created later in the notebook — confirm).
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [None]:
%pip install --upgrade --quiet tiktoken datasets langgraph beautifulsoup4 langchain langchain-google-genai langchain-huggingface nltk datasets evaluate openpyxl pandas transformers rouge_score

from datasets import load_dataset
import nltk
import torch
import pandas as pd
import evaluate
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Tokenizer models required by word_tokenize
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

# ROUGE metric handle and a device name string ("cuda" when a GPU is available)
rouge = evaluate.load('rouge')
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Gemini chat model
def load_llm(model="gemini-1.5-pro"):
    """Return a ChatGoogleGenerativeAI client for a supported Gemini model.

    Supported names are "gemini-1.5-pro" and "gemini-1.5-flash"; both are
    created with temperature 0, no token limit, no timeout and 2 retries.

    Raises:
        ValueError: if ``model`` is not one of the supported names.
    """
    for supported_name in ("gemini-1.5-pro", "gemini-1.5-flash"):
        if model == supported_name:
            return ChatGoogleGenerativeAI(
                model=supported_name,
                temperature=0,
                max_tokens=None,
                timeout=None,
                max_retries=2,
            )
    raise ValueError("Invalid model name")

# Build the summarization prompt
def get_prompt_template():
    """Return a two-message chat prompt template.

    The system message asks for a concise summary in ``{num_words}`` words and
    the human message carries the text to summarize as ``{context}``.
    """
    system_message = ("system", "Write a concise summary of the following in {num_words} words:\n\n")
    human_message = ("human", "{context}")
    return ChatPromptTemplate.from_messages([system_message, human_message])

# Summarize one text with the LLM
def summarize_text(text, num_words=50, model="gemini-1.5-pro"):
    """Summarize ``text`` by piping the prompt template into the selected LLM.

    Args:
        text: Text supplied to the prompt as ``context``.
        num_words: Value supplied to the prompt as ``num_words``.
        model: Model name forwarded to ``load_llm``.

    Returns:
        The ``content`` attribute of the chain's response.
    """
    llm = load_llm(model)
    chain = get_prompt_template() | llm
    prompt_inputs = {"context": text, "num_words": num_words}
    return chain.invoke(prompt_inputs).content

# ROUGE over a batch of predictions, as percentages
def compute_rouge(predictions, references):
    """Run ``rouge.compute`` with stemming and return every resulting score multiplied by 100."""
    raw_scores = rouge.compute(predictions=predictions, references=references, use_stemmer=True)
    scaled_scores = {}
    for metric_name, score in raw_scores.items():
        scaled_scores[metric_name] = score * 100
    return scaled_scores

# Function to calculate BLEU scores for different n-grams
def compute_bleu(predictions, references):
    """Average smoothed sentence-level BLEU over prediction/reference pairs.

    Args:
        predictions: Iterable of generated summaries.
        references: Iterable of reference summaries, paired positionally with
            ``predictions`` (pairing stops at the shorter of the two).

    Returns:
        A dict with the keys "2-gram BLEU", "3-gram BLEU" and "4-gram BLEU",
        each the mean score over all pairs, or 0 when there are no pairs.
    """
    smoothing_function = SmoothingFunction().method1
    weight_sets = {
        "2-gram BLEU": (0.5, 0.5, 0, 0),
        "3-gram BLEU": (0.33, 0.33, 0.34, 0),
        "4-gram BLEU": (0.25, 0.25, 0.25, 0.25),
    }

    totals = dict.fromkeys(weight_sets, 0)
    pair_count = 0
    # Single pass over the pairs: each text is tokenized once and reused for all
    # three n-gram orders, and one-shot iterables (generators) are scored correctly.
    for pred, ref in zip(predictions, references):
        reference_tokens = [word_tokenize(ref)]
        prediction_tokens = word_tokenize(pred)
        for label, weights in weight_sets.items():
            totals[label] += sentence_bleu(
                reference_tokens,
                prediction_tokens,
                weights=weights,
                smoothing_function=smoothing_function,
            )
        pair_count += 1

    bleu_scores = {
        label: (total / pair_count if pair_count else 0)
        for label, total in totals.items()
    }
    return bleu_scores

# First five test articles of CNN/DailyMail 3.0.0, split into inputs and reference highlights
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:5]')
source_texts, reference_summaries = cnn_dataset['article'], cnn_dataset['highlights']

# One LLM call per article, using summarize_text's defaults
llm_summaries = list(map(summarize_text, source_texts))

# ROUGE over the whole batch
llm_rouge = compute_rouge(llm_summaries, reference_summaries)

# BLEU averaged over the whole batch
llm_bleu = compute_bleu(llm_summaries, reference_summaries)

# Store results in Excel
def save_results_to_excel(source_texts, reference_summaries, llm_summaries, llm_rouge, llm_bleu):
    """Write one row per article to the "LLM" sheet of Bleu.xlsx.

    Args:
        source_texts: Source articles; their count determines the number of rows.
        reference_summaries: Reference summaries, indexed like ``source_texts``.
        llm_summaries: Generated summaries, indexed like ``source_texts``.
        llm_rouge: Mapping with 'rouge1', 'rouge2' and 'rougeL' entries.
        llm_bleu: Mapping with "2-gram BLEU", "3-gram BLEU" and "4-gram BLEU" entries.

    The ROUGE and BLEU values are not indexed per article: the same values are
    written on every row.
    """
    llm_results = []
    for i in range(len(source_texts)):
        llm_results.append({
            "Source Text": source_texts[i],
            "Reference Summary": reference_summaries[i],
            "LLM Summary": llm_summaries[i],
            "LLM ROUGE-1": llm_rouge['rouge1'],
            "LLM ROUGE-2": llm_rouge['rouge2'],
            "LLM ROUGE-L": llm_rouge['rougeL'],
            "LLM BLEU 2-gram": llm_bleu["2-gram BLEU"],
            "LLM BLEU 3-gram": llm_bleu["3-gram BLEU"],
            "LLM BLEU 4-gram": llm_bleu["4-gram BLEU"]
        })

    df_llm_summaries = pd.DataFrame(llm_results)

    # Save to Excel file. In append mode an existing "LLM" sheet (left by a
    # previous run) is replaced instead of raising; if_sheet_exists is only
    # accepted together with mode='a'.
    file_name = 'Bleu.xlsx'
    if os.path.exists(file_name):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_options = {'mode': 'w'}
    with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
        df_llm_summaries.to_excel(writer, sheet_name="LLM", index=False)

# Write the LLM summaries and their scores to the "LLM" sheet of Bleu.xlsx
save_results_to_excel(source_texts, reference_summaries, llm_summaries, llm_rouge, llm_bleu)

print("Success")
