In [None]:
# Install necessary libraries
!pip install transformers datasets nltk rouge-score

In [None]:
# Import libraries
import torch
import nltk
from transformers import GPT2LMHeadModel, GPT2Tokenizer, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import openai

In [None]:
# Download necessary NLTK resources
# nltk.word_tokenize loads 'punkt' on older NLTK releases and 'punkt_tab' on
# newer ones (3.9+); fetch both so tokenisation works on either.
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

In [None]:
# Sample reference and generated sentences (replace with actual generated emails from each model)
# NOTE: all texts use the ASCII apostrophe ('). Mixing it with the typographic
# apostrophe (U+2019) makes otherwise identical contractions tokenize differently,
# which distorts n-gram overlap metrics between models.
reference = [
    "Thank you for attending the meeting. Please find attached the proposal we discussed. Let me know your thoughts.",
    "It was great meeting with you. I've attached the necessary documents for the next steps."
]

# Example generated sentences from different models
generated_gpt2 = [
    "Thank you for attending the meeting. Attached is the proposal. Let me know your thoughts.",
    "It was great meeting with you. Attached are the documents for the next steps."
]

generated_bart = [
    "Thanks for attending the meeting. I've attached the proposal for review. Let me know your feedback.",
    "It was a pleasure meeting you. Here are the documents we discussed for next steps."
]

generated_t5 = [
    "Thank you for coming to the meeting. I have attached the proposal. Looking forward to your feedback.",
    "It was nice to meet you. I've attached the documents for the next steps."
]

generated_gpt3 = [
    "Thank you for attending the meeting. I've attached the proposal for your review. Let me know what you think.",
    "It was a pleasure meeting you. The documents for the next steps are attached."
]

generated_gpt4 = [
    "Thank you for attending the meeting. Attached is the proposal for your consideration. I look forward to your feedback.",
    "It was great to meet with you. The necessary documents for the next steps are attached."
]

In [None]:
# Define function to calculate BLEU score
def calculate_bleu(reference, generated):
    """Return the mean sentence-level BLEU score over aligned text pairs.

    Args:
        reference: Sequence of reference strings, one per generated text.
        generated: Sequence of candidate strings; ``generated[i]`` is scored
            against ``reference[i]``.

    Returns:
        The arithmetic mean of the per-pair BLEU scores, or ``0.0`` when both
        sequences are empty.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # zip() would silently drop the unmatched tail and skew the average.
    if len(reference) != len(generated):
        raise ValueError(
            "reference and generated must have the same length "
            f"(got {len(reference)} and {len(generated)})"
        )
    if len(reference) == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0

    smoothie = SmoothingFunction().method4  # Smoothing for short texts
    bleu_scores = [
        sentence_bleu(
            [nltk.word_tokenize(ref)],  # a list of tokenised references (here just one)
            nltk.word_tokenize(gen),
            smoothing_function=smoothie,
        )
        for ref, gen in zip(reference, generated)
    ]
    return sum(bleu_scores) / len(bleu_scores)

In [None]:
# Define function to calculate ROUGE score
def calculate_rouge(reference, generated):
    """Return mean ROUGE-1, ROUGE-2 and ROUGE-L F-measures over aligned text pairs.

    Args:
        reference: Sequence of reference strings, one per generated text.
        generated: Sequence of candidate strings; ``generated[i]`` is scored
            against ``reference[i]``.

    Returns:
        A dict with keys ``"rouge1"``, ``"rouge2"`` and ``"rougeL"`` mapping to
        the average F-measure of each metric. All values are ``0.0`` when both
        sequences are empty.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # zip() would silently drop the unmatched tail and skew the averages.
    if len(reference) != len(generated):
        raise ValueError(
            "reference and generated must have the same length "
            f"(got {len(reference)} and {len(generated)})"
        )

    metrics = ('rouge1', 'rouge2', 'rougeL')
    if len(reference) == 0:
        # Nothing to score; avoid dividing by zero below.
        return {metric: 0.0 for metric in metrics}

    scorer = rouge_scorer.RougeScorer(list(metrics), use_stemmer=True)
    rouge_scores = [scorer.score(ref, gen) for ref, gen in zip(reference, generated)]
    return {
        metric: sum(score[metric].fmeasure for score in rouge_scores) / len(rouge_scores)
        for metric in metrics
    }

In [None]:
# Score each model's sample emails against the shared references.
# `models` maps a display name to that model's list of generated emails.
models = {
    "GPT-2": generated_gpt2,
    "BART": generated_bart,
    "T5": generated_t5,
    "GPT-3": generated_gpt3,
    "GPT-4": generated_gpt4
}

# Average BLEU per model, keyed by model name
bleu_scores = {
    name: calculate_bleu(reference, emails)
    for name, emails in models.items()
}

# Average ROUGE-1/2/L F-measures per model, keyed by model name
rouge_scores = {
    name: calculate_rouge(reference, emails)
    for name, emails in models.items()
}

In [None]:
# Print the BLEU report: a header line followed by one "<model>: <score>" line per model
print(
    "BLEU Scores:",
    *(f"{model}: {score:.4f}" for model, score in bleu_scores.items()),
    sep="\n",
)

In [None]:
# Print the ROUGE report: a header (preceded by a blank line) followed by one line per model
print(
    "\nROUGE Scores:",
    *(
        f"{model}: ROUGE-1: {rouge['rouge1']:.4f}, ROUGE-2: {rouge['rouge2']:.4f}, ROUGE-L: {rouge['rougeL']:.4f}"
        for model, rouge in rouge_scores.items()
    ),
    sep="\n",
)