# 04 - Model Evaluation

### Description:
This notebook loads the fine-tuned GPT-2 model and its tokenizer, generates an email continuation from a sample prompt, and evaluates the generated text with the BLEU score.

In [None]:
# Import necessary libraries
import string

import torch
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Restore the fine-tuned GPT-2 tokenizer and weights from their saved directories
tokenizer_dir = 'models/trained_gpt2_tokenizer'
model_dir = 'models/trained_gpt2_model'
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_dir)
model = GPT2LMHeadModel.from_pretrained(model_dir)

In [None]:
# Prompt that seeds the generation, encoded as PyTorch tensors.
# NOTE: despite its name, `input_ids` holds the tokenizer's whole output
# mapping; the token ids themselves live under the 'input_ids' key.
context = "I hope this email finds you well."
input_ids = tokenizer(text=context, return_tensors="pt")

In [None]:
# Generate email continuation based on the context.
# The attention mask is passed explicitly so generate() does not have to infer
# it, and pad_token_id is set to EOS because GPT-2 defines no pad token
# (otherwise the library warns and falls back to this value implicitly).
# max_length counts the prompt tokens as well as the newly generated ones.
generated_email = model.generate(
    input_ids['input_ids'],
    attention_mask=input_ids['attention_mask'],
    max_length=100,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Convert the first generated sequence of token ids back into plain text,
# dropping special tokens so they do not appear in the output
first_sequence = generated_email[0]
generated_email_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
print("Generated Email:", generated_email_text)

In [None]:
# Example BLEU score evaluation.
# NOTE: the reference below is just the prompt itself, so this cell only
# demonstrates the metric pipeline; replace it with a tokenized held-out email
# to obtain a meaningful quality measurement.
reference = [['i', 'hope', 'this', 'email', 'finds', 'you', 'well']]

# Strip surrounding punctuation so that e.g. "well." matches the reference
# token "well", then drop tokens that were punctuation only.
generated = [
    token.strip(string.punctuation)
    for token in generated_email_text.lower().split()
]
generated = [token for token in generated if token]

# Smoothing keeps the score from collapsing to ~0 (with an NLTK warning) when
# some n-gram order has no overlap with the reference.
smoothing = SmoothingFunction().method1
bleu_score = sentence_bleu(reference, generated, smoothing_function=smoothing)
print(f"BLEU Score: {bleu_score}")