In [26]:
import pandas as pd
import numpy as np
import math
import random
import csv

import torch
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler

from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config
from transformers import get_linear_schedule_with_warmup

import nltk
nltk.download('punkt')

from tqdm import tqdm

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/matteorigat/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [27]:
# some parameters
epochs = 3
learning_rate = 1e-3
warmup_steps = 1e2
epsilon = 1e-8
model_name = "gpt2"
batch_size = 2


# this produces sample output every 100 steps
sample_every = 1000
# save the model every 5000 step
save_every = 5000
# save the model to this file name
save_model = "trial_2"

In [28]:
# load and also preprocess the raw data
def load_preprocess_raw_data(raw_data):
    recipe_instances = []

    with open(raw_data, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Extract relevant fields from CSV row
            #name = row['name'].lower().replace('"', '')  # Remove any extra quotes
            ingredients = row['ingredients'].lower().replace('\'', '').replace('[', '').replace(']', '')
            instructions = row['steps'].lower().replace('\'', '').replace('[', '').replace(']', '')
            
            # Prepare recipe instance string
            recipe_instance = '[BOS]'+ingredients+'[STEPS]'+instructions+'[EOS]' #+name+'[INGREDIENTS]'
            
            # Limit length to 2000 characters as per your function
            if len(recipe_instance) <= 2000:
                recipe_instances.append(recipe_instance)
    
    return recipe_instances

In [76]:
# create text list for dataset
# https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions/data
recipe_list = load_preprocess_raw_data("dataset/RAW_recipes.csv")

reduced_recipe_list = random.sample(recipe_list, int(0.002 * len(recipe_list)))
print(reduced_recipe_list[:1])

train_list, test_list = np.split(reduced_recipe_list, [int(.8*len(reduced_recipe_list))])
print('\nNumber of train data: ', len(train_list))
print('Number of test data: ', len(test_list))

['[BOS]vanilla ice cream, brandy, white creme de cacao, black coffee[STEPS]place all ingredients in a blender, blend on high speed until smooth, refrigerate at least 2 hours, "dont skip this step !", blend quickly on high just before serving[EOS]']

Number of train data:  365
Number of test data:  92


In [30]:
# Load the GPT tokenizer.
tokenizer = GPT2Tokenizer.from_pretrained(model_name, bos_token='[BOS]', eos_token='[EOS]', pad_token='[PAD]')
# add special tokens for title, ingredients and instruction seperator
special_tokens_dict = {'additional_special_tokens': ['[STEPS]']} #'[INGREDIENTS]', 
# check the number of special tokens
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')

We have added 1 tokens


In [31]:
class GPT2Dataset(Dataset):

  def __init__(self, txt_list, tokenizer, max_length=768):

    self.tokenizer = tokenizer
    self.input_ids = []
    self.attn_masks = []

    for txt in txt_list:
        txt = self.custom_preprocessing(txt)
        
        encodings_dict = tokenizer(txt, truncation=True, max_length=max_length, padding="max_length")
    
        self.input_ids.append(torch.tensor(encodings_dict['input_ids']))
        self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))
          
  def custom_preprocessing(self, text):
    # Example preprocessing: Lowercase the text and remove punctuation
    text = text.lower()
    # Add more preprocessing steps as needed
    return text
    
  def __len__(self):
    return len(self.input_ids)

  def __getitem__(self, idx):
    return self.input_ids[idx], self.attn_masks[idx] 

In [32]:
dataset = GPT2Dataset(train_list, tokenizer, max_length=200)

# Split into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

  292 training samples
   73 validation samples


In [33]:
# Create the DataLoaders for our training and validation datasets.
# We'll take training samples in random order. 
train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [34]:
# I'm not really doing anything with the config buheret
configuration = GPT2Config.from_pretrained(model_name, output_hidden_states=False)

# instantiate the model
model = GPT2LMHeadModel.from_pretrained(model_name, config=configuration)

# this step is necessary because I've added some tokens (bos_token, etc) to the embeddings
# otherwise the tokenizer and model tensors won't match up
model.resize_token_embeddings(len(tokenizer))

# Tell pytorch to run this model on the GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
else:
    device = torch.device("cpu")
    

# Set the seed value all over the place to make this reproducible.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
if device == "cuda":
    torch.cuda.manual_seed_all(seed_val)



In [35]:
# Note: AdamW is a class from the huggingface library (as opposed to pytorch) 
optimizer = torch.optim.AdamW(model.parameters(),
                  lr = learning_rate,
                  eps = epsilon
                )

In [36]:
# Total number of training steps is [number of batches] x [number of epochs]. 
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs
print('Total number of steps: ', total_steps)
# Create the learning rate scheduler.
# This changes the learning rate as the training loop progresses
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = warmup_steps, 
                                            num_training_steps = total_steps)

Total number of steps:  438


In [37]:
training_stats = []
print("Currently using device type: ", device)

model = model.to(device)

for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    losses = []

    total_train_loss = 0

    model.train()
    
    loop = tqdm(train_dataloader, leave=True)
    for step, batch in enumerate(loop):

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        model.zero_grad()        

        outputs = model(  b_input_ids,
                          labels=b_labels, 
                          attention_mask =b_masks,
                          token_type_ids=None
                        )

        loss = outputs[0]  

        batch_loss = loss.item()
        total_train_loss += batch_loss
        losses.append(batch_loss)

        # Get sample every x batches.
        if step % sample_every == 0 and not step == 0:
            print('Batch {:>5,}  of  {:>5,}. Loss: {:>5,}.'.format(step, len(train_dataloader), batch_loss))

        loss.backward()

        optimizer.step()

        scheduler.step()

        if step % save_every == 0:
            model.save_pretrained(save_model)
            
        loop.set_postfix(loss=batch_loss)

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)       
    
    # Calculate perplexity.
    losses = torch.tensor(losses)
    train_perplexity = math.exp(torch.mean(losses))

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Perplexity: {0:.2f}".format(train_perplexity))        
    # ========================================
    #               Validation
    # ========================================

    print("")
    print("Running Validation...")

    model.eval()

    losses = []
    total_eval_loss = 0
    nb_eval_steps = 0


    # Evaluate data for one epoch
    for batch in validation_dataloader:
        
        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)
        
        with torch.no_grad():        

            outputs  = model(b_input_ids, 
#                            token_type_ids=None, 
                             attention_mask = b_masks,
                            labels=b_labels)
          
            loss = outputs[0]  
            
        batch_loss = loss.item()
        losses.append(batch_loss)
        total_eval_loss += batch_loss        

    avg_val_loss = total_eval_loss / len(validation_dataloader)
    
    # Calculate perplexity.
    losses = torch.tensor(losses)
    val_perplexity = math.exp(torch.mean(losses))

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation perplexity: {0:.2f}".format(val_perplexity))        

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Perplexity': train_perplexity,
            'Valid. Perplexity': val_perplexity,
        }
    )

print("")
print("Training complete!")

Currently using device type:  cpu

Training...


100%|██████████| 146/146 [02:44<00:00,  1.13s/it, loss=2.9]  



  Average training loss: 4.96
  Perplexity: 142.19

Running Validation...
  Validation Loss: 2.03
  Validation perplexity: 7.58

Training...


100%|██████████| 146/146 [02:39<00:00,  1.09s/it, loss=2.36] 



  Average training loss: 1.74
  Perplexity: 5.67

Running Validation...
  Validation Loss: 1.90
  Validation perplexity: 6.68

Training...


100%|██████████| 146/146 [02:31<00:00,  1.04s/it, loss=1.93] 



  Average training loss: 1.45
  Perplexity: 4.25

Running Validation...
  Validation Loss: 1.89
  Validation perplexity: 6.64

Training complete!


In [38]:
model.save_pretrained(save_model)

In [39]:
# prepare datasets for dev_list and test_list
test_dataset = GPT2Dataset(test_list, tokenizer, max_length=768)

In [40]:
# load the datasets
test_dataloader = DataLoader(
            test_dataset, # The validation samples.
            sampler = SequentialSampler(test_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [41]:
def evaluate_model(model, dataloaded):
    model = model.to(device)
    model.eval()

    losses = []
    perplexity = []
    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in dataloaded:

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        with torch.no_grad():        

            outputs  = model(b_input_ids, 
    #                            token_type_ids=None, 
                            attention_mask = b_masks,
                            labels=b_labels)

            loss = outputs[0]  

        batch_loss = loss.item()
        losses.append(batch_loss)
        total_eval_loss += batch_loss        

    avg_val_loss = total_eval_loss / len(dataloaded)

    # Calculate perplexity.
    losses = torch.tensor(losses)
    val_perplexity = math.exp(torch.mean(losses))
    perplexity.append(val_perplexity)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation perplexity: {0:.2f}".format(val_perplexity))
    return avg_val_loss, val_perplexity

In [42]:
print('Testing...')
test_loss, test_perplexity = evaluate_model(model, test_dataloader)
test_eval_df = pd.DataFrame(columns = ["test_loss", "test_perplexity"])
test_eval_df['test_loss'] = test_loss
test_eval_df['test_perplexity'] = test_perplexity
test_eval_df.to_csv("test_eval.csv")

Testing...
  Validation Loss: 0.61
  Validation perplexity: 1.84


In [43]:
# Load the trained GPT-2 model and tokenizer
#model = GPT2LMHeadModel.from_pretrained(save_file)
#tokenizer = GPT2Tokenizer.from_pretrained(save_file

# Ensure the model is on the right device
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model.to(device)

In [44]:
# Function to generate a recipe from a list of ingredients
"""def generate_recipe(ingredients, model, tokenizer, max_length=400):
    # Prepare the input prompt with the list of ingredients
    input_text = ingredients
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    
    # Generate the recipe
    output = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=5,
        no_repeat_ngram_size=2,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    
    # Decode the output to get the recipe text
    recipe = tokenizer.decode(output[0], skip_special_tokens=True)
    return recipe"""

"def generate_recipe(ingredients, model, tokenizer, max_length=400):\n    # Prepare the input prompt with the list of ingredients\n    input_text = ingredients\n    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)\n    \n    # Generate the recipe\n    output = model.generate(\n        input_ids,\n        max_length=max_length,\n        num_beams=5,\n        no_repeat_ngram_size=2,\n        num_return_sequences=1,\n        pad_token_id=tokenizer.pad_token_id,\n        eos_token_id=tokenizer.eos_token_id\n    )\n    \n    # Decode the output to get the recipe text\n    recipe = tokenizer.decode(output[0], skip_special_tokens=True)\n    return recipe"

In [84]:
def generate_recipe(ingredients, model, tokenizer, max_length=100, temperature=0.1, top_k=50, top_p=0.1):
    input_text = '[BOS]' + ingredients + '[STEPS]'
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    
    output = model.generate(
        input_ids,
        max_length=max_length,
        temperature=temperature, # Lower values make the model more confident (less random), while higher values increase randomness.
        top_k=top_k,  #Increase to consider more tokens, decrease to restrict the model’s choices.
        top_p=top_p,  # Increase to allow more diversity, decrease to make the model more conservative.
        num_beams=5,
        no_repeat_ngram_size=2,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True
    )
    
    recipe = tokenizer.decode(output[0], skip_special_tokens=False)
    
    # Replace lowercase special tokens with uppercase
    recipe = recipe.replace('[bos]', '[BOS]').replace('[steps]', '[STEPS]').replace('[eos]', '[EOS]')
    
    recipe = recipe.split('[EOS]', 1)[0] + '[EOS]'
        
    return recipe

In [85]:
def custom_preprocessing(text):
    # Example preprocessing: Lowercase the text and remove punctuation
    text = text.lower()
    #text = text.replace(",", "").replace(".", "").replace("!", "").replace("?", "").replace("(", "").replace(")", "").replace(":", "").replace(";", "").replace("'", "").replace('"', "")
    # Add more preprocessing steps as needed
    return text

In [86]:
def print_highlighted(generated_recipe, ingredients):
    recipe=generated_recipe
    ingredients_list = [ing.strip().lower() for ing in ingredients.split(',')]
    for ingredient in ingredients_list:
        recipe = recipe.replace(ingredient, f'\033[91m{ingredient}\033[0m')
    return recipe

In [87]:
# Example usage
ingredients = "pasta, tomato, garlic, onion, olive oil, salt, pepper, basil, parmesan cheese"
#ingredients = custom_preprocessing(ingredients)

generated_recipe = generate_recipe(ingredients, model, tokenizer)
    
print(print_highlighted(generated_recipe, ingredients))
print("\n", len(generated_recipe) - len(ingredients))

[BOS][91mflour[0m, [91msugar[0m, [91mcinnamon[0m, [91mcarrot[0m, [91mapple[0m, [91mwalnut[0m [STEPS] [PAD][BOS][PAD][PAD] [91mapple[0m cider vinegar[STEPS]combine the [91mflour[0m, [91msugar[0m, [91mcinnamon[0m, [91mcarrot[0m, [91mapple[0m, [91mwalnut[0ms, [91mapple[0msauce, and [91mapple[0m juice in a large bowl, mix well, pour into a greased 9x13-inch baking dish, bake at 350f for 30 minutes or until golden brown, remove from oven and let cool on wire racks, place the [91mapple[0m slices on a sheet pan and[EOS]

 361


In [88]:
from rouge import Rouge
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertModel, BertTokenizer

# Initialize models and tokenizers
model_name_bert = 'bert-base-uncased'
tokenizer_bert = BertTokenizer.from_pretrained(model_name_bert)
model_bert = BertModel.from_pretrained(model_name_bert)

rouge = Rouge()

# Function to calculate ROUGE-L F1 score
def calculate_rouge_score(text1, text2):
    scores = rouge.get_scores(text1, text2)
    rouge_l_f1 = scores[0]['rouge-l']['f']
    return rouge_l_f1

# Function to get GPT-2 embeddings
def get_gpt2_embedding(text, model, tokenizer):
    input_ids = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():
        outputs = model(input_ids)
    hidden_states = outputs[0]
    pooled_embedding = torch.mean(hidden_states, dim=1)
    return pooled_embedding

# Function to calculate cosine similarity for GPT-2 embeddings
def calculate_gpt2_similarity(text1, text2, model, tokenizer):
    embedding1 = get_gpt2_embedding(text1, model, tokenizer)
    embedding2 = get_gpt2_embedding(text2, model, tokenizer)
    similarity = cosine_similarity(embedding1, embedding2).item()
    return similarity

# Function to encode text for BERT
def encode_text(text, tokenizer):
    input_ids = tokenizer.encode(text, return_tensors='pt', max_length=512, truncation=True)
    return input_ids

# Function to calculate BERT embeddings
def get_bert_embedding(input_ids, model):
    with torch.no_grad():
        outputs = model(input_ids)
        last_hidden_state = outputs.last_hidden_state
        pooled_embedding = torch.mean(last_hidden_state, dim=1)
    return pooled_embedding

# Function to calculate cosine similarity for BERT embeddings
def calculate_bert_similarity(text1, text2, model, tokenizer):
    input1 = encode_text(text1, tokenizer)
    input2 = encode_text(text2, tokenizer)
    embedding1 = get_bert_embedding(input1, model)
    embedding2 = get_bert_embedding(input2, model)
    similarity = cosine_similarity(embedding1.cpu(), embedding2.cpu()).item()
    return similarity

# Function to evaluate generated recipe against a list of real recipes
def evaluate_generated_recipe(generated_recipe, real_recipes):
    rouge_scores = []
    gpt2_similarities = []
    bert_similarities = []

    for real_recipe in real_recipes:
        rouge_score = calculate_rouge_score(generated_recipe, real_recipe)
        gpt2_similarity = calculate_gpt2_similarity(generated_recipe, real_recipe, model, tokenizer)
        bert_similarity = calculate_bert_similarity(generated_recipe, real_recipe, model_bert, tokenizer_bert)

        rouge_scores.append(rouge_score)
        gpt2_similarities.append(gpt2_similarity)
        bert_similarities.append(bert_similarity)

    # Calculate average scores
    avg_scores = [(sum(scores) / len(scores)) for scores in zip(rouge_scores)]
    #, gpt2_similarities, bert_similarities

    # Find index of recipe with maximum average score
    max_index = avg_scores.index(max(avg_scores))

    return real_recipes[max_index], rouge_scores[max_index], gpt2_similarities[max_index], bert_similarities[max_index]

In [89]:
#reduced_recipe_list2 = ["Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.","Start with a ball of pizza dough, roll it out into a thin crust. Spread a layer of tomato sauce evenly over the dough. Add a generous amount of shredded mozzarella cheese on top. Optionally, sprinkle with dried oregano and a pinch of salt. Preheat your oven to a high temperature, around 450°F (230°C). Place the pizza on a baking sheet or pizza stone and bake for 10-15 minutes, or until the crust is golden brown and the cheese is bubbly. Remove from the oven, let it cool slightly, then slice and enjoy your delicious homemade pizza!"]

In [90]:
best_recipe = evaluate_generated_recipe(generated_recipe, reduced_recipe_list)

print("Generated Recipe:")
print(print_highlighted(generated_recipe, ingredients))
print("\nMost Similar Real Recipe:")
print(print_highlighted(best_recipe[0], ingredients), "\n\nrouge-l f1:", best_recipe[1], "\nGPT-2 similarity:", best_recipe[2], "\nBERT similarity:", best_recipe[3])

Generated Recipe:
[BOS][91mflour[0m, [91msugar[0m, [91mcinnamon[0m, [91mcarrot[0m, [91mapple[0m, [91mwalnut[0m [STEPS] [PAD][BOS][PAD][PAD] [91mapple[0m cider vinegar[STEPS]combine the [91mflour[0m, [91msugar[0m, [91mcinnamon[0m, [91mcarrot[0m, [91mapple[0m, [91mwalnut[0ms, [91mapple[0msauce, and [91mapple[0m juice in a large bowl, mix well, pour into a greased 9x13-inch baking dish, bake at 350f for 30 minutes or until golden brown, remove from oven and let cool on wire racks, place the [91mapple[0m slices on a sheet pan and[EOS]

Most Similar Real Recipe:
[BOS]stale bread, eggs, [91msugar[0m, butter, crushed pine[91mapple[0m, salt[STEPS]cream [91msugar[0m margarine or butter , salt and eggs, stir in pine[91mapple[0m and fold in bread, pour in greased casserole, bake , uncovered at 350f for 45 minutes or until crispy[EOS] 

rouge-l f1: 0.28915662182609964 
GPT-2 similarity: 0.9977008104324341 
BERT similarity: 0.9282747507095337


In [91]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Function to extract ingredients from a recipe
def extract_ingredients(recipe):
    start = recipe.find('[BOS]') + len('[BOS]')
    end = recipe.find('[STEPS]')
    ingredients = recipe[start:end].strip()
    return ingredients

# Function to calculate cosine similarity for ingredient lists
def calculate_ingredient_similarity(ingredients1, ingredients2):
    vectorizer = TfidfVectorizer().fit_transform([ingredients1, ingredients2])
    vectors = vectorizer.toarray()
    cosine_sim = cosine_similarity(vectors)
    return cosine_sim[0, 1]

# Function to evaluate generated recipe against a list of real recipes
def evaluate_generated_recipe_by_ingredients(generated_recipe, real_recipes, top_k=5):
    generated_ingredients = extract_ingredients(generated_recipe)
    
    similarities = []
    for real_recipe in real_recipes:
        real_ingredients = extract_ingredients(real_recipe)
        similarity = calculate_ingredient_similarity(generated_ingredients, real_ingredients)
        similarities.append((real_recipe, similarity))
    
    # Sort recipes based on ingredient similarity
    similarities.sort(key=lambda x: x[1], reverse=True)
    
    # Get top k recipes
    top_k_recipes = similarities[:top_k]
    
    results = []
    for real_recipe, sim in top_k_recipes:
        rouge_score = calculate_rouge_score(generated_recipe, real_recipe)
        gpt2_similarity = calculate_gpt2_similarity(generated_recipe, real_recipe, model, tokenizer)
        bert_similarity = calculate_bert_similarity(generated_recipe, real_recipe, model_bert, tokenizer_bert)
        results.append((real_recipe, sim, rouge_score, gpt2_similarity, bert_similarity))
    
    return results

In [92]:
# Evaluate and print top k recipes
top_k = 5
top_k_recipes = evaluate_generated_recipe_by_ingredients(generated_recipe, reduced_recipe_list, top_k=top_k)

for i, (recipe, ingredient_sim, rouge_score, gpt2_sim, bert_sim) in enumerate(top_k_recipes):
    print(f"\nRecipe {i+1} (Ingredient Similarity: {ingredient_sim:.2f}):")
    print(print_highlighted(recipe, ingredients))
    print(f"ROUGE-L F1: {rouge_score:.4f}")
    print(f"GPT-2 Similarity: {gpt2_sim:.4f}")
    print(f"BERT Similarity: {bert_sim:.4f}")


Recipe 1 (Ingredient Similarity: 0.36):
[BOS][91mapple[0ms, [91mcinnamon[0m, water, butter, [91mflour[0m, [91msugar[0m, brown [91msugar[0m[STEPS]preheat oven to 350, grease 8 x 8 dish, peel and cut [91mapple[0ms, place [91mapple[0ms in pan, add [91mcinnamon[0m and water, separately , mix melted butter , [91mflour[0m and [91msugar[0ms, spread evenly and firmly on top of [91mapple[0m mixture, bake 45-50 minutes[EOS]
ROUGE-L F1: 0.1684
GPT-2 Similarity: 0.9992
BERT Similarity: 0.9510

Recipe 2 (Ingredient Similarity: 0.30):
[BOS]sour cream, butter, egg, bran flakes, [91mflour[0m, [91msugar[0m, [91mcinnamon[0m, baking soda, salt, [91mapple[0m[STEPS]preheat oven to 375, grease muffin tin, combine sour cream , butter and egg, add bran flakes , let stand till softened, add combined dry ingredients , mixing till just moistened, fold in [91mapple[0ms, spoon into greased muffin cups , filling cups 3 / 4 full, bake at 375 25 minutes[EOS]
ROUGE-L F1: 0.1698
GPT-2 S