In [1]:
# Cell 1: Install necessary libraries
!pip install torch torchtext transformers sentencepiece pandas tqdm datasets rouge-score sacrebleu

# Cell 2: Import required libraries
from datasets import load_dataset, DatasetDict, Dataset
import pandas as pd
import ast
import datasets
from tqdm import tqdm
import time
import os

# Cell 3: Load dataset from Kaggle
# Read the CSV file
data_path = '/kaggle/input/3a2mext/3A2M_EXTENDED.csv'
df_raw = pd.read_csv(data_path)

# Display basic information
print("Dataset loaded successfully!")
print(f"Total rows: {len(df_raw)}")
print("\nColumn names:", df_raw.columns.tolist())
print("\nFirst 3 rows:")
display(df_raw.head(3))

# Cell 4: Data preprocessing and sampling
# Sample a subset for faster training (you can increase this for full training)
# Using 50,000 samples for manageable training time
SAMPLE_SIZE = 50000
df_sampled = df_raw.sample(n=min(SAMPLE_SIZE, len(df_raw)), random_state=42)

# Clean and prepare the data
def clean_text(text):
    """Remove extra whitespace and clean text"""
    if isinstance(text, str):
        return ' '.join(text.split())
    return ''

def parse_ner_list(ner_string):
    """Parse NER string to list of ingredients"""
    try:
        ingredients = ast.literal_eval(ner_string)
        if isinstance(ingredients, list):
            return ', '.join(ingredients)
        return ''
    except:
        return ''

def parse_directions_list(directions_string):
    """Parse directions string to readable format"""
    try:
        directions = ast.literal_eval(directions_string)
        if isinstance(directions, list):
            return ' '.join([f"Step {i+1}: {step}" for i, step in enumerate(directions)])
        return directions_string
    except:
        return directions_string

# Apply cleaning
df_sampled['title'] = df_sampled['title'].apply(clean_text)
df_sampled['ingredients'] = df_sampled['NER'].apply(parse_ner_list)
df_sampled['directions'] = df_sampled['directions'].apply(parse_directions_list)

# Remove rows with missing data
df_sampled = df_sampled[
    (df_sampled['title'].str.len() > 0) & 
    (df_sampled['ingredients'].str.len() > 0) & 
    (df_sampled['directions'].str.len() > 0)
].copy()

print(f"\nCleaned dataset size: {len(df_sampled)}")
print("\nSample cleaned data:")
display(df_sampled[['title', 'ingredients', 'directions']].head(3))

# Cell 5: Create formatted dataset
# Format: "Recipe: [TITLE] | Ingredients: [INGREDIENTS] | Directions: [DIRECTIONS]"
def create_recipe_text(row):
    """Create formatted recipe text for training"""
    return f"Recipe: {row['title']} | Ingredients: {row['ingredients']} | Directions: {row['directions']}"

df_sampled['recipe_text'] = df_sampled.apply(create_recipe_text, axis=1)

# Create a simpler dataframe with only what we need
df = df_sampled[['title', 'ingredients', 'recipe_text']].copy()
df.columns = ['Title', 'Ingredients', 'FullRecipe']

print("\nFinal dataset structure:")
display(df.head(3))

# Cell 6: Import transformers and torch libraries
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

# Cell 7: Determine device
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    try:
        if torch.backends.mps.is_available():
            device = torch.device('mps')
            print("Using MPS (Apple Silicon)")
        else:
            device = torch.device('cpu')
            print("MPS device not available, falling back to CPU.")
    except AttributeError:
        device = torch.device('cpu')
        print("MPS support not detected, falling back to CPU.")

print(f"Device: {device}")

# Cell 8: Load tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer loaded successfully!")

# Cell 9: Load GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
print("Model loaded successfully!")
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Cell 10: Define model parameters
BATCH_SIZE = 4  # Reduced for recipe generation (longer sequences)
MAX_LENGTH = 512  # Longer sequences for recipes

print(f"Batch size: {BATCH_SIZE}")
print(f"Max sequence length: {MAX_LENGTH}")

# Cell 11: Dataset statistics
print("Dataset Statistics:")
print(f"Total recipes: {len(df)}")
print("\nRecipe text length distribution:")
df['text_length'] = df['FullRecipe'].str.len()
print(df['text_length'].describe())

# Cell 12: Custom Dataset class
class RecipeDataset(Dataset):
    """
    Custom Dataset for recipe generation
    """
    def __init__(self, df, tokenizer, max_length=512):
        self.data = df['FullRecipe'].tolist()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data[idx]
        
        # Tokenize with attention mask
        encodings = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encodings['input_ids'].squeeze(0),
            'attention_mask': encodings['attention_mask'].squeeze(0)
        }

# Cell 13: Create dataset instance
recipe_dataset = RecipeDataset(df, tokenizer, max_length=MAX_LENGTH)
print(f"Dataset created with {len(recipe_dataset)} samples")

# Cell 14: Train-validation split
train_size = int(0.9 * len(recipe_dataset))
valid_size = len(recipe_dataset) - train_size
train_data, valid_data = random_split(recipe_dataset, [train_size, valid_size])

print(f"Training samples: {train_size}")
print(f"Validation samples: {valid_size}")

# Cell 15: Create DataLoaders
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE)

print(f"Training batches: {len(train_loader)}")
print(f"Validation batches: {len(valid_loader)}")

# Cell 16: Training parameters
num_epochs = 3
learning_rate = 5e-5
weight_decay = 0.01

print(f"Training configuration:")
print(f"  Epochs: {num_epochs}")
print(f"  Learning rate: {learning_rate}")
print(f"  Weight decay: {weight_decay}")

# Cell 17: Define training components
batch_size = BATCH_SIZE
model_name = 'gpt2'
gpu = 0

criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

print("Optimizer and loss function initialized")

# Cell 18: Create results tracking and checkpoint directory
results = pd.DataFrame(columns=[
    'epoch', 'transformer', 'batch_size', 'gpu',
    'training_loss', 'validation_loss', 'epoch_duration_sec'
])

# Create checkpoint directory
checkpoint_dir = 'model_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
print(f"Checkpoint directory created: {checkpoint_dir}")

# Cell 19: Training loop with checkpointing
best_val_loss = float('inf')

for epoch in range(num_epochs):
    start_time = time.time()
    
    # Training phase
    model.train()
    epoch_training_loss = 0
    train_iterator = tqdm(
        train_loader, 
        desc=f"Training Epoch {epoch+1}/{num_epochs}"
    )
    
    for batch in train_iterator:
        optimizer.zero_grad()
        
        inputs = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=inputs,
            attention_mask=attention_mask,
            labels=inputs
        )
        
        loss = outputs.loss
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        train_iterator.set_postfix({'Training Loss': f'{loss.item():.4f}'})
        epoch_training_loss += loss.item()
    
    avg_epoch_training_loss = epoch_training_loss / len(train_iterator)
    
    # Validation phase
    model.eval()
    epoch_validation_loss = 0
    valid_iterator = tqdm(valid_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}")
    
    with torch.no_grad():
        for batch in valid_iterator:
            inputs = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            outputs = model(
                input_ids=inputs,
                attention_mask=attention_mask,
                labels=inputs
            )
            
            loss = outputs.loss
            valid_iterator.set_postfix({'Validation Loss': f'{loss.item():.4f}'})
            epoch_validation_loss += loss.item()
    
    avg_epoch_validation_loss = epoch_validation_loss / len(valid_loader)
    
    end_time = time.time()
    epoch_duration_sec = end_time - start_time
    
    # Save results
    new_row = {
        'transformer': model_name,
        'batch_size': batch_size,
        'gpu': gpu,
        'epoch': epoch+1,
        'training_loss': avg_epoch_training_loss,
        'validation_loss': avg_epoch_validation_loss,
        'epoch_duration_sec': epoch_duration_sec
    }
    
    results.loc[len(results)] = new_row
    
    # Save checkpoint
    checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch+1}.pt')
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': avg_epoch_training_loss,
        'val_loss': avg_epoch_validation_loss,
    }, checkpoint_path)
    
    print(f"\nEpoch {epoch+1} Summary:")
    print(f"  Training Loss: {avg_epoch_training_loss:.4f}")
    print(f"  Validation Loss: {avg_epoch_validation_loss:.4f}")
    print(f"  Duration: {epoch_duration_sec:.2f}s")
    print(f"  Checkpoint saved: {checkpoint_path}")
    
    # Save best model
    if avg_epoch_validation_loss < best_val_loss:
        best_val_loss = avg_epoch_validation_loss
        best_model_path = os.path.join(checkpoint_dir, 'best_model.pt')
        torch.save(model.state_dict(), best_model_path)
        print(f"  Best model saved! (Val Loss: {best_val_loss:.4f})")

# Cell 20: Display training results
print("\nTraining Results:")
display(results)

# Cell 21: Generate recipe from ingredients
def generate_recipe(ingredients_list, max_length=300, temperature=0.8):
    """Generate a recipe from a list of ingredients"""
    model.eval()
    
    # Format input
    if isinstance(ingredients_list, list):
        ingredients = ', '.join(ingredients_list)
    else:
        ingredients = ingredients_list
    
    prompt = f"Recipe: | Ingredients: {ingredients} | Directions:"
    
    # Tokenize
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    
    # Generate
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=temperature,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    
    # Decode
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Cell 22: Test generation - Example 1
print("=" * 80)
print("EXAMPLE 1: Chicken and Vegetables")
print("=" * 80)

ingredients1 = ["chicken breast", "bell peppers", "onion", "garlic", "olive oil", "salt", "pepper"]
recipe1 = generate_recipe(ingredients1)
print(recipe1)

# Cell 23: Test generation - Example 2
print("\n" + "=" * 80)
print("EXAMPLE 2: Pasta Dish")
print("=" * 80)

ingredients2 = ["pasta", "tomatoes", "basil", "parmesan cheese", "garlic"]
recipe2 = generate_recipe(ingredients2)
print(recipe2)

# Cell 24: Test generation - Example 3
print("\n" + "=" * 80)
print("EXAMPLE 3: Dessert")
print("=" * 80)

ingredients3 = ["flour", "sugar", "eggs", "butter", "chocolate chips"]
recipe3 = generate_recipe(ingredients3)
print(recipe3)

# Cell 25: BLEU Score Evaluation
from sacrebleu.metrics import BLEU

def calculate_bleu(references, hypotheses):
    """Calculate BLEU score"""
    bleu = BLEU()
    score = bleu.corpus_score(hypotheses, [references])
    return score.score

# Generate predictions for validation set (sample)
print("Generating predictions for BLEU evaluation...")
sample_size = min(100, len(valid_data))
references = []
hypotheses = []

for i in tqdm(range(sample_size)):
    sample = valid_data.dataset.data[valid_data.indices[i]]
    
    # Extract ingredients from full recipe text
    parts = sample.split('|')
    if len(parts) >= 2:
        ingredients = parts[1].replace('Ingredients:', '').strip()
        reference = sample
        
        # Generate hypothesis
        hypothesis = generate_recipe(ingredients, max_length=200)
        
        references.append(reference)
        hypotheses.append(hypothesis)

bleu_score = calculate_bleu(references, hypotheses)
print(f"\nBLEU Score: {bleu_score:.2f}")

# Cell 26: ROUGE Score Evaluation
from rouge_score import rouge_scorer

def calculate_rouge(references, hypotheses):
    """Calculate ROUGE scores"""
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []
    
    for ref, hyp in zip(references, hypotheses):
        scores = scorer.score(ref, hyp)
        rouge1_scores.append(scores['rouge1'].fmeasure)
        rouge2_scores.append(scores['rouge2'].fmeasure)
        rougeL_scores.append(scores['rougeL'].fmeasure)
    
    return {
        'rouge1': sum(rouge1_scores) / len(rouge1_scores),
        'rouge2': sum(rouge2_scores) / len(rouge2_scores),
        'rougeL': sum(rougeL_scores) / len(rougeL_scores)
    }

rouge_scores = calculate_rouge(references, hypotheses)
print("\nROUGE Scores:")
print(f"  ROUGE-1: {rouge_scores['rouge1']:.4f}")
print(f"  ROUGE-2: {rouge_scores['rouge2']:.4f}")
print(f"  ROUGE-L: {rouge_scores['rougeL']:.4f}")

# Cell 27: Quality evaluation summary
evaluation_results = pd.DataFrame({
    'Metric': ['BLEU', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L'],
    'Score': [
        bleu_score,
        rouge_scores['rouge1'],
        rouge_scores['rouge2'],
        rouge_scores['rougeL']
    ]
})

print("\n" + "=" * 80)
print("EVALUATION SUMMARY")
print("=" * 80)
display(evaluation_results)

# Cell 28: Save final model
final_model_path = 'RecipeGenerationGPT2.pt'
torch.save(model.state_dict(), final_model_path)
print(f"\nFinal model saved to: {final_model_path}")

# Save complete model with tokenizer
model.save_pretrained('./recipe_gpt2_model')
tokenizer.save_pretrained('./recipe_gpt2_model')
print("Model and tokenizer saved to: ./recipe_gpt2_model")

# Cell 29: Human evaluation template
print("\n" + "=" * 80)
print("HUMAN EVALUATION GUIDE")
print("=" * 80)
print("""
For human evaluation, rate generated recipes on:

1. Coherence (1-5): Does the recipe make logical sense?
2. Creativity (1-5): Is the recipe interesting and novel?
3. Completeness (1-5): Are all necessary steps included?
4. Ingredient Usage (1-5): Are the ingredients used appropriately?
5. Overall Quality (1-5): Would you try this recipe?

Sample a set of generated recipes and have human evaluators rate them.
Calculate average scores across all metrics.
""")

# Cell 30: Example recipes for human evaluation
print("\n" + "=" * 80)
print("SAMPLE RECIPES FOR HUMAN EVALUATION")
print("=" * 80)

test_ingredients = [
    ["salmon", "lemon", "dill", "butter"],
    ["ground beef", "taco seasoning", "lettuce", "cheese", "tomatoes"],
    ["rice", "chicken", "soy sauce", "vegetables"],
    ["chocolate", "cream", "vanilla", "strawberries"],
    ["potatoes", "bacon", "cheese", "sour cream"]
]

for i, ingredients in enumerate(test_ingredients, 1):
    print(f"\n{'-' * 80}")
    print(f"Test Recipe {i}: {', '.join(ingredients)}")
    print(f"{'-' * 80}")
    recipe = generate_recipe(ingredients, temperature=0.7)
    print(recipe)
    print()

Collecting torchtext
  Downloading torchtext-0.18.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.9 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-

Unnamed: 0,title,NER,Extended_NER,genre,label,directions
0,\t Arugula Pomegranate Salad,"[""baby spinach"", ""baby arugula"", ""pomegranate ...","['alfalfa sprouts', 'baby spinach', 'baby arug...",vegetables,4,"[""Toss together spinach and arugula, then plac..."
1,\t Black Bean And Turkey Chili,"[""olive oil"", ""yellow onion"", ""garlic"", ""groun...","['one', 'yellow onion', 'tomato paste', 'about...",sides,8,"[""Dice the onion and mince the garlic. Add the..."
2,\t Finger Lickin' Tofu Nuggets,"[""extra firm"", ""almond flour"", ""nutritional ye...","['extra firm', '2', 'coconut oil', 'almond flo...",nonveg,3,"[""Wrap the tofu in a clean tea towel and press..."



Cleaned dataset size: 49978

Sample cleaned data:


Unnamed: 0,title,ingredients,directions
1135333,Jamaican Sweet Potato Pone,"sweet potatoes, brown sugar, butter, orange ju...",Step 1: Preheat the oven to 350°F. Step 2: Coo...
1631916,Pumpkin Chip Cookies,"pumpkin, eggs, sugar, baking powder, cinnamon,...",Step 1: Mix ingredients in order given. Step 2...
1609741,Potato Soup,"potatoes, milk, butter, flour, onion, celery, ...",Step 1: Saute first 3 ingredients together in ...



Final dataset structure:


Unnamed: 0,Title,Ingredients,FullRecipe
1135333,Jamaican Sweet Potato Pone,"sweet potatoes, brown sugar, butter, orange ju...",Recipe: Jamaican Sweet Potato Pone | Ingredien...
1631916,Pumpkin Chip Cookies,"pumpkin, eggs, sugar, baking powder, cinnamon,...",Recipe: Pumpkin Chip Cookies | Ingredients: pu...
1609741,Potato Soup,"potatoes, milk, butter, flour, onion, celery, ...","Recipe: Potato Soup | Ingredients: potatoes, m..."


2025-11-01 17:03:26.880394: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762016607.258057      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762016607.383047      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using GPU: Tesla T4
Device: cuda


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Tokenizer loaded successfully!


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model loaded successfully!
Model parameters: 124,439,808
Batch size: 4
Max sequence length: 512
Dataset Statistics:
Total recipes: 49978

Recipe text length distribution:
count    49978.000000
mean       689.818920
std        510.750009
min         89.000000
25%        365.000000
50%        539.000000
75%        851.000000
max      13121.000000
Name: text_length, dtype: float64
Dataset created with 49978 samples
Training samples: 44980
Validation samples: 4998
Training batches: 11245
Validation batches: 1250
Training configuration:
  Epochs: 3
  Learning rate: 5e-05
  Weight decay: 0.01
Optimizer and loss function initialized
Checkpoint directory created: model_checkpoints


Training Epoch 1/3:   0%|          | 0/11245 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Training Epoch 1/3: 100%|██████████| 11245/11245 [1:59:35<00:00,  1.57it/s, Training Loss=0.7122]
Validation Epoch 1/3: 100%|██████████| 1250/1250 [03:58<00:00,  5.25it/s, Validation Loss=0.3538]



Epoch 1 Summary:
  Training Loss: 0.6737
  Validation Loss: 0.6186
  Duration: 7413.28s
  Checkpoint saved: model_checkpoints/checkpoint_epoch_1.pt
  Best model saved! (Val Loss: 0.6186)


Training Epoch 2/3: 100%|██████████| 11245/11245 [1:59:43<00:00,  1.57it/s, Training Loss=0.7276]
Validation Epoch 2/3: 100%|██████████| 1250/1250 [03:58<00:00,  5.25it/s, Validation Loss=0.3252]



Epoch 2 Summary:
  Training Loss: 0.6026
  Validation Loss: 0.5929
  Duration: 7421.37s
  Checkpoint saved: model_checkpoints/checkpoint_epoch_2.pt
  Best model saved! (Val Loss: 0.5929)


Training Epoch 3/3: 100%|██████████| 11245/11245 [1:59:45<00:00,  1.56it/s, Training Loss=0.4802]
Validation Epoch 3/3: 100%|██████████| 1250/1250 [03:58<00:00,  5.25it/s, Validation Loss=0.3188]



Epoch 3 Summary:
  Training Loss: 0.5700
  Validation Loss: 0.5776
  Duration: 7424.09s
  Checkpoint saved: model_checkpoints/checkpoint_epoch_3.pt
  Best model saved! (Val Loss: 0.5776)

Training Results:


Unnamed: 0,epoch,transformer,batch_size,gpu,training_loss,validation_loss,epoch_duration_sec
0,1,gpt2,4,0,0.673678,0.618619,7413.283098
1,2,gpt2,4,0,0.602592,0.592891,7421.374798
2,3,gpt2,4,0,0.569975,0.577573,7424.094941


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


EXAMPLE 1: Chicken and Vegetables
Recipe: | Ingredients: chicken breast, bell peppers, onion, garlic, olive oil, salt, pepper | Directions: Step 1: Brown the chicken in a large frying pan. Step 2: Add all other ingredients and cook over medium heat until sauce is thickened.

EXAMPLE 2: Pasta Dish
Recipe: | Ingredients: pasta, tomatoes, basil, parmesan cheese, garlic | Directions: Step 1: Cook the pasta as directed on package. Step 2: Drain, reserving 1 cup of cooking water. Step 3: Stir in tomatoes and basil. Step 4: Mix in the Parmesan cheese. Step 5: Fold in the cooked pasta, the rest of the ingredients, and pour into a greased baking dish. Step 6: Bake in a 350 degree oven for 30 minutes, uncovered or until bubbly.

EXAMPLE 3: Dessert
Recipe: | Ingredients: flour, sugar, eggs, butter, chocolate chips | Directions: Step 1: Sift together the flour, baking powder and salt. Step 2: Combine with the sugar and eggs; add all at once to dry ingredients just until moistened. Step 3: Drop by 

100%|██████████| 100/100 [01:03<00:00,  1.58it/s]



BLEU Score: 21.00

ROUGE Scores:
  ROUGE-1: 0.4337
  ROUGE-2: 0.2708
  ROUGE-L: 0.3699

EVALUATION SUMMARY


Unnamed: 0,Metric,Score
0,BLEU,21.004285
1,ROUGE-1,0.433694
2,ROUGE-2,0.270821
3,ROUGE-L,0.369939



Final model saved to: RecipeGenerationGPT2.pt
Model and tokenizer saved to: ./recipe_gpt2_model

HUMAN EVALUATION GUIDE

For human evaluation, rate generated recipes on:

1. Coherence (1-5): Does the recipe make logical sense?
2. Creativity (1-5): Is the recipe interesting and novel?
3. Completeness (1-5): Are all necessary steps included?
4. Ingredient Usage (1-5): Are the ingredients used appropriately?
5. Overall Quality (1-5): Would you try this recipe?

Sample a set of generated recipes and have human evaluators rate them.
Calculate average scores across all metrics.


SAMPLE RECIPES FOR HUMAN EVALUATION

--------------------------------------------------------------------------------
Test Recipe 1: salmon, lemon, dill, butter
--------------------------------------------------------------------------------
Recipe: | Ingredients: salmon, lemon, dill, butter | Directions: Step 1: Mix all ingredients together and cook over medium heat for 2 minutes. Step 2: Serve with any meat or fi