# 1. Imports

In [2]:
import torch
import csv
import random
import re
import ast
import numpy as np
from tqdm.notebook import tqdm
import pickle # <-- Added
import os     # <-- Added
import nltk   # <-- Added
from nltk.corpus import stopwords # <-- Added
from nltk import WordNetLemmatizer, word_tokenize, pos_tag # <-- Added
from gensim.models import Word2Vec # <-- Added

from transformers import GPT2LMHeadModel, GPT2Tokenizer, BertModel, BertTokenizer
from rouge import Rouge
from sklearn.metrics.pairwise import cosine_similarity # <-- Already present, but needed by predict_cooking_methods
from sklearn.preprocessing import normalize # <-- Added for predict_cooking_methods
from sklearn.feature_extraction.text import TfidfVectorizer # <-- Already present

# Setup tqdm for pandas
tqdm.pandas()


# Make sure the NLTK resources used later in this notebook (WordNet for the
# lemmatizer, the stopword list, the Punkt tokenizer and the POS tagger) are
# installed, downloading them when any is missing.
print("Setting up NLTK...")
try:
    nltk.data.find('corpora/wordnet.zip')
    nltk.data.find('corpora/stopwords.zip')
    nltk.data.find('tokenizers/punkt.zip')
    nltk.data.find('taggers/averaged_perceptron_tagger.zip')
    print("NLTK data found.")
except LookupError:
    # nltk.data.find() reports a missing resource with LookupError, so this is
    # the exception that must be caught for the download fallback to run.
    print("Downloading necessary NLTK data...")
    nltk.download('wordnet', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    print("NLTK data download complete.")

# Shared by preprocess_ingredients() further down in the notebook.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
print("NLTK components initialized.")

Setting up NLTK...
NLTK data found.
NLTK components initialized.


# 2. Constants and Model Loading (GPT-2, BERT, Word2Vec)


In [3]:
# --- Configuration: paths, model names and evaluation settings ---
# Directory holding the fine-tuned GPT-2 checkpoint loaded below.
SAVED_MODEL_PATH = "models/colab_model_ingredients_epochs_3"
# Base checkpoint name; only the tokenizer is loaded from it in this cell.
BASE_MODEL_NAME = "gpt2"
# CSV with the real recipes used as evaluation benchmarks.
DATASET_PATH = "dataset/RAW_merged.csv"
DATASET_SAMPLE_FRACTION = 1.0 # Use 1.0 for full dataset, < 1.0 for sampling
BERT_MODEL_NAME = 'bert-base-uncased'
# Passed as top_k to the evaluation function: how many real recipes to compare against.
TOP_K_EVALUATION = 5
W2V_MODEL_PATH = "models/word2vec_ingredients_techniques.model" # Saved gensim Word2Vec model
TECHNIQUES_VECTORS_PATH = "models/techniques_vectors.pkl" # Pickled technique-vector mapping
W2V_PREDICTION_TOP_N = 3 # How many techniques to predict

# --- GPT-2 tokenizer and fine-tuned model ---
print("Loading GPT-2 tokenizer...")
# The tokenizer is rebuilt from the base vocabulary and the special tokens are
# re-registered here rather than loaded from SAVED_MODEL_PATH.
# NOTE(review): presumably this mirrors the order used at fine-tuning time so
# the token ids line up with the saved embeddings - confirm.
tokenizer_gpt = GPT2Tokenizer.from_pretrained(BASE_MODEL_NAME, bos_token='[BOS]', eos_token='[EOS]', pad_token='[PAD]')
special_tokens_dict = {'additional_special_tokens': ['[INGREDIENTS]', '[TECHNIQUES]', '[STEPS]']}
num_added_toks = tokenizer_gpt.add_special_tokens(special_tokens_dict)
print(f'Added {num_added_toks} special tokens: {tokenizer_gpt.additional_special_tokens}')

print(f"Loading fine-tuned GPT-2 model from: {SAVED_MODEL_PATH}")
model_gpt = GPT2LMHeadModel.from_pretrained(SAVED_MODEL_PATH)
# Match the embedding matrix size to the tokenizer after adding special tokens.
model_gpt.resize_token_embeddings(len(tokenizer_gpt))

# Use the GPU when one is available; both GPT-2 and BERT are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_gpt.to(device)
model_gpt.eval()
print(f"GPT-2 model loaded to: {device}")

# --- BERT Model Loading ---
print(f"Loading BERT tokenizer: {BERT_MODEL_NAME}")
tokenizer_bert = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
print(f"Loading BERT model: {BERT_MODEL_NAME}")
model_bert = BertModel.from_pretrained(BERT_MODEL_NAME)
model_bert.to(device)
model_bert.eval()
print("BERT model loaded.")

# --- Word2Vec Model and Techniques Vectors Loading ---
print("Loading Word2Vec model and techniques vectors...")
# Fail early with a clear message when either artifact is missing.
if not os.path.exists(W2V_MODEL_PATH):
    raise FileNotFoundError(f"Word2Vec model file not found at: {W2V_MODEL_PATH}")
if not os.path.exists(TECHNIQUES_VECTORS_PATH):
    raise FileNotFoundError(f"Techniques vectors file not found at: {TECHNIQUES_VECTORS_PATH}")

model_w2v = Word2Vec.load(W2V_MODEL_PATH)
print("Word2Vec model loaded.")
# SECURITY: pickle.load can execute arbitrary code; only load a file that was
# produced by this project and is trusted.
with open(TECHNIQUES_VECTORS_PATH, 'rb') as f:
    techniques_vectors_dict = pickle.load(f)
print("Techniques vectors loaded.")

# --- ROUGE Initializer ---
rouge = Rouge()
print("ROUGE initialized.")

Loading GPT-2 tokenizer...
Added 3 special tokens: ['[INGREDIENTS]', '[TECHNIQUES]', '[STEPS]']
Loading fine-tuned GPT-2 model from: models/colab_model_ingredients_epochs_3
GPT-2 model loaded to: cpu
Loading BERT tokenizer: bert-base-uncased
Loading BERT model: bert-base-uncased
BERT model loaded.
Loading Word2Vec model and techniques vectors...
Word2Vec model loaded.
Techniques vectors loaded.
ROUGE initialized.


# 3. Recipe Generation Function (GPT-2)

In [4]:
def generate_recipe(ingredients, techniques, model, tokenizer, max_length=820, temperature=0.7, top_k=50, top_p=0.9, num_beams=3, no_repeat_ngram_size=3, repetition_penalty=1.2):
    """Generate a recipe string from ingredients and cooking techniques.

    The prompt has the form
    ``[BOS][INGREDIENTS]<ingredients>[TECHNIQUES]<techniques>[STEPS]`` and the
    model continues it with the steps.

    Args:
        ingredients: Comma-separated ingredient string placed in the prompt.
        techniques: Technique string, an iterable of technique strings
            (joined with ", "), or None / empty for "no techniques".
        model: Model exposing ``generate`` and ``device`` (e.g. GPT2LMHeadModel).
        tokenizer: Tokenizer matching ``model``; must define pad/eos token ids.
        max_length, temperature, top_k, top_p, num_beams,
        no_repeat_ngram_size, repetition_penalty: Forwarded to ``model.generate``.

    Returns:
        The decoded text including special tokens, cut right after the first
        ``[EOS]`` (kept) and before the first ``[PAD]``.
    """
    # Normalise techniques to a single string; None means "nothing predicted".
    if techniques is None:
        techniques_str = ""
    elif isinstance(techniques, str):
        techniques_str = techniques
    else:
        techniques_str = ", ".join(techniques)

    if not techniques_str:
        # The marker is still emitted (with nothing after it) to keep the prompt structure.
        print("Warning: No techniques provided or predicted. Generating with an empty [TECHNIQUES] section.")

    input_text = f'[BOS][INGREDIENTS]{ingredients}[TECHNIQUES]{techniques_str}[STEPS]'

    # Place the inputs on the model's own device instead of relying on a
    # notebook-level global, so the function works with any model passed in.
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(model.device)
    print(f"Input Text for GPT-2: {input_text}") # Debug print

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            num_beams=num_beams,
            no_repeat_ngram_size=no_repeat_ngram_size,
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True, # Sampling combined with beam search
            repetition_penalty=repetition_penalty
        )

    # Keep special tokens: the extractors rely on the section markers.
    recipe = tokenizer.decode(output[0], skip_special_tokens=False)

    # Normalize special tokens just in case they were decoded in lower case
    for lowered, marker in (
        ('[bos]', '[BOS]'),
        ('[ingredients]', '[INGREDIENTS]'),
        ('[techniques]', '[TECHNIQUES]'),
        ('[steps]', '[STEPS]'),
        ('[eos]', '[EOS]'),
        ('[pad]', '[PAD]'),
    ):
        recipe = recipe.replace(lowered, marker)

    # Drop everything after the first [EOS] (keeping the marker) and any padding.
    if '[EOS]' in recipe:
        recipe = recipe.split('[EOS]', 1)[0] + '[EOS]'
    if '[PAD]' in recipe:
        recipe = recipe.split('[PAD]', 1)[0]

    return recipe

# 4. Highlighting Utility Function

In [5]:
# 4. Highlighting Utility Function

def _technique_regex(technique):
    """Return a regex fragment matching `technique` and its common verb inflections."""
    escaped = re.escape(technique)
    # Plain suffixes: grill -> grilled/grills/grilling, dice -> diced/dices.
    forms = [escaped + r'(?:d|ed|s|ing)?']
    vowels = 'aeiou'
    if len(technique) > 2 and technique.endswith('e'):
        # Verbs ending in "e" drop it before "-ing": dice -> dicing.
        forms.append(re.escape(technique[:-1]) + 'ing')
    elif (
        len(technique) > 2
        and technique[-1].isalpha() and technique[-1] not in vowels + 'wxy'
        and technique[-2] in vowels
        and technique[-3].isalpha() and technique[-3] not in vowels
    ):
        # Consonant-vowel-consonant verbs double the last letter: chop -> chopped.
        forms.append(escaped + re.escape(technique[-1]) + r'(?:ed|ing)')
    return '(?:' + '|'.join(forms) + ')'


def print_highlighted(recipe_text, ingredients_str, techniques_str):
    """Print `recipe_text` with ingredients in red and techniques in green.

    Args:
        recipe_text: Text to print.
        ingredients_str: Comma-separated ingredients (a list/tuple is also
            accepted and joined with ", ").
        techniques_str: Comma-separated techniques (a list/tuple is also
            accepted and joined with ", ").

    Matching is case-insensitive and bounded by word boundaries. All terms are
    matched in a single pass with longer terms tried first, so a short term
    (e.g. "cheese") never re-colours part of a longer match ("cream cheese")
    and a technique never inserts colour codes inside an ingredient match.
    """
    red, green, reset = '\033[91m', '\033[92m', '\033[0m'

    # Accept lists as well as comma-separated strings.
    if isinstance(techniques_str, (list, tuple)):
        techniques_str = ", ".join(techniques_str)
    if isinstance(ingredients_str, (list, tuple)):
        ingredients_str = ", ".join(ingredients_str)

    def split_terms(raw):
        """Unique, lower-cased, non-empty terms, longest first."""
        if not raw:
            return []
        unique = {term.strip().lower() for term in raw.split(',') if term.strip()}
        return sorted(unique, key=lambda term: (-len(term), term))

    groups = []
    ingredient_terms = split_terms(ingredients_str)
    if ingredient_terms:
        groups.append('(?P<ingredient>' + '|'.join(re.escape(t) for t in ingredient_terms) + ')')
    technique_terms = split_terms(techniques_str)
    if technique_terms:
        groups.append('(?P<technique>' + '|'.join(_technique_regex(t) for t in technique_terms) + ')')

    if not groups:
        # Nothing to highlight.
        print(recipe_text)
        return

    pattern = re.compile(r'\b(?:' + '|'.join(groups) + r')\b', flags=re.IGNORECASE)

    def colorize(match):
        # lastgroup names which alternative matched (ingredient wins ties).
        color = red if match.lastgroup == 'ingredient' else green
        return f'{color}{match.group(0)}{reset}'

    print(pattern.sub(colorize, recipe_text))

# 5. Utility Functions: Extractors and Word2Vec Predictor

In [6]:
# --- Existing Extractors ---
def extract_ingredients(recipe_text):
    """Return the text between ``[INGREDIENTS]`` and the next section marker.

    The section ends at the first ``[TECHNIQUES]`` or ``[STEPS]`` found after
    the ingredients marker, or at the end of the text when neither is present.
    Returns "" when there is no ``[INGREDIENTS]`` marker.
    """
    opening = "[INGREDIENTS]"
    marker_pos = recipe_text.find(opening)
    if marker_pos < 0:
        return ""

    begin = marker_pos + len(opening)

    # Positions of the possible closing markers that actually occur after `begin`.
    closing_positions = [
        pos
        for pos in (recipe_text.find(tag, begin) for tag in ("[TECHNIQUES]", "[STEPS]"))
        if pos != -1
    ]
    finish = min(closing_positions) if closing_positions else len(recipe_text)

    return recipe_text[begin:finish].strip()

def extract_steps(recipe_text):
    """Return the steps text that follows the last ``[STEPS]`` marker.

    The steps end at the first ``[EOS]`` or ``[PAD]`` after that marker, or at
    the end of the text. Surrounding whitespace and leading/trailing list
    characters (``[``, ``]``, ``'``) are stripped. Returns "" when the marker
    is missing or when anything goes wrong (e.g. a non-string input).
    """
    try:
        opening = "[STEPS]"
        # The LAST occurrence is used so repeated markers emitted by the model are skipped.
        marker_pos = recipe_text.rfind(opening)
        if marker_pos < 0:
            return ""

        tail = recipe_text[marker_pos + len(opening):]

        # Cut the tail at whichever terminator comes first, if any.
        cut = len(tail)
        for closing in ("[EOS]", "[PAD]"):
            hit = tail.find(closing)
            if hit != -1:
                cut = min(cut, hit)

        return tail[:cut].strip().strip("[]' ")
    except Exception:
        # Best-effort extractor: callers treat "" as "no steps found".
        return ""


# --- Word2Vec Prediction Functions (Added/Copied from Script 1) ---
def preprocess_ingredients(ingredients_list_input): # Takes a list of strings
    """Reduce raw ingredient strings to lemmatized noun phrases.

    Each ingredient is lower-cased, stripped of non-letters and split on
    " and ". Every component is tokenized and POS-tagged; consecutive noun
    tokens that are longer than two characters and are not stopwords are
    lemmatized and joined into one phrase. Any other token ends the phrase.

    Uses the notebook-level ``lemmatizer`` and ``stop_words``.

    Args:
        ingredients_list_input: Iterable of ingredient strings.

    Returns:
        List of unique phrases (longer than two characters) in first-seen
        order, so the result is the same on every run.
    """
    non_letters = re.compile('[^a-zA-Z ]')
    noun_tags = {'NN', 'NNS', 'NNP', 'NNPS'} # POS tags that represent nouns

    phrases = []
    for ingr in ingredients_list_input:
        cleaned = non_letters.sub(' ', ingr.lower()).strip()

        for comp in cleaned.split(' and '):
            comp = comp.strip()
            if not comp:
                continue # Skip empty components

            current_noun_phrase = []
            for word, tag in pos_tag(word_tokenize(comp)):
                lemma = lemmatizer.lemmatize(word.strip())
                is_phrase_word = (
                    len(lemma) > 2
                    and lemma not in stop_words
                    and tag in noun_tags
                )
                if is_phrase_word:
                    current_noun_phrase.append(lemma)
                elif current_noun_phrase:
                    # A short word, stopword or non-noun closes the current phrase.
                    phrases.append(" ".join(current_noun_phrase))
                    current_noun_phrase = []

            # Add any remaining noun phrase at the end of the component
            if current_noun_phrase:
                phrases.append(" ".join(current_noun_phrase))

    # dict.fromkeys de-duplicates while keeping first-seen order (a set would
    # return the phrases in an arbitrary, run-dependent order).
    return list(dict.fromkeys(item for item in phrases if item and len(item) > 2))


def predict_cooking_methods(ingredients_str_input, techniques_vectors, w2v_model, top_n=3):
    """Rank cooking techniques by similarity to the given ingredients.

    The comma-separated ingredients are preprocessed, looked up in the
    Word2Vec vocabulary, averaged into one vector and compared (cosine
    similarity) with every technique vector.

    Args:
        ingredients_str_input: Comma-separated ingredient string.
        techniques_vectors: Mapping of technique name to its vector.
        w2v_model: Model exposing the keyed vectors as ``.wv``.
        top_n: Number of techniques to return.

    Returns:
        Up to ``top_n`` technique names, most similar first; an empty list when
        there are no ingredients or none of them is in the vocabulary.
    """
    raw_names = []
    for piece in ingredients_str_input.split(","):
        piece = piece.strip()
        if piece:
            raw_names.append(piece)

    if not raw_names:
        print("Input ingredients list is empty after splitting/stripping.")
        return []

    print(f"Original ingredients for prediction: {raw_names}")
    cleaned_names = preprocess_ingredients(raw_names)
    print(f"Processed ingredients for prediction: {cleaned_names}")

    # Only ingredients present in the model's vocabulary contribute a vector.
    known_vectors = [w2v_model.wv[name] for name in cleaned_names if name in w2v_model.wv]
    if len(known_vectors) == 0:
        print("Warning: None of the processed ingredients were found in the Word2Vec model vocabulary.")
        return []

    # Unit-length average of the ingredient vectors, as a single-row matrix.
    query_vector = normalize(np.mean(known_vectors, axis=0).reshape(1, -1))

    def similarity_to_query(vector):
        """Cosine similarity between the query and one technique vector."""
        unit_vector = normalize(np.array(vector).reshape(1, -1))
        return cosine_similarity(query_vector, unit_vector)[0][0]

    scores = {
        technique_name: similarity_to_query(technique_vector)
        for technique_name, technique_vector in techniques_vectors.items()
    }

    # Highest similarity first; slicing an empty ranking yields [].
    ranking = sorted(scores, key=scores.get, reverse=True)
    return ranking[:top_n]

# 6. Similarity Calculation Functions (TF-IDF, BERT)

In [7]:
# --- Existing Similarity Functions ---
def calculate_ingredient_similarity(ingredients1_str, ingredients2_str):
    """TF-IDF cosine similarity between two ingredient strings, in [0, 1].

    The vectorizer is fitted on just the two strings. Returns 0.0 when either
    string is empty, when no vocabulary can be built, or on any error.
    """
    if not (ingredients1_str and ingredients2_str):
        return 0.0

    documents = [ingredients1_str, ingredients2_str]
    try:
        tfidf = TfidfVectorizer().fit(documents)
        matrix = tfidf.transform(documents).toarray()
        # Handle empty vocabulary case
        if matrix.shape[1] == 0:
            return 0.0
        # Off-diagonal entry = similarity between the two documents.
        pair_score = cosine_similarity(matrix)[0, 1]
        return max(0.0, min(1.0, pair_score)) # Clamp between 0 and 1
    except Exception:
        # Vectorization errors (e.g. only stop words) count as "no similarity".
        return 0.0

def get_bert_embedding(text, model, tokenizer):
    """Embed `text` as the attention-masked mean of the model's last hidden state.

    Args:
        text: Text to embed; truncated to 512 tokens.
        model: Model exposing ``device`` and returning ``last_hidden_state``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        numpy array with one row holding the pooled embedding.
    """
    # A single text needs no padding. Padding to 512 tokens only made every
    # forward pass as expensive as the longest possible input, while the
    # masked pooling below ignores padded positions anyway.
    inputs = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        # Mean pooling of the last hidden state, considering attention mask
        last_hidden_state = outputs.last_hidden_state
        attention_mask = inputs['attention_mask']
        mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * mask_expanded, 1)
        sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9) # Avoid division by zero
        pooled_embedding = sum_embeddings / sum_mask
    return pooled_embedding.cpu().numpy() # Return numpy array

def calculate_bert_similarity(text1, text2, model, tokenizer):
    """Cosine similarity between the BERT embeddings of two texts, in [0, 1].

    Returns 0.0 when either text is empty or when embedding/similarity
    computation raises.
    """
    if not (text1 and text2):
        return 0.0

    try:
        first_vec, second_vec = (
            get_bert_embedding(text, model, tokenizer) for text in (text1, text2)
        )
        raw_score = cosine_similarity(first_vec, second_vec).item()
    except Exception:
        # Best-effort metric: a failure is reported as "no similarity".
        return 0.0

    # Clamp the result between 0 and 1
    return max(0.0, min(1.0, raw_score))


# 7. Main Evaluation Function

In [8]:
def evaluate_generated_recipe_by_ingredients(
    generated_recipe_full_string,
    real_recipes_data_list,
    model_bert,
    tokenizer_bert,
    rouge,
    top_k=5,
    debug=False,
    ):
    """Score a generated recipe against the most ingredient-similar real recipes.

    Every real recipe is ranked by TF-IDF ingredient similarity to the
    generated recipe. For the ``top_k`` best matches, the generated steps are
    compared with the real steps using ROUGE-L F1 and BERT cosine similarity.

    Args:
        generated_recipe_full_string: Generated text with section markers.
        real_recipes_data_list: List of dicts with the keys
            'ingredients_string', 'recipe_string' and 'avg_rating'.
        model_bert: BERT model passed to calculate_bert_similarity.
        tokenizer_bert: Tokenizer matching ``model_bert``.
        rouge: Object exposing ``get_scores(hypothesis, reference)``.
        top_k: Number of most similar real recipes to score.
        debug: When True, print extra information about the evaluation steps.

    Returns:
        List of dicts with the keys 'real_recipe_data', 'ingredient_similarity',
        'rouge_l_f1', 'bert_similarity' and 'real_avg_rating'. Candidates whose
        steps cannot be extracted are skipped, so the list may hold fewer than
        ``top_k`` entries; it is empty when evaluation is not possible.
    """
    if not real_recipes_data_list:
        print("Error: Real recipes data list is empty for evaluation.")
        return [] # Return empty list on error

    # Minimal check for expected keys in the first item
    required_keys = ['ingredients_string', 'recipe_string', 'avg_rating']
    if not all(k in real_recipes_data_list[0] for k in required_keys):
        print("Warning: Real recipe data dictionaries might be missing required keys ('ingredients_string', 'recipe_string', 'avg_rating').")

    generated_ingredients = extract_ingredients(generated_recipe_full_string)
    generated_steps = extract_steps(generated_recipe_full_string)

    if not generated_steps:
        print("Error: Could not extract steps from the generated recipe. Evaluation aborted.")
        return [] # Cannot evaluate without steps
    if not generated_ingredients:
        print("Warning: Could not extract ingredients from generated recipe. Ingredient similarity calculation might be affected.")
    if debug:
        print(f"[debug] Generated ingredients: {generated_ingredients!r}")
        print(f"[debug] Generated steps length: {len(generated_steps)} characters")

    # --- Stage 1: rank every real recipe by ingredient similarity ---
    ingredient_similarities = []
    # Use leave=False for cleaner progress bar in loops within functions
    pbar_ing = tqdm(real_recipes_data_list, desc="Evaluating: Ingredient Sim", leave=False)
    for real_recipe_data in pbar_ing:
        real_ingredients = real_recipe_data.get('ingredients_string', '')
        similarity = calculate_ingredient_similarity(generated_ingredients, real_ingredients)
        ingredient_similarities.append({'real_recipe_data': real_recipe_data, 'ingredient_similarity': similarity})

    # Sort by ingredient similarity descending and keep the top K candidates
    ingredient_similarities.sort(key=lambda x: x['ingredient_similarity'], reverse=True)
    top_k_candidates = ingredient_similarities[:top_k]

    if not top_k_candidates:
        print("No similar real recipes found based on ingredients after sorting.")
        return [] # No candidates to evaluate further

    # --- Stage 2: compare the steps of the generated recipe with each candidate ---
    evaluation_results = []

    pbar_scores = tqdm(top_k_candidates, desc="Evaluating: Scores", leave=False)
    for candidate in pbar_scores:
        real_data = candidate['real_recipe_data']
        ing_sim = candidate['ingredient_similarity']
        # Extract steps from the real recipe string stored in the dictionary
        real_steps = extract_steps(real_data.get('recipe_string', ''))

        if not real_steps:
            if debug:
                print(f"[debug] Skipping recipe {real_data.get('id', 'N/A')}: no steps could be extracted.")
            continue # Skip if real steps cannot be extracted

        # ROUGE-L F1 (both texts are known to be non-empty at this point)
        rouge_l_f1 = 0.0
        try:
            rouge_scores = rouge.get_scores(generated_steps, real_steps)
            # Check if scores were returned and contain the expected structure
            if rouge_scores and isinstance(rouge_scores, list):
                rouge_l_f1 = rouge_scores[0].get('rouge-l', {}).get('f', 0.0)
        except Exception as e:
            print(f"ROUGE calculation failed: {e}")

        # BERT similarity
        bert_similarity = 0.0
        try:
            bert_similarity = calculate_bert_similarity(generated_steps, real_steps, model_bert, tokenizer_bert)
        except Exception as e:
            print(f"BERT similarity calculation failed: {e}")

        # Get real average rating, handle potential missing key or non-numeric value
        real_rating = real_data.get('avg_rating', np.nan)
        if not isinstance(real_rating, (int, float)):
            real_rating = np.nan # Ensure it's NaN if not numeric

        if debug:
            print(f"[debug] Recipe {real_data.get('id', 'N/A')}: ingredient_sim={ing_sim:.4f}, rouge_l_f1={rouge_l_f1:.4f}, bert_sim={bert_similarity:.4f}")

        evaluation_results.append({
            'real_recipe_data': real_data,
            'ingredient_similarity': ing_sim,
            'rouge_l_f1': rouge_l_f1,
            'bert_similarity': bert_similarity,
            'real_avg_rating': real_rating
        })

    return evaluation_results


# 8. Load and Prepare Real Recipe Data

In [9]:
def _parse_list_column(raw_value):
    """Parse a CSV cell holding a Python list literal; return [] when it is not one."""
    if not isinstance(raw_value, str) or not raw_value.startswith('['):
        return []
    try:
        return ast.literal_eval(raw_value)
    except (ValueError, SyntaxError):
        return []


def load_preprocess_raw_data(raw_data_path):
    """Load the raw recipe CSV into a list of recipe dictionaries.

    The columns 'ingredients', 'steps' and 'techniques_list' are expected to
    hold Python list literals; the rating is read from 'avg_rating', or from
    'rating' when there is no 'avg_rating' column. Rows without ingredients or
    without steps are dropped. All text is lower-cased.

    Args:
        raw_data_path: Path to the CSV file.

    Returns:
        List of dicts with the keys 'id', 'recipe_string', 'ingredients_string',
        'techniques_string', 'steps_string' and 'avg_rating' (float or NaN).
        An empty list is returned when the file is missing or cannot be read.
    """
    recipes_data = []
    print(f"Loading data from: {raw_data_path}")
    try:
        # newline='' is what the csv module requires so that line breaks inside
        # quoted fields reach the reader untranslated.
        with open(raw_data_path, 'r', encoding='utf-8', newline='') as f:
            reader = csv.DictReader(f)
            # Use tqdm here for progress on reading the CSV
            for row in tqdm(reader, desc="Reading CSV", unit=" rows"):
                try:
                    # Use .get() with defaults to avoid KeyError
                    recipe_id = row.get('id', 'N/A')
                    ingredients_list = _parse_list_column(row.get('ingredients', '[]'))
                    instructions_list = _parse_list_column(row.get('steps', '[]'))
                    techniques_list = _parse_list_column(row.get('techniques_list', '[]'))

                    # Join lists into strings, handle non-string items just in case
                    ingredients_str = ", ".join(filter(None, map(str, ingredients_list))).lower()
                    instructions_str = " ".join(filter(None, map(str, instructions_list))).lower() # Join steps with space
                    techniques_str = ", ".join(filter(None, map(str, techniques_list))).lower()

                    # Process rating
                    avg_rating = np.nan
                    try:
                        # Prioritize 'avg_rating', fall back to 'rating'
                        rating_val = row.get('avg_rating', row.get('rating', None))
                        if rating_val is not None and rating_val != '':
                            avg_rating = float(rating_val)
                    except (ValueError, TypeError):
                        pass # Keep avg_rating as np.nan if conversion fails

                    # Basic validation: need ingredients and instructions
                    if not (ingredients_str and instructions_str):
                        continue

                    # Full marker-delimited representation; the [TECHNIQUES]
                    # marker is always present, even when there are none.
                    recipe_instance_string = (
                        f'[BOS][INGREDIENTS]{ingredients_str}'
                        f'[TECHNIQUES]{techniques_str}'
                        f'[STEPS]{instructions_str}[EOS]'
                    )

                    recipes_data.append({
                        'id': recipe_id, # Keep ID for reference
                        'recipe_string': recipe_instance_string, # Full string representation
                        'ingredients_string': ingredients_str,   # Comma-separated ingredients
                        'techniques_string': techniques_str,     # Comma-separated techniques
                        'steps_string': instructions_str,        # Space-joined steps
                        'avg_rating': avg_rating                 # Float rating or NaN
                    })
                except Exception as e:
                    # One bad row must not abort the whole load.
                    print(f"Error processing row ID {row.get('id', 'N/A')}: {e}. Row data: {row}")
                    continue

    except FileNotFoundError:
        print(f"ERROR: Dataset file not found at: {raw_data_path}")
        return []
    except Exception as e:
        print(f"ERROR reading CSV file '{raw_data_path}': {e}")
        return []

    print(f"Loaded {len(recipes_data)} valid recipes.")
    return recipes_data

# --- Load the real recipes and pick the subset used for evaluation ---
full_recipes_data_list = load_preprocess_raw_data(DATASET_PATH)

sampled_recipes_data_list = []
if not full_recipes_data_list:
    print("ERROR: No recipes loaded. Evaluation cannot proceed.")
else:
    if DATASET_SAMPLE_FRACTION >= 1.0:
        # Whole dataset requested: reuse the loaded list as-is.
        print(f"Using the full loaded dataset ({len(full_recipes_data_list)} recipes) for evaluation.")
        sampled_recipes_data_list = full_recipes_data_list
    elif DATASET_SAMPLE_FRACTION > 0.0:
        # Random subset of at least one recipe.
        sample_size = max(1, int(DATASET_SAMPLE_FRACTION * len(full_recipes_data_list)))
        print(f"Sampling {sample_size} recipes ({DATASET_SAMPLE_FRACTION*100:.1f}% of {len(full_recipes_data_list)})...")
        # Never ask for more items than the population holds.
        sample_size = min(sample_size, len(full_recipes_data_list))
        sampled_recipes_data_list = random.sample(full_recipes_data_list, sample_size)
    else:
        # Zero or negative fraction: nothing is selected.
        print("Warning: DATASET_SAMPLE_FRACTION is zero or negative. No recipes selected for evaluation.")

    print(f"Number of real recipes available for evaluation: {len(sampled_recipes_data_list)}")

Loading data from: dataset/RAW_merged.csv


Reading CSV: 0 rows [00:00, ? rows/s]

Loaded 178265 valid recipes.
Using the full loaded dataset (178265 recipes) for evaluation.
Number of real recipes available for evaluation: 178265


# 9. Execution: Generation and Evaluation (Using Predicted Techniques)

In [20]:
 # 9. Execution: Generation and Evaluation (Using Predicted Techniques)

# Comma-separated ingredients used for technique prediction, generation and highlighting.
ingredients = "salt, water, salame, icecream, oregano, chicken, tomato, cheese, pasta, garlic, onion"

print(f"Input Ingredients: {ingredients}")
print("-" * 30)

# --- Predict Techniques using Word2Vec ---
print("Predicting techniques using Word2Vec...")
predicted_techniques_list = predict_cooking_methods(
    ingredients_str_input=ingredients,
    techniques_vectors=techniques_vectors_dict,
    w2v_model=model_w2v,
    top_n=W2V_PREDICTION_TOP_N
)

# Convert list to comma-separated string for generation input
techniques = ", ".join(predicted_techniques_list)

if techniques:
    print(f"Predicted Techniques (Top {W2V_PREDICTION_TOP_N}): {techniques}")
else:
    print("No relevant techniques predicted by Word2Vec.")
print("-" * 30)


# --- Generate Recipe using Predicted Techniques ---
print("Generating recipe with GPT-2...")
generated_recipe_output = generate_recipe(
    ingredients=ingredients,
    techniques=techniques, # Use the predicted techniques string
    model=model_gpt,
    tokenizer=tokenizer_gpt,
    # You can adjust generation parameters here if needed
    # temperature=0.75, top_k=60, top_p=0.95, num_beams=4, repetition_penalty=1.3
)
print("\n" + "-" * 30 + "\n")
# --- Print Generated Recipe with Highlighting ---
print("Generated Recipe:")
# Pass the original ingredients and the *predicted* techniques string for highlighting
print_highlighted(generated_recipe_output, ingredients, techniques)
print("\n" + "-" * 30 + "\n")


# --- Evaluate Generated Recipe ---
if sampled_recipes_data_list:
    print(f"\nStarting evaluation against {len(sampled_recipes_data_list)} real recipes (Comparing with Top {TOP_K_EVALUATION} similar)...")

    # Pass the full generated text to the evaluation function.
    # Only keywords that the function's signature defines are passed; an
    # unknown keyword would raise TypeError before any evaluation starts.
    evaluation_results = evaluate_generated_recipe_by_ingredients(
        generated_recipe_full_string=generated_recipe_output,
        real_recipes_data_list=sampled_recipes_data_list,
        model_bert=model_bert,
        tokenizer_bert=tokenizer_bert,
        rouge=rouge,
        top_k=TOP_K_EVALUATION,
    )

    # --- Display Evaluation Results ---
    if evaluation_results: # Check if evaluation returned any results
        print(f"\n--- Evaluation Results (Comparing against Top {len(evaluation_results)} Real Recipes Based on Ingredient Similarity) ---")

        valid_ratings = [] # To calculate average rating of benchmarks
        all_rouge_l = []
        all_bert_sim = []
        all_ing_sim = []

        for i, result in enumerate(evaluation_results):
            print(f"\n--- Real Benchmark #{i + 1} ---")
            real_data = result.get('real_recipe_data', {})
            real_rating = result.get('real_avg_rating', np.nan)
            ing_sim = result.get('ingredient_similarity', 0.0)
            rouge_l = result.get('rouge_l_f1', 0.0)
            bert_sim = result.get('bert_similarity', 0.0)

            print(f"Ingredient Similarity: {ing_sim:.4f}")
            all_ing_sim.append(ing_sim)

            # Only rated recipes contribute to the average rating below.
            if not np.isnan(real_rating):
                print(f"Real Rating: {real_rating:.2f} stars")
                valid_ratings.append(real_rating)
            else:
                print("Real Recipe Avg Rating: N/A")

            print(f"ROUGE-L F1: {rouge_l:.4f}")
            all_rouge_l.append(rouge_l)
            print(f"BERT Similarity: {bert_sim:.4f}")
            all_bert_sim.append(bert_sim)

            # Display the text of the real benchmark recipe for comparison
            print("\nReal Recipe Text (Benchmark):")
            # Highlight the REAL recipe using the INPUT ingredients/techniques for consistency
            print_highlighted(real_data.get('recipe_string', 'N/A'), ingredients, techniques)
            print("-" * 20) # Separator between benchmark recipes

        # Calculate and print average scores
        avg_rouge = np.nanmean(all_rouge_l) if all_rouge_l else 0.0
        avg_bert_sim = np.nanmean(all_bert_sim) if all_bert_sim else 0.0
        avg_ing_sim = np.nanmean(all_ing_sim) if all_ing_sim else 0.0
        avg_real_rating = np.nanmean(valid_ratings) if valid_ratings else np.nan

        print("\n--- Average Scores Across Top Benchmarks ---")
        print(f"Avg Ingredient Similarity: {avg_ing_sim:.4f}")
        if not np.isnan(avg_real_rating):
            print(f"Avg Rating of these Real Benchmarks: {avg_real_rating:.2f} stars (based on {len(valid_ratings)} rated recipes)")
        else:
            print("Avg Rating of these Real Benchmarks: N/A")
        print(f"Avg ROUGE-L F1: {avg_rouge:.4f}")
        print(f"Avg BERT Similarity: {avg_bert_sim:.4f}")
    else:
        print("\nEvaluation completed, but no benchmark recipes were found suitable for comparison (e.g., due to extraction errors or zero similarity).")

else:
    print("\nEvaluation skipped: No real recipe data loaded or sampled.")

print("\nExecution complete.")

Input Ingredients: salt, water, salame, icecream, oregano, chicken, tomato, cheese, pasta, garlic, onion
------------------------------
Predicting techniques using Word2Vec...
Original ingredients for prediction: ['salt', 'water', 'salame', 'icecream', 'oregano', 'chicken', 'tomato', 'cheese', 'pasta', 'garlic', 'onion']
Processed ingredients for prediction: ['water', 'oregano', 'onion', 'icecream', 'tomato', 'chicken', 'salt', 'pasta', 'garlic', 'salame', 'cheese']
Predicted Techniques (Top 3): parboil, dice, saute
------------------------------
Generating recipe with GPT-2...
Input Text for GPT-2: [BOS][INGREDIENTS]salt, water, salame, icecream, oregano, chicken, tomato, cheese, pasta, garlic, onion[TECHNIQUES]parboil, dice, saute[STEPS]

------------------------------

Generated Recipe:
[BOS] [INGREDIENTS] [91msalt[0m, [91mwater[0m, [91msalame[0m, [91micecream[0m, [91moregano[0m, [91mchicken[0m, [91mtomato[0m, [91mcheese[0m, [91mpasta[0m, [91mgarlic[0m, [91monio

KeyboardInterrupt: 