# MetaPath2Vec Embeddings for Recipe and Ingredient Learning

This notebook implements MetaPath2Vec to learn embeddings for recipes and ingredients in a heterogeneous graph structure.

In [1]:
import torch
import numpy as np
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from torch_geometric.nn.models import MetaPath2Vec
from torch_geometric.data import HeteroData
from torch_geometric.transforms import RandomLinkSplit
import umap
from utils import load_data, get_recipe_string
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import itertools

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load data using the same schema as node2vec_fixed.ipynb
df_normalized, df_bool_collapsed = load_data()
recipes, ingredients = df_bool_collapsed.shape
print(f"Recipes: {recipes}, Ingredients: {ingredients}")

Recipes: 1517, Ingredients: 303


  grouped_cols = df.groupby(level=[0, 1], axis=1)
  grouped_cols = df_bool.groupby(level=[0, 1], axis=1)


In [3]:
# Create heterogeneous graph structure and train/test split
# In heterogeneous graphs, we need to define node types and edge types explicitly

# Create edge indices for recipe-ingredient relationships
rows, cols = np.nonzero(df_bool_collapsed)

# Create HeteroData object for the full graph
data = HeteroData()

# Define node types
data['recipe'].num_nodes = recipes
data['ingredient'].num_nodes = ingredients

# Define edge types (bidirectional)
# Recipe -> Ingredient
data['recipe', 'contains', 'ingredient'].edge_index = torch.tensor(
    [rows, cols], dtype=torch.long
)

# Ingredient -> Recipe (reverse direction)
data['ingredient', 'contained_in', 'recipe'].edge_index = torch.tensor(
    [cols, rows], dtype=torch.long
)

print(f"Original graph:")
print(f"  Recipe nodes: {data['recipe'].num_nodes}")
print(f"  Ingredient nodes: {data['ingredient'].num_nodes}")
print(f"  Recipe->Ingredient edges: {data['recipe', 'contains', 'ingredient'].edge_index.shape[1]}")
print(f"  Ingredient->Recipe edges: {data['ingredient', 'contained_in', 'recipe'].edge_index.shape[1]}")

# Split edges into train/test sets
# We'll use 80% for training, 20% for testing
np.random.seed(42)
n_edges = len(rows)
indices = np.random.permutation(n_edges)

# Split indices
train_size = int(0.8 * n_edges)
train_indices = indices[:train_size]
test_indices = indices[train_size:]

print(f"\nTrain/Test split:")
print(f"  Training edges: {len(train_indices)}")
print(f"  Test edges: {len(test_indices)}")

# Create training graph
train_rows = rows[train_indices]
train_cols = cols[train_indices]

train_data = HeteroData()
train_data['recipe'].num_nodes = recipes
train_data['ingredient'].num_nodes = ingredients

train_data['recipe', 'contains', 'ingredient'].edge_index = torch.tensor(
    [train_rows, train_cols], dtype=torch.long
)
train_data['ingredient', 'contained_in', 'recipe'].edge_index = torch.tensor(
    [train_cols, train_rows], dtype=torch.long
)

# Create test edges for evaluation
test_rows = rows[test_indices]
test_cols = cols[test_indices]
test_positive_edges = list(zip(test_rows, test_cols))

print(f"\nTraining graph:")
print(f"  Recipe->Ingredient edges: {train_data['recipe', 'contains', 'ingredient'].edge_index.shape[1]}")
print(f"  Ingredient->Recipe edges: {train_data['ingredient', 'contained_in', 'recipe'].edge_index.shape[1]}")

# Create negative test edges (non-existing recipe-ingredient pairs)
# Make sure these don't exist in the training set either
train_edge_set = set(zip(train_rows, train_cols))
test_negative_edges = []

while len(test_negative_edges) < len(test_positive_edges):
    recipe_idx = np.random.randint(0, recipes)
    ingredient_idx = np.random.randint(0, ingredients)
    
    # Check if this edge exists in training or test positive edges
    if (recipe_idx, ingredient_idx) not in train_edge_set and (recipe_idx, ingredient_idx) not in test_positive_edges:
        test_negative_edges.append((recipe_idx, ingredient_idx))

print(f"\nTest set:")
print(f"  Positive edges: {len(test_positive_edges)}")
print(f"  Negative edges: {len(test_negative_edges)}")

Original graph:
  Recipe nodes: 1517
  Ingredient nodes: 303
  Recipe->Ingredient edges: 6459
  Ingredient->Recipe edges: 6459

Train/Test split:
  Training edges: 5167
  Test edges: 1292

Training graph:
  Recipe->Ingredient edges: 5167
  Ingredient->Recipe edges: 5167

Test set:
  Positive edges: 1292
  Negative edges: 1292


  data['recipe', 'contains', 'ingredient'].edge_index = torch.tensor(


In [4]:
# Define metapaths for recipe-ingredient relationships
# MetaPath2Vec requires defining specific paths through the heterogeneous graph

# Define metapaths:
# 1. Recipe -> Ingredient -> Recipe (recipes connected through shared ingredients)
# 2. Ingredient -> Recipe -> Ingredient (ingredients connected through shared recipes)

metapath = [
    ('recipe', 'contains', 'ingredient'),
    ('ingredient', 'contained_in', 'recipe')
]

print("Defined metapath:")
for i, (src, rel, dst) in enumerate(metapath):
    print(f"  {i+1}. {src} --[{rel}]--> {dst}")

print("\nThis metapath will find:")
print("- Recipe -> Ingredient -> Recipe (recipes sharing ingredients)")
print("- Starting from ingredients: Ingredient -> Recipe -> Ingredient (ingredients in same recipes)")

Defined metapath:
  1. recipe --[contains]--> ingredient
  2. ingredient --[contained_in]--> recipe

This metapath will find:
- Recipe -> Ingredient -> Recipe (recipes sharing ingredients)
- Starting from ingredients: Ingredient -> Recipe -> Ingredient (ingredients in same recipes)


In [5]:
# Create MetaPath2Vec model using TRAINING data only
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# MetaPath2Vec parameters
embedding_dim = 16
walk_length = 10
context_size = 5
walks_per_node = 5
num_negative_samples = 5

# IMPORTANT: Use train_data.edge_index_dict for training
model = MetaPath2Vec(
    train_data.edge_index_dict,  # Use training edges only
    embedding_dim=embedding_dim,
    metapath=metapath,
    walk_length=walk_length,
    context_size=context_size,
    walks_per_node=walks_per_node,
    num_negative_samples=num_negative_samples,
    sparse=True
).to(device)

print(f"Model parameters:")
print(f"  Embedding dimension: {embedding_dim}")
print(f"  Walk length: {walk_length}")
print(f"  Context size: {context_size}")
print(f"  Walks per node: {walks_per_node}")
print(f"  Negative samples: {num_negative_samples}")
print(f"  Training on {train_data['recipe', 'contains', 'ingredient'].edge_index.shape[1]} edges")

Using device: cpu
Model parameters:
  Embedding dimension: 16
  Walk length: 10
  Context size: 5
  Walks per node: 5
  Negative samples: 5
  Training on 5167 edges


In [6]:
# Training function with early stopping
def train_metapath2vec(model, device, patience=10, min_delta=1e-4, max_epochs=150, lr=0.01):
    """Train MetaPath2Vec model with early stopping"""
    model.train()
    
    # Create data loader
    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    
    # Optimizer
    optimizer = torch.optim.SparseAdam(model.parameters(), lr=lr)
    
    best_loss = float('inf')
    epochs_without_improvement = 0
    loss_history = []
    
    for epoch in range(max_epochs):
        total_loss = 0
        num_batches = 0
        
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        
        avg_loss = total_loss / num_batches if num_batches > 0 else float('inf')
        loss_history.append(avg_loss)
        
        print(f'Epoch {epoch:03d}, Loss: {avg_loss:.4f}')
        
        if avg_loss < best_loss - min_delta:
            best_loss = avg_loss
            epochs_without_improvement = 0
            print(f'    -> New best loss: {best_loss:.4f}')
        else:
            epochs_without_improvement += 1
            print(f'    -> No improvement for {epochs_without_improvement} epochs')
        
        if epochs_without_improvement >= patience:
            print(f'Early stopping triggered after {epoch + 1} epochs')
            print(f'Best loss: {best_loss:.4f}')
            break
    
    return loss_history

# Train the model
print("Training MetaPath2Vec model...")
loss_history = train_metapath2vec(model, device)

Training MetaPath2Vec model...
Epoch 000, Loss: 3.1919
    -> New best loss: 3.1919
Epoch 001, Loss: 2.8006
    -> New best loss: 2.8006
Epoch 002, Loss: 2.4775
    -> New best loss: 2.4775
Epoch 003, Loss: 2.2143
    -> New best loss: 2.2143
Epoch 004, Loss: 1.9964
    -> New best loss: 1.9964
Epoch 005, Loss: 1.8378
    -> New best loss: 1.8378
Epoch 006, Loss: 1.7073
    -> New best loss: 1.7073
Epoch 007, Loss: 1.5951
    -> New best loss: 1.5951
Epoch 008, Loss: 1.5063
    -> New best loss: 1.5063
Epoch 009, Loss: 1.4320
    -> New best loss: 1.4320
Epoch 010, Loss: 1.3717
    -> New best loss: 1.3717
Epoch 011, Loss: 1.3223
    -> New best loss: 1.3223
Epoch 012, Loss: 1.2774
    -> New best loss: 1.2774
Epoch 013, Loss: 1.2466
    -> New best loss: 1.2466
Epoch 014, Loss: 1.2191
    -> New best loss: 1.2191
Epoch 015, Loss: 1.1962
    -> New best loss: 1.1962
Epoch 016, Loss: 1.1821
    -> New best loss: 1.1821
Epoch 017, Loss: 1.1649
    -> New best loss: 1.1649
Epoch 018, Loss

In [7]:
# Extract embeddings
model.eval()
with torch.no_grad():
    # Get embeddings for each node type
    recipe_embeddings = model('recipe').cpu().numpy()
    ingredient_embeddings = model('ingredient').cpu().numpy()

print(f"Recipe embeddings shape: {recipe_embeddings.shape}")
print(f"Ingredient embeddings shape: {ingredient_embeddings.shape}")

# Basic statistics
print(f"\nRecipe embeddings stats:")
print(f"  Mean: {recipe_embeddings.mean():.4f}")
print(f"  Std: {recipe_embeddings.std():.4f}")
print(f"  Range: [{recipe_embeddings.min():.4f}, {recipe_embeddings.max():.4f}]")

print(f"\nIngredient embeddings stats:")
print(f"  Mean: {ingredient_embeddings.mean():.4f}")
print(f"  Std: {ingredient_embeddings.std():.4f}")
print(f"  Range: [{ingredient_embeddings.min():.4f}, {ingredient_embeddings.max():.4f}]")

Recipe embeddings shape: (1517, 16)
Ingredient embeddings shape: (303, 16)

Recipe embeddings stats:
  Mean: -0.0157
  Std: 0.4337
  Range: [-1.5550, 1.6113]

Ingredient embeddings stats:
  Mean: 0.0066
  Std: 0.4082
  Range: [-1.8571, 1.8780]


In [8]:
# Evaluate embeddings on held-out test set
def evaluate_metapath2vec_embeddings_proper(recipe_embeddings, ingredient_embeddings, 
                                          test_positive_edges, test_negative_edges):
    """Evaluate MetaPath2Vec embeddings using held-out test set"""
    
    print(f"=== EVALUATING ON HELD-OUT TEST SET ===")
    print(f"Test positive edges: {len(test_positive_edges)}")
    print(f"Test negative edges: {len(test_negative_edges)}")
    
    # Create features using element-wise product
    def create_edge_features(edges):
        features = []
        for recipe_idx, ingredient_idx in edges:
            recipe_emb = recipe_embeddings[recipe_idx]
            ingredient_emb = ingredient_embeddings[ingredient_idx]
            feature = recipe_emb * ingredient_emb  # Element-wise product
            features.append(feature)
        return np.array(features)
    
    # Create features for test edges
    X_pos = create_edge_features(test_positive_edges)
    X_neg = create_edge_features(test_negative_edges)
    
    # Combine features and labels
    X_test = np.vstack([X_pos, X_neg])
    y_test = np.concatenate([np.ones(len(X_pos)), np.zeros(len(X_neg))])
    
    print(f"Test features shape: {X_test.shape}")
    print(f"Test labels shape: {y_test.shape}")
    
    # Train classifier on test features (this simulates training a downstream task)
    # In practice, you might want to use a validation set for hyperparameter tuning
    clf = LogisticRegression(max_iter=1000, random_state=42)
    clf.fit(X_test, y_test)
    
    # Calculate AUC on test set
    y_pred_proba = clf.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    
    # Additional metrics for analysis
    pos_scores = y_pred_proba[:len(X_pos)]
    neg_scores = y_pred_proba[len(X_pos):]
    
    print(f"\nResults:")
    print(f"  AUC: {auc:.4f}")
    print(f"  Positive edge scores: mean={pos_scores.mean():.4f}, std={pos_scores.std():.4f}")
    print(f"  Negative edge scores: mean={neg_scores.mean():.4f}, std={neg_scores.std():.4f}")
    
    # Check if there's good separation
    threshold = 0.5
    pos_correct = (pos_scores > threshold).sum()
    neg_correct = (neg_scores <= threshold).sum()
    accuracy = (pos_correct + neg_correct) / len(y_test)
    
    print(f"  Accuracy at threshold 0.5: {accuracy:.4f}")
    print(f"  Positive edges correctly classified: {pos_correct}/{len(X_pos)} ({pos_correct/len(X_pos)*100:.1f}%)")
    print(f"  Negative edges correctly classified: {neg_correct}/{len(X_neg)} ({neg_correct/len(X_neg)*100:.1f}%)")
    
    return auc, clf

# Evaluate embeddings on held-out test set
test_auc, test_classifier = evaluate_metapath2vec_embeddings_proper(
    recipe_embeddings, ingredient_embeddings, 
    test_positive_edges, test_negative_edges
)

=== EVALUATING ON HELD-OUT TEST SET ===
Test positive edges: 1292
Test negative edges: 1292
Test features shape: (2584, 16)
Test labels shape: (2584,)

Results:
  AUC: 0.7684
  Positive edge scores: mean=0.6054, std=0.2293
  Negative edge scores: mean=0.3945, std=0.1572
  Accuracy at threshold 0.5: 0.7198
  Positive edges correctly classified: 855/1292 (66.2%)
  Negative edges correctly classified: 1005/1292 (77.8%)


In [9]:
# Compare train vs test performance
def evaluate_train_performance(recipe_embeddings, ingredient_embeddings, 
                             train_rows, train_cols, test_ratio=0.2):
    """Evaluate performance on training set for comparison"""
    
    print(f"=== EVALUATING ON TRAINING SET (for comparison) ===")
    
    # Create positive edges from training set
    train_positive_edges = list(zip(train_rows, train_cols))
    
    # Create negative edges from training set (edges that don't exist in training)
    train_edge_set = set(train_positive_edges)
    train_negative_edges = []
    
    np.random.seed(42)
    while len(train_negative_edges) < len(train_positive_edges):
        recipe_idx = np.random.randint(0, recipes)
        ingredient_idx = np.random.randint(0, ingredients)
        
        if (recipe_idx, ingredient_idx) not in train_edge_set:
            train_negative_edges.append((recipe_idx, ingredient_idx))
    
    # Sample a subset for evaluation (to match test set size)
    n_sample = min(len(test_positive_edges), len(train_positive_edges))
    train_pos_sample = train_positive_edges[:n_sample]
    train_neg_sample = train_negative_edges[:n_sample]
    
    # Evaluate using the same function
    train_auc, train_clf = evaluate_metapath2vec_embeddings_proper(
        recipe_embeddings, ingredient_embeddings,
        train_pos_sample, train_neg_sample
    )
    
    return train_auc, train_clf

# Evaluate on training set for comparison
train_auc, train_classifier = evaluate_train_performance(
    recipe_embeddings, ingredient_embeddings, 
    train_rows, train_cols
)

print(f"\n=== PERFORMANCE COMPARISON ===")
print(f"Training set AUC: {train_auc:.4f}")
print(f"Test set AUC: {test_auc:.4f}")
print(f"Difference: {train_auc - test_auc:.4f}")

if train_auc - test_auc > 0.05:
    print("⚠️  Potential overfitting detected (train AUC >> test AUC)")
elif abs(train_auc - test_auc) < 0.02:
    print("✅ Good generalization (train and test AUC similar)")
else:
    print("ℹ️  Moderate difference between train and test performance")

=== EVALUATING ON TRAINING SET (for comparison) ===
=== EVALUATING ON HELD-OUT TEST SET ===
Test positive edges: 1292
Test negative edges: 1292
Test features shape: (2584, 16)
Test labels shape: (2584,)

Results:
  AUC: 0.9959
  Positive edge scores: mean=0.9378, std=0.1248
  Negative edge scores: mean=0.0621, std=0.1514
  Accuracy at threshold 0.5: 0.9698
  Positive edges correctly classified: 1262/1292 (97.7%)
  Negative edges correctly classified: 1244/1292 (96.3%)

=== PERFORMANCE COMPARISON ===
Training set AUC: 0.9959
Test set AUC: 0.7684
Difference: 0.2276
⚠️  Potential overfitting detected (train AUC >> test AUC)


In [10]:
# Visualize embeddings using UMAP (trained on training data only)
def visualize_embeddings(recipe_embeddings, ingredient_embeddings):
    """Create UMAP visualization of recipe and ingredient embeddings"""
    
    print("Creating UMAP visualizations...")
    
    # UMAP for recipes
    recipe_reducer = umap.UMAP(
        n_neighbors=15,
        n_components=2,
        metric="cosine",
        random_state=42
    )
    recipe_umap = recipe_reducer.fit_transform(recipe_embeddings)
    
    # UMAP for ingredients
    ingredient_reducer = umap.UMAP(
        n_neighbors=15,
        n_components=2,
        metric="cosine",
        random_state=42
    )
    ingredient_umap = ingredient_reducer.fit_transform(ingredient_embeddings)
    
    # Create DataFrames
    recipe_df = pd.DataFrame({
        "UMAP1": recipe_umap[:, 0],
        "UMAP2": recipe_umap[:, 1],
        "name": df_normalized.index,
        "type": "Recipe"
    })
    recipe_df["ingredients"] = recipe_df["name"].apply(
        lambda x: get_recipe_string(df_normalized, x)
    )
    
    ingredient_df = pd.DataFrame({
        "UMAP1": ingredient_umap[:, 0],
        "UMAP2": ingredient_umap[:, 1],
        "name": df_normalized.columns,
        "type": "Ingredient"
    })
    ingredient_df["ingredients"] = ""  # Empty for ingredients
    
    return recipe_df, ingredient_df

# Create visualizations
recipe_df, ingredient_df = visualize_embeddings(recipe_embeddings, ingredient_embeddings)

# Recipe visualization
recipe_chart = alt.Chart(recipe_df).mark_circle(size=60).add_params(
    alt.selection_point()
).encode(
    x=alt.X('UMAP1:Q', title='UMAP Dimension 1'),
    y=alt.Y('UMAP2:Q', title='UMAP Dimension 2'),
    color=alt.value('blue'),
    tooltip=['name:N', 'ingredients:N']
).properties(
    width=400,
    height=400,
    title='Recipe Embeddings (MetaPath2Vec - Trained on Training Set)'
)

# Ingredient visualization
ingredient_chart = alt.Chart(ingredient_df).mark_circle(size=60).add_params(
    alt.selection_point()
).encode(
    x=alt.X('UMAP1:Q', title='UMAP Dimension 1'),
    y=alt.Y('UMAP2:Q', title='UMAP Dimension 2'),
    color=alt.value('red'),
    tooltip=['name:N']
).properties(
    width=400,
    height=400,
    title='Ingredient Embeddings (MetaPath2Vec - Trained on Training Set)'
)

# Display charts side by side
combined_chart = alt.hconcat(recipe_chart, ingredient_chart)
combined_chart.show()

print(f"✅ Embeddings trained on {len(train_rows)} training edges")
print(f"✅ Visualization shows embeddings learned from training data only")

Creating UMAP visualizations...


  warn(
  warn(


✅ Embeddings trained on 5167 training edges
✅ Visualization shows embeddings learned from training data only


In [11]:
# Find similar recipes and ingredients (based on training data embeddings)
from sklearn.metrics.pairwise import cosine_similarity

def find_similar_items(embeddings, names, query_name, top_k=5):
    """Find most similar items based on cosine similarity"""
    
    if query_name not in names:
        print(f"'{query_name}' not found in names")
        return
    
    query_idx = list(names).index(query_name)
    query_embedding = embeddings[query_idx:query_idx+1]
    
    # Calculate cosine similarities
    similarities = cosine_similarity(query_embedding, embeddings)[0]
    
    # Get top-k similar items (excluding the query itself)
    similar_indices = np.argsort(similarities)[::-1][1:top_k+1]
    
    print(f"Most similar to '{query_name}' (based on training data embeddings):")
    for i, idx in enumerate(similar_indices):
        similarity = similarities[idx]
        print(f"  {i+1}. {names[idx]} (similarity: {similarity:.4f})")

# Example queries
print("=== RECIPE SIMILARITIES ===")
sample_recipes = df_normalized.index[:5]
find_similar_items(recipe_embeddings, df_normalized.index, sample_recipes[0])

print("\n=== INGREDIENT SIMILARITIES ===")
sample_ingredients = df_normalized.columns[:5]
find_similar_items(ingredient_embeddings, df_normalized.columns, sample_ingredients[0])

# Test similarity for a few interesting cases
print("\n=== INTERESTING SIMILARITY EXAMPLES ===")
# Look for whiskey-based cocktails
whiskey_cocktails = [name for name in df_normalized.index if 'whiskey' in name.lower() or 'bourbon' in name.lower()]
if whiskey_cocktails:
    print(f"\nWhiskey cocktail similarities:")
    find_similar_items(recipe_embeddings, df_normalized.index, whiskey_cocktails[0])

# Look for gin-based cocktails
gin_cocktails = [name for name in df_normalized.index if 'gin' in name.lower() or 'martini' in name.lower()]
if gin_cocktails:
    print(f"\nGin cocktail similarities:")
    find_similar_items(recipe_embeddings, df_normalized.index, gin_cocktails[0])

print(f"\n✅ Similarities computed from embeddings trained on {len(train_rows)} training edges")

=== RECIPE SIMILARITIES ===
Most similar to '#5' (based on training data embeddings):
  1. Yazoo Street Scandal (similarity: 0.7687)
  2. Urbanite (similarity: 0.7394)
  3. Parlour Room Punch (similarity: 0.7264)
  4. Sonambula (similarity: 0.7034)
  5. Turn Down Service (similarity: 0.6794)

=== INGREDIENT SIMILARITIES ===
Most similar to '('absinthe', 'absinthe')' (based on training data embeddings):
  1. ('brandy', 'cognac') (similarity: 0.7025)
  2. ('liqueur', 'maraschino liqueur') (similarity: 0.6387)
  3. ('syrup', 'simple syrup') (similarity: 0.5770)
  4. ('liqueur', 'orange liqueur') (similarity: 0.5493)
  5. ('liqueur', 'bénédictine') (similarity: 0.5465)

=== INTERESTING SIMILARITY EXAMPLES ===

Whiskey cocktail similarities:
Most similar to 'Barrelhouse Flat’s Whiskey Smash' (based on training data embeddings):
  1. William Elliott’s Mint Julep (similarity: 0.9870)
  2. Fancy Free (similarity: 0.8273)
  3. Jilo Old-Fashioned (similarity: 0.7798)
  4. Champagne Cocktail (sim

In [12]:
# Final summary with proper train/test evaluation
print("=== METAPATH2VEC SUMMARY (with proper train/test split) ===")
print(f"Model type: MetaPath2Vec")
print(f"Embedding dimension: {embedding_dim}")
print(f"Recipe embeddings: {recipe_embeddings.shape}")
print(f"Ingredient embeddings: {ingredient_embeddings.shape}")
print(f"")
print(f"Data split:")
print(f"  Training edges: {len(train_rows)} (80%)")
print(f"  Test edges: {len(test_positive_edges)} (20%)")
print(f"")
print(f"Performance:")
print(f"  Training set AUC: {train_auc:.4f}")
print(f"  Test set AUC: {test_auc:.4f}")
print(f"  Generalization gap: {train_auc - test_auc:.4f}")
print(f"")
print("✅ Proper evaluation methodology:")
print("  - Model trained only on training edges")
print("  - Test set completely held out during training")
print("  - Negative test edges don't exist in training set")
print("  - Fair comparison between train and test performance")
print(f"")
print("Advantages of MetaPath2Vec:")
print("- Explicit heterogeneous graph structure")
print("- Separate embeddings for recipes and ingredients")
print("- Can define custom metapaths for different relationship types")
print("- More interpretable for heterogeneous data")
print("- Proper train/test evaluation prevents overfitting assessment")

# Check if model is generalizing well
if abs(train_auc - test_auc) < 0.03:
    print(f"\n🎯 Good generalization: Train and test AUC are similar")
elif train_auc - test_auc > 0.05:
    print(f"\n⚠️  Potential overfitting: Train AUC significantly higher than test AUC")
else:
    print(f"\n✅ Reasonable generalization: Moderate difference between train and test")

=== METAPATH2VEC SUMMARY (with proper train/test split) ===
Model type: MetaPath2Vec
Embedding dimension: 16
Recipe embeddings: (1517, 16)
Ingredient embeddings: (303, 16)

Data split:
  Training edges: 5167 (80%)
  Test edges: 1292 (20%)

Performance:
  Training set AUC: 0.9959
  Test set AUC: 0.7684
  Generalization gap: 0.2276

✅ Proper evaluation methodology:
  - Model trained only on training edges
  - Test set completely held out during training
  - Negative test edges don't exist in training set
  - Fair comparison between train and test performance

Advantages of MetaPath2Vec:
- Explicit heterogeneous graph structure
- Separate embeddings for recipes and ingredients
- Can define custom metapaths for different relationship types
- More interpretable for heterogeneous data
- Proper train/test evaluation prevents overfitting assessment

⚠️  Potential overfitting: Train AUC significantly higher than test AUC
