In [1]:
import json

# Load JSON data
with open('all_recipes.json', 'r') as file:
    data = json.load(file)

# Inspect a few entries to understand the structure
print(list(data.items())[:2])
print(len(data.items()))

[('Red Lentil Soup with Chicken and Turnips', [{'name': 'diced additional toppings: avocado', 'amount': 8.0, 'unit': 'servings'}, {'name': 'diced carrots', 'amount': 3.0, 'unit': 'medium'}, {'name': 'diced celery stalks', 'amount': 3.0, 'unit': ''}, {'name': 'shredded chicken breast', 'amount': 280.0, 'unit': 'g'}, {'name': 'italian flat leaf parsley', 'amount': 30.0, 'unit': 'g'}, {'name': 'garlic', 'amount': 6.0, 'unit': 'cloves'}, {'name': 'olive oil', 'amount': 2.0, 'unit': 'Tbsps'}, {'name': 'canned tomatoes', 'amount': 793.787, 'unit': 'g'}, {'name': 'red dried lentils', 'amount': 360.0, 'unit': 'g'}, {'name': 'black salt and pepper', 'amount': 8.0, 'unit': 'servings'}, {'name': 'diced turnip', 'amount': 1.0, 'unit': 'large'}, {'name': 'vegetable stock', 'amount': 1.88, 'unit': 'l'}, {'name': 'yellow diced onion', 'amount': 1.0, 'unit': 'medium'}]), ('Asparagus and Pea Soup: Real Convenience Food', [{'name': 'frozen asparagus', 'amount': 1.0, 'unit': 'bag'}, {'name': 'evoo', 'amo

In [2]:
# Initialize sets for unique recipes and ingredients
recipes = list(data.keys())
ingredients = set()

# Extract unique ingredients
for recipe, ingredient_list in data.items():
    for ingredient in ingredient_list:
        ingredients.add(ingredient["name"])  # Add ingredient name to the set

ingredients = list(ingredients)  # Convert to list for easier indexing
print(ingredient)


{'name': 'taco seasoning', 'amount': 2.0, 'unit': 'Tbsps'}


In [3]:
# Dictionary to store recipe-ingredient edges
edges = []

# Generate edges (recipe, ingredient) for the graph
for recipe, ingredient_list in data.items():
    for ingredient in ingredient_list:
        ingredient_name = ingredient["name"]
        edges.append((recipe, ingredient_name))


In [4]:
# Create mappings for recipe and ingredient nodes
recipe_to_idx = {recipe: idx for idx, recipe in enumerate(recipes)}
ingredient_to_idx = {ingredient: idx + len(recipes) for idx, ingredient in enumerate(ingredients)}  # Offset by recipe count

# Convert edges to index pairs for easier use in a GNN
indexed_edges = [(recipe_to_idx[recipe], ingredient_to_idx[ingredient]) for recipe, ingredient in edges]


In [5]:
import torch
from torch_geometric.data import Data

# Convert edges to PyTorch tensors
edge_index = torch.tensor(indexed_edges, dtype=torch.long).t().contiguous()

# Create a PyTorch Geometric data object
data = Data(edge_index=edge_index)


In [6]:
"""
PREPROCESSING PART IS DONE
"""

'\nPREPROCESSING PART IS DONE\n'

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

def get_cosine_similarity(embedding_a, embedding_b):
    return cosine_similarity(embedding_a.reshape(1, -1), embedding_b.reshape(1, -1))[0][0]


In [16]:
# Import necessary libraries
from torch_geometric.nn import GCNConv, SAGEConv, GATConv
import torch.nn.functional as F
import torch

# Define the GNN Model
class RecipeGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(RecipeGNN, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Initialize the GNN model, optimizer, and set up embeddings
model = RecipeGNN(in_channels=64, hidden_channels=128, out_channels=64)  # Adjust input/output dimensions as needed
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Dummy node features (use random or pre-trained features if available)
x = torch.randn(len(recipes) + len(ingredients), 64)  # Random features for each node

# Train the GNN model
epochs = 50
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)  # Forward pass
    loss = F.mse_loss(out, x)   # Use MSE or other appropriate loss
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

# Function to recommend recipes based on multiple liked recipes
def recommend_by_liked_recipes(liked_recipes_indices, embeddings, top_n=5):
    liked_embeddings = [embeddings[idx] for idx in liked_recipes_indices]
    profile = torch.stack(liked_embeddings).mean(dim=0)  # Average profile of liked recipes
    
    similarities = []
    for idx, embedding in enumerate(embeddings[:len(recipe_to_idx)]):  # Only compare to recipe nodes
        similarity = get_cosine_similarity(profile, embedding)
        similarities.append((idx, similarity))
    
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)
    recommended_recipes = [recipes[idx] for idx, _ in similarities[:top_n]]
    return recommended_recipes


Epoch 1/50, Loss: 1.1198545694351196
Epoch 2/50, Loss: 1.06256103515625
Epoch 3/50, Loss: 0.9156192541122437
Epoch 4/50, Loss: 0.8262745141983032
Epoch 5/50, Loss: 0.7702227830886841
Epoch 6/50, Loss: 0.7072864770889282
Epoch 7/50, Loss: 0.6375679969787598
Epoch 8/50, Loss: 0.5778673887252808
Epoch 9/50, Loss: 0.5344282388687134
Epoch 10/50, Loss: 0.4904997646808624
Epoch 11/50, Loss: 0.43910449743270874
Epoch 12/50, Loss: 0.40081557631492615
Epoch 13/50, Loss: 0.37692660093307495
Epoch 14/50, Loss: 0.3432007133960724
Epoch 15/50, Loss: 0.3148876428604126
Epoch 16/50, Loss: 0.3012235164642334
Epoch 17/50, Loss: 0.2782951891422272
Epoch 18/50, Loss: 0.25947195291519165
Epoch 19/50, Loss: 0.2490740865468979
Epoch 20/50, Loss: 0.23184284567832947
Epoch 21/50, Loss: 0.2193581759929657
Epoch 22/50, Loss: 0.21040451526641846
Epoch 23/50, Loss: 0.19748157262802124
Epoch 24/50, Loss: 0.1901119500398636
Epoch 25/50, Loss: 0.18227773904800415
Epoch 26/50, Loss: 0.1739710122346878
Epoch 27/50, Lo

In [17]:
liked_recipes=["Fusilli With Zucchini Flowers, Ricotta and Saffron", "Roast Cauliflower Salad with Green Beans and Cherry Tomatoes"]
liked_indices = [recipe_to_idx[recipe] for recipe in liked_recipes if recipe in recipe_to_idx]

# Ensure we have exactly 2 liked recipes
if len(liked_indices) == 2:
    # Get recipe embeddings from the trained model
    model.eval()
    embeddings = model(x, edge_index).detach()  # Run model in eval mode, detach to prevent grad tracking

    # Recommend based on the embeddings of liked recipes
    recommended_recipes = recommend_by_liked_recipes(liked_indices, embeddings)
    print("Recommended Recipes:", recommended_recipes)
else:
    print("Please ensure you have two liked recipes in the list.")

Recommended Recipes: ['Fusilli With Zucchini Flowers, Ricotta and Saffron', 'Roast Cauliflower Salad with Green Beans and Cherry Tomatoes', 'Donkatsu - Korean Breaded Pork Cutlet', 'Luscious Palak Paneer', 'Chipotle Black Bean Soup with Avocado Cream']


In [None]:
from torch_geometric.nn import Node2Vec
import torch

# Assume edge_index and node_count are already defined
node2vec = Node2Vec(edge_index, embedding_dim=64, walk_length=10, context_size=5, walks_per_node=10)
loader = node2vec.loader(batch_size=128, shuffle=True)
optimizer = torch.optim.Adam(node2vec.parameters(), lr=0.01)

# Training loop
for epoch in range(10):  # Adjust epoch count as needed
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = node2vec.loss(pos_rw, neg_rw)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch}, Loss: {loss.item()}")

# Get embeddings for recipes and ingredients
embeddings = node2vec.embedding.weight.data


In [17]:
def recommend_recipes(recipe_idx, embeddings, top_n=5):
    recipe_embedding = embeddings[recipe_idx]
    similarities = []

    for idx, embedding in enumerate(embeddings):
        if idx != recipe_idx:  # Exclude the input recipe itself
            similarity = get_cosine_similarity(recipe_embedding, embedding)
            similarities.append((idx, similarity))

    # Sort recipes by similarity and get top-N recommendations
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)
    recommended_recipes = [idx for idx, sim in similarities[:top_n]]
    return recommended_recipes


In [None]:
def recommend_by_ingredients(preferred_ingredients, ingredient_to_idx, embeddings, top_n=5):
    # Get embeddings for preferred ingredients
    ingredient_embeddings = [embeddings[ingredient_to_idx[ingredient]] for ingredient in preferred_ingredients]
    
    # Average embeddings to create an "ingredient profile"
    ingredient_profile = sum(ingredient_embeddings) / len(ingredient_embeddings)
    
    # Calculate similarity between the ingredient profile and each recipe
    similarities = []
    for idx, recipe_embedding in enumerate(embeddings[:len(recipe_to_idx)]):  # Only consider recipe embeddings
        similarity = get_cosine_similarity(ingredient_profile, recipe_embedding)
        similarities.append((idx, similarity))

    # Sort recipes by similarity and get top-N recommendations
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)
    recommended_recipes = [idx for idx, sim in similarities[:top_n]]
    return recommended_recipes
