In [None]:
import json
from pathlib import Path
import networkx as nx
import random
import math

# Path to the recipe JSON file, relative to the current working directory.
json_path = Path("..", "..", "raw_data", "foodcom", "food.com.recipe_normalized.json")

# Open through a context manager so the file handle is closed as soon as the
# document is parsed (a bare ``json_path.open()`` is never explicitly closed).
# The encoding is pinned to UTF-8 so decoding does not depend on the platform
# default locale.
with json_path.open(encoding="utf-8") as json_file:
    recipes_json = json.load(json_file)

print("Total Recipes", len(recipes_json))
# Total number of ingredient entries summed over every recipe record.
print("Sum of edge list", sum(len(recipe["ingredientes"]) for recipe in recipes_json.values()))


def build_recipe_graph(json: dict, with_cap=False, recipe_probability=1.0) -> nx.Graph:
    """Build a recipe graph whose edges are weighted by ingredient Jaccard index.

    Every record in ``json`` is added as a node named by its ``'nombre'``
    field. A random sample of the recipe keys is then compared pairwise: two
    sampled recipes are connected when their ingredient-name sets overlap, and
    the edge ``weight`` is ``len(shared) / len(union)`` of those two sets.

    Args:
        json: Mapping of recipe key -> recipe record. Each record is read for
            ``'nombre'``, for ``'ingredientes'`` (an iterable of dicts that
            each carry a ``'nombre'``) and, when ``with_cap`` is true, for
            ``'cap'``. Note that this parameter shadows the ``json`` module
            inside the function body.
        with_cap: When true, store each record's ``'cap'`` value on its node
            as the ``cap`` attribute.
        recipe_probability: Fraction of the recipes used for edge building;
            ``floor(len(json) * recipe_probability)`` keys are drawn with
            ``random.sample``.

    Returns:
        The undirected ``nx.Graph`` described above.
    """
    G_json = nx.Graph()

    sample_size = math.floor(len(json) * recipe_probability)
    recipes = random.sample(list(json.keys()), sample_size)

    # NOTE(review): nodes are named by each record's 'nombre' while the edges
    # below use the dict keys; this presumably relies on the two being the same
    # string -- confirm against the data, otherwise add_edge creates new nodes.
    for node in json.values():
        kwargs = {"cap": node['cap']} if with_cap else {}
        G_json.add_node(node['nombre'], **kwargs)

    print("Start to build graph", len(recipes))

    # Build each sampled recipe's ingredient-name set exactly once instead of
    # re-deriving it for every pair inside the quadratic loop.
    ingredient_sets = [
        {ingredient['nombre'] for ingredient in json[recipe]['ingredientes']}
        for recipe in recipes
    ]

    recipe_count = len(recipes)
    for i, recipe1 in enumerate(recipes):
        recipe1_ingredients = ingredient_sets[i]
        # Index iteration avoids copying recipes[i+1:] on every outer pass.
        for j in range(i + 1, recipe_count):
            recipe2_ingredients = ingredient_sets[j]
            common_ingredients = recipe1_ingredients & recipe2_ingredients
            if common_ingredients:
                # |A union B| = |A| + |B| - |A intersect B|, so the union set
                # never has to be materialised.
                shared = len(common_ingredients)
                total = len(recipe1_ingredients) + len(recipe2_ingredients) - shared
                G_json.add_edge(recipe1, recipes[j], weight=shared / total)
    return G_json

def build_ingredient_graph(json: dict) -> nx.Graph:
    """Build an ingredient graph weighted by the Jaccard index of shared recipes.

    Each distinct ingredient name becomes a node. Two ingredients are connected
    when at least one recipe key contains both, and the edge ``weight`` is
    ``len(shared recipes) / len(recipes using either ingredient)``.

    Args:
        json: Mapping of recipe key -> recipe record. Each record is read for
            ``'ingredientes'``, an iterable of dicts that each carry a
            ``'nombre'``. Note that this parameter shadows the ``json`` module
            inside the function body.

    Returns:
        The undirected ``nx.Graph`` described above. Nodes are inserted in the
        order ingredient names are first encountered while iterating ``json``.
    """
    G_json = nx.Graph()

    # Inverted index built in a single pass: ingredient name -> set of recipe
    # keys that use it. A dict keeps first-seen order, so the node order is
    # reproducible between runs (iterating a set of strings is not).
    ingredient_dict = {}
    for recipe, record in json.items():
        for ingredient in record["ingredientes"]:
            ingredient_dict.setdefault(ingredient['nombre'], set()).add(recipe)
    ingredients = list(ingredient_dict)

    for node in ingredients:
        G_json.add_node(node)

    print("Start to build graph", len(ingredients))

    # Positional list of recipe sets so the pair loop can index instead of
    # slicing ``ingredients[i+1:]`` (a fresh copy) on every outer pass.
    recipe_sets = [ingredient_dict[name] for name in ingredients]
    ingredient_count = len(ingredients)

    for i, ingredient1 in enumerate(ingredients):
        recipes_with_first = recipe_sets[i]
        for j in range(i + 1, ingredient_count):
            recipes_with_second = recipe_sets[j]
            common_recipes = recipes_with_first & recipes_with_second
            if common_recipes:
                # |A union B| = |A| + |B| - |A intersect B|, so the union set
                # never has to be materialised.
                shared = len(common_recipes)
                total = len(recipes_with_first) + len(recipes_with_second) - shared
                G_json.add_edge(ingredient1, ingredients[j], weight=shared / total)
    return G_json

def build_ingredient_recipe_graph(json: dict, bipartite=True) -> nx.Graph:
    """Build a graph that links every recipe to each of its ingredients.

    Ingredient nodes carry ``type="ingredient"`` and recipe nodes carry
    ``type="recipe"``. One unweighted edge is added per (recipe, ingredient)
    entry found in the data.

    Args:
        json: Mapping of recipe key -> recipe record. Each record is read for
            ``'ingredientes'``, an iterable of dicts that each carry a
            ``'nombre'``. Note that this parameter shadows the ``json`` module
            inside the function body.
        bipartite: When true, ``"_ingredient"`` / ``"_recipe"`` is appended to
            the node names so the two kinds of node cannot share a name. When
            false the raw names are used, so a recipe key equal to an
            ingredient name maps onto the same node.

    Returns:
        The undirected ``nx.Graph`` described above.
    """
    # Resolve the optional name suffixes once instead of at every use.
    ingredient_suffix = "_ingredient" if bipartite else ""
    recipe_suffix = "_recipe" if bipartite else ""

    # Collect the distinct ingredient names, one recipe at a time.
    ingredient_names = set()
    for record in json.values():
        ingredient_names.update(item['nombre'] for item in record["ingredientes"])

    graph = nx.Graph()

    # Ingredient nodes first, then recipe nodes, each tagged with its kind.
    graph.add_nodes_from(
        (name + ingredient_suffix for name in ingredient_names),
        type="ingredient",
    )
    graph.add_nodes_from(
        (recipe_key + recipe_suffix for recipe_key in json),
        type="recipe",
    )

    # One edge for every ingredient entry listed under a recipe.
    graph.add_edges_from(
        (recipe_key + recipe_suffix, item["nombre"] + ingredient_suffix)
        for recipe_key, record in json.items()
        for item in record["ingredientes"]
    )

    return graph


In [None]:
# Directory the GraphML files are written to, relative to the working directory.
graph_path = Path("..") / ".." / "data" / "graphs" / "foodcom"

# Recipe/ingredient graph: takes about 30 seconds to build.
# G8 = build_ingredient_recipe_graph(recipes_json)
# nx.write_graphml(G8, graph_path / "bipartite_recipe_ingredient.graphml")

# Ingredient graph: takes 17-20 minutes to build.
# G7 = build_ingredient_graph(recipes_json)
# nx.write_graphml(G7, graph_path / "ingredient_node_weighted.graphml")

# Recipe graph: too expensive to build, so it is left disabled.
# recipe_prob = 0.25
# G6 = build_recipe_graph(recipes_json, recipe_probability=recipe_prob)
# nx.write_graphml(G6, graph_path / f"recipe_{recipe_prob}_node_weighted.graphml")
