# In [5]:
import json
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# Load the JSON Lines file: one recipe object per non-empty line.
file_path = 'Recipes.json'
recipes = []
# JSON text is UTF-8; naming the encoding keeps non-ASCII ingredient names
# intact on platforms whose default text encoding is something else.
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. trailing newlines) would make json.loads raise.
            continue
        recipes.append(json.loads(line))

# Lower-cased ingredient names per recipe, de-duplicated in first-seen order.
# Without the de-duplication, a recipe that lists an ingredient twice would
# count each of its pairs twice and pair the ingredient with itself.
recipe_ingredient_lists = [
    list(dict.fromkeys(item['ingredient'].lower() for item in recipe['ingredients']))
    for recipe in recipes
]

# Extracting all unique ingredients
ingredients = set()
for recipe_ingredients in recipe_ingredient_lists:
    ingredients.update(recipe_ingredients)

# Create a co-occurrence matrix: co_occurrence[a][b] is the number of recipes
# that contain both a and b. Names are sorted so the row/column order of the
# resulting table does not depend on set iteration order.
ordered_ingredients = sorted(ingredients)
co_occurrence = {ing: dict.fromkeys(ordered_ingredients, 0) for ing in ordered_ingredients}
for recipe_ingredients in recipe_ingredient_lists:
    for position, first in enumerate(recipe_ingredients):
        # Visit each unordered pair once and record it symmetrically.
        for second in recipe_ingredients[position + 1:]:
            co_occurrence[first][second] += 1
            co_occurrence[second][first] += 1

# Convert to DataFrame for easier handling
co_occurrence_df = pd.DataFrame(co_occurrence)
# NOTE: the network visualisation below is wrapped in a bare triple-quoted
# string, so it is evaluated as a string expression and none of it executes.
"""
# Create graph from the co-occurrence matrix
G = nx.from_pandas_adjacency(co_occurrence_df)
threshold = 1  # Set a threshold for visual clarity
to_remove = [node for node, degree in dict(G.degree()).items() if degree <= threshold]
G.remove_nodes_from(to_remove)

# Draw the network
plt.figure(figsize=(14, 14))
pos = nx.spring_layout(G, k=0.15, iterations=20)
sizes = [G.degree[node]*100 for node in G]
nx.draw_networkx_nodes(G, pos, node_size=sizes, node_color='lightblue', alpha=0.6)
nx.draw_networkx_edges(G, pos, alpha=0.4)
nx.draw_networkx_labels(G, pos, font_size=8, font_family='sans-serif')
plt.title('Ingredient Co-occurrence Network')
plt.axis('off')
plt.show()
"""
# Define a function to get top co-occurrences
def get_top_cooccurrences(ingredient, matrix, top_n=3):
    """Return the labels of the ingredients that co-occur most with *ingredient*.

    Args:
        ingredient: Column label to look up in *matrix*.
        matrix: DataFrame of co-occurrence counts; the column for
            *ingredient* holds its count against every row label.
        top_n: Maximum number of labels to return.

    Returns:
        A pandas Index of up to *top_n* row labels ordered by descending
        count, never including *ingredient* itself, or the string
        "Ingredient not found." when *ingredient* is not a column.
    """
    if ingredient not in matrix.columns:
        return "Ingredient not found."
    # Remove the ingredient's own row by label. Its self-count is not
    # guaranteed to sort first (it is usually 0), so dropping position 0
    # after sorting would discard the strongest partner instead.
    cooccurrences = matrix[ingredient].drop(labels=ingredient, errors='ignore')
    # mergesort is a stable algorithm, so results are reproducible across runs.
    ranked = cooccurrences.sort_values(ascending=False, kind='mergesort')
    return ranked.head(top_n).index

# Create interactive input widget for ingredient input.
# continuous_update=False makes the widget publish its value when the user
# commits the text (Enter / leaving the field) instead of on every keystroke,
# so observers are not run against half-typed ingredient names.
text_input = widgets.Text(
    value='',
    placeholder='Type an ingredient',
    description='Ingredient:',
    disabled=False,
    continuous_update=False,
)
# Output area that the lookup handler prints its results into.
output_area = widgets.Output()

def on_text_submit(change):
    """Print the top co-occurring ingredients for the text the user entered.

    Args:
        change: Change notification passed by ``observe``; only its
            ``'new'`` entry (the updated text) is read.
    """
    output_area.clear_output()
    with output_area:
        # Normalise the input: stored ingredient names are lower-case, and
        # stray surrounding whitespace would otherwise cause a false miss.
        ingredient = change['new'].strip().lower()
        if not ingredient:
            # Nothing typed (or the box was cleared): leave the output empty.
            return
        top_ingredients = get_top_cooccurrences(ingredient, co_occurrence_df, 3)
        if isinstance(top_ingredients, str):
            # A string result is the "not found" message.
            print(top_ingredients)
        else:
            print(f"Top co-occurring ingredients with {ingredient} are: {', '.join(top_ingredients)}")

# Call on_text_submit whenever the text box's `value` trait changes, then
# show the text box followed by the output area.
text_input.observe(on_text_submit, names='value')
display(text_input, output_area)


# Cell output:
# Text(value='', description='Ingredient:', placeholder='Type an ingredient')
#
# Output()