In [16]:
import json
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import ipywidgets as widgets

# Load the recipes from a JSON Lines file: one JSON object per line.
file_path = 'Recipes.json'
recipes = []
# Decode explicitly as UTF-8 so non-ASCII text in the file is read the same
# way on every platform instead of depending on the locale's default encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        # Skip blank / whitespace-only lines; json.loads('') would raise
        # JSONDecodeError and abort the whole load.
        if line:
            recipes.append(json.loads(line))

# Collect recipe names and the set of distinct, lower-cased ingredient names
recipe_names = [entry['name'] for entry in recipes]  # same order as `recipes`
ingredients = {
    item['ingredient'].lower()
    for entry in recipes
    for item in entry['ingredients']
}

# Alphabetical list, used as the column order of the presence matrix
ingredient_list = list(ingredients)
ingredient_list.sort()

# Build the recipe-by-ingredient presence matrix:
# one row per recipe, one column per ingredient, 1 where the recipe uses it.
column_of = {name: pos for pos, name in enumerate(ingredient_list)}
presence = np.zeros((len(recipes), len(ingredient_list)), dtype=np.int64)
for row, entry in enumerate(recipes):
    for item in entry['ingredients']:
        col = column_of.get(item['ingredient'].lower())
        # Ingredients that are not a known column are ignored
        if col is not None:
            presence[row, col] = 1
df = pd.DataFrame(presence, index=np.arange(len(recipes)), columns=ingredient_list)

# Perform clustering on the ingredient presence matrix
k = 5  # Number of clusters
# n_init is pinned explicitly: scikit-learn changed its default (10 -> 'auto')
# and warns about the unset value in the transition releases, so leaving it
# implicit makes the resulting clusters depend on the installed version.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
# One cluster id per row of df, in row order
clusters = kmeans.fit_predict(df)

# Adding cluster IDs and names to DataFrame
df['Cluster'] = clusters
# Start from a generic label for every cluster id in range(k) so that changing
# k does not raise KeyError for ids that have no hand-written name below.
cluster_readable_names = {cluster_id: f"Cluster {cluster_id}" for cluster_id in range(k)}
# NOTE(review): these labels are hand-assigned to fixed ids; nothing in this
# script checks that a given id actually contains such recipes -- confirm
# against the cluster contents after any change to the data or the model.
cluster_readable_names.update({
    0: "Dairy & Baking",
    1: "Spices & Herbs",
    2: "Meat Heavy",
    3: "Vegetarian Friendly",
    4: "Desserts",
})
df['Cluster Name'] = [cluster_readable_names[i] for i in clusters]

# Interactive functionality to explore clusters
cluster_selector = widgets.Dropdown(
    # Pair each label with its own dictionary key (the cluster id) rather than
    # with its position in the dict, so the selected value is always a valid
    # key of cluster_readable_names even if the ids are not 0..n-1 in order.
    options=[(name, cluster_id) for cluster_id, name in cluster_readable_names.items()],
    value=0,
    description='Cluster:',
)
# Output widget that the selection callback prints into
output_area = widgets.Output()

def display_cluster_info(change):
    """Print the names of the recipes in the newly selected cluster.

    Args:
        change: Change notification mapping; only ``change['new']`` (the
            selected cluster id) is read.

    The previous contents of ``output_area`` are cleared first, then a header
    line and one recipe name per line are printed inside it.
    """
    output_area.clear_output()
    with output_area:
        chosen = change['new']
        in_cluster = df['Cluster'] == chosen
        member_rows = df[in_cluster].index
        print(f"Recipes in the {cluster_readable_names[chosen]} cluster:")
        # Row labels of df double as positions in recipe_names
        for name in (recipe_names[row] for row in member_rows):
            print(name)

# Re-render the recipe list whenever the dropdown's value changes
cluster_selector.observe(display_cluster_info, names='value')
display(cluster_selector, output_area)
# The observer only fires on a *change* of value, so the preselected cluster
# would otherwise show nothing until the user switches away and back.
# Render it once up front.
display_cluster_info({'new': cluster_selector.value})




Dropdown(description='Cluster:', options=(('Dairy & Baking', 0), ('Spices & Herbs', 1), ('Meat Heavy', 2), ('V…

Output()