In [4]:
import pandas as pd 
import json
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the raw recipe records. The encoding is given explicitly so the read
# does not depend on the platform's default text encoding (the recipe text
# contains non-ASCII characters such as accents and degree signs).
with open("full_format_recipes.json", encoding="utf-8") as json_data:
    recipe_data = json.load(json_data)

In [3]:
# Extract relevant fields
# Only records carrying both a title and an ingredient list are kept.
# Directions are read with .get() so a record lacking that key yields None
# instead of aborting the whole load with a KeyError.
recipes = [
    {
        "title": recipe["title"],
        "ingredients": ", ".join(recipe["ingredients"]),
        "directions": recipe.get("directions"),
    }
    for recipe in recipe_data
    if "title" in recipe and "ingredients" in recipe
]

# Columns are named explicitly so the frame still has a "title" column to
# merge on even when no record passes the filter above.
df = pd.DataFrame(recipes, columns=["title", "ingredients", "directions"])
print(f"Loaded {len(df)} recipes.")

Loaded 20111 recipes.


In [4]:
# Read the CSV table and join it onto the parsed recipes by title, keeping
# only titles that appear in both sources.
# NOTE(review): titles are not checked for uniqueness here; a title repeated
# on both sides would multiply rows in the inner join — confirm this is intended.
csv_path = "epi_r.csv"
csv_data = pd.read_csv(csv_path)
merged_data = df.merge(csv_data, how="inner", on="title")

In [5]:
# Preview the first ten rows of the merged table.
merged_data.head(10)

Unnamed: 0,title,ingredients,directions,rating,calories,protein,fat,sodium,#cakeweek,#wasteless,...,yellow squash,yogurt,yonkers,yuca,zucchini,cookbooks,leftovers,snack,snack week,turkey
0,"Lentil, Apple, and Turkey Wrap","4 cups low-sodium vegetable or chicken stock, ...","[1. Place the stock, lentils, celery, carrot, ...",2.5,426.0,30.0,7.0,559.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Boudin Blanc Terrine with Red Onion Confit,"1 1/2 cups whipping cream, 2 medium onions, ch...",[Combine first 9 ingredients in heavy medium s...,4.375,403.0,18.0,23.0,1439.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Potato and Fennel Soup Hodge,"1 fennel bulb (sometimes called anise), stalks...",[In a large heavy saucepan cook diced fennel a...,3.75,165.0,6.0,7.0,165.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Mahi-Mahi in Tomato Olive Sauce,"2 tablespoons extra-virgin olive oil, 1 cup ch...",[Heat oil in heavy large skillet over medium-h...,5.0,,,,,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Spinach Noodle Casserole,"1 12-ounce package frozen spinach soufflé, tha...",[Preheat oven to 350°F. Lightly grease 8x8x2-i...,3.125,547.0,20.0,32.0,452.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,The Best Blts,2 1/2 cups (lightly packed) fresh basil leaves...,"[Mix basil, mayonnaise and butter in processor...",4.375,948.0,19.0,79.0,1042.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Ham and Spring Vegetable Salad with Shallot Vi...,"1 1/2 pounds small red-skinned potatoes, each ...",[Cook potatoes and carrots in large pot of boi...,4.375,,,,,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Spicy-Sweet Kumquats,"6 tablespoons granulated sugar, 1 1/2 tablespo...",[Stir together sugar and chili powder. Whisk e...,3.75,,,,,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Korean Marinated Beef,"1/4 cup soy sauce, 1 tablespoon sugar, 2 teasp...","[Stir together soy sauce, sugar, sesame oil, w...",4.375,170.0,7.0,10.0,1272.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ham Persillade with Mustard Potato Salad and M...,"6 long parsley sprigs, divided, 1 3/4 cups red...",[Chop enough parsley leaves to measure 1 table...,3.75,602.0,23.0,41.0,1696.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Configure a TF-IDF vectorizer that ignores English stop words and is
# limited to max_features=1000.
tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words="english")

# One ingredient string per merged recipe row.
ingredients = merged_data["ingredients"].values

# Fit on all ingredient strings and keep the result as a dense array.
ingredients_tfidf = tfidf_vectorizer.fit_transform(ingredients).toarray()

In [7]:
# Map each recipe title to its list of ingredient strings (split on ", ").
# The two columns are zipped directly instead of looping with iterrows(),
# and the comprehension keeps its loop names local, so the module-level
# `ingredients` array built for TF-IDF is no longer overwritten by the last
# recipe's ingredient list. A title that occurs more than once keeps the
# ingredients of its last row, as before.
rules = {
    recipe_title: ingredient_text.split(", ")
    for recipe_title, ingredient_text in zip(
        merged_data["title"], merged_data["ingredients"]
    )
}

In [8]:
def forward_chaining(user_ingredients, rules, tfidf_vectorizer, ingredients_tfidf,
                     threshold=0.4, recipes_df=None):
    """
    Select recipes whose ingredient text is similar to the user's ingredients.

    The user's ingredients are joined into one string, projected into the
    TF-IDF space of ``tfidf_vectorizer`` and compared against every row of
    ``ingredients_tfidf`` by cosine similarity. Rows scoring at or above
    ``threshold`` are reported.

    Args:
    - user_ingredients: List of ingredient strings provided by the user.
    - rules: Dictionary mapping recipe titles to ingredient lists. It is not
      consulted by the similarity matching; the parameter is kept so existing
      callers keep working.
    - tfidf_vectorizer: The fitted TF-IDF vectorizer used for the recipes.
    - ingredients_tfidf: The pre-computed TF-IDF matrix, one row per recipe.
      Row i must describe the recipe at position i of ``recipes_df``.
    - threshold: Minimum cosine similarity (inclusive) for a recipe to be
      returned. Defaults to 0.4.
    - recipes_df: DataFrame with "title", "calories" and "rating" columns,
      aligned row-for-row with ``ingredients_tfidf``. Defaults to the
      module-level ``merged_data``.

    Returns:
    - List of dicts with keys "title", "calories" and "rating", in the row
      order of ``recipes_df``. Empty when nothing reaches the threshold.
    """
    # Fall back to the notebook-level table so existing four-argument calls
    # behave exactly as before.
    if recipes_df is None:
        recipes_df = merged_data

    # Convert user ingredients to lowercase for case-insensitive matching
    normalized_ingredients = [ingredient.lower() for ingredient in user_ingredients]

    # Transform the user ingredients into the TF-IDF vector space
    user_ingredients_vector = tfidf_vectorizer.transform(
        [", ".join(normalized_ingredients)]
    ).toarray()

    # One similarity score per recipe row
    cosine_similarities = cosine_similarity(user_ingredients_vector, ingredients_tfidf).flatten()

    matching_recipes = []
    for index, score in enumerate(cosine_similarities):
        if score >= threshold:
            # Positional lookup: relies on the row alignment described above.
            recipe = recipes_df.iloc[index]
            matching_recipes.append({
                "title": recipe["title"],
                "calories": recipe["calories"],
                "rating": recipe["rating"],
            })

    return matching_recipes


In [12]:
# Ask user for ingredients
user_input = input("Please enter the ingredients you have (separated by commas): ").strip()
# Trim each entry and discard blanks, so a trailing comma or an empty reply
# does not put an empty-string ingredient into the list.
user_ingredients = [
    cleaned
    for cleaned in (piece.strip() for piece in user_input.split(","))
    if cleaned
]

Please enter the ingredients you have (separated by commas): pasta, spinach, parmesan cheese, onion, garlic


In [13]:
# Run the matcher on the ingredients entered above.
recommended_recipes = forward_chaining(
    user_ingredients=user_ingredients,
    rules=rules,
    tfidf_vectorizer=tfidf_vectorizer,
    ingredients_tfidf=ingredients_tfidf,
)

In [14]:
# Display the recommended recipes
print("\nRecommended Recipes:")
if not recommended_recipes:
    # Nothing to choose from, so the selection prompt is skipped entirely.
    print("No recipes found matching your criteria.")
else:
    for i, recipe in enumerate(recommended_recipes, start=1):
        # Titles can carry stray surrounding whitespace; trim for display only.
        shown_title = str(recipe["title"]).strip()
        # Missing calorie values arrive as NaN; show a readable placeholder.
        calories = recipe["calories"]
        calories_text = "N/A" if pd.isna(calories) else calories
        print(f"{i}. {shown_title} - Calories: {calories_text}, Rating: {recipe['rating']:.1f}")

    try:
        # The list shown to the user is 1-based; convert to a 0-based index.
        selected_index = int(input("\nEnter the number of the recipe you want to see instructions for: ")) - 1
        if 0 <= selected_index < len(recommended_recipes):
            selected_recipe = recommended_recipes[selected_index]

            # Look the recipe up by its exact (untrimmed) title.
            selected_recipe_details = merged_data[merged_data["title"] == selected_recipe["title"]]
            if not selected_recipe_details.empty:
                print(f"\nInstructions for '{str(selected_recipe['title']).strip()}':")
                # When several rows share the title, the first one is used.
                instructions = selected_recipe_details.iloc[0]["directions"]
                if isinstance(instructions, str):
                    print(instructions)
                elif isinstance(instructions, list):
                    for step in instructions:
                        print(f"- {step}")
                else:
                    print("Instructions not available.")
            else:
                print("Recipe not found!")
        else:
            print("Invalid choice! Please select a valid recipe number.")
    except ValueError:
        # int() rejected the reply (non-numeric or empty input).
        print("Invalid input. Please enter a number corresponding to a recipe.")



Recommended Recipes:
1. Farfalle with Spinach and Garbanzo Beans  - Calories: nan, Rating: 3.1
2. Spinach- and Cheese- Stuffed Pasta Shells  - Calories: 637.0, Rating: 3.8
3. Semolina and Spinach Gratin  - Calories: 294.0, Rating: 3.8
4. Egg Ribbon and Parmesan Soup  - Calories: 267.0, Rating: 3.8
5. Campanelle Pasta with Burrata Cheese, Spinach, Lemon, and Toasted Almonds  - Calories: nan, Rating: 4.4
6. Scrambled Egg Pasta  - Calories: 595.0, Rating: 3.8
7. Farfalle with Baby Spinach, Walnuts, and Ricotta  - Calories: nan, Rating: 3.1
8. Pasta with Butternut Squash and Spinach  - Calories: 583.0, Rating: 3.8
9. Pasta with Three Cheeses Brown  - Calories: 798.0, Rating: 4.4
10. Chicken Soup with Stars and Meatballs  - Calories: 632.0, Rating: 4.4

Enter the number of the recipe you want to see instructions for: 2

Instructions for 'Spinach- and Cheese- Stuffed Pasta Shells ':
- Squeeze spinach dry. Transfer spinach to large bowl. Add ricotta, 1/2 cup Parmesan, fennel, basil and garli