In [44]:
import pandas as pd
import numpy as np
import re
from fuzzywuzzy import process
import ast
import tensorflow as tf

In [46]:
# Import recipes dataset
path = "C:/Users/sarah/OneDrive/Desktop/Grad School/BIA-662 (Augmented Intelligence and Gen AI)/Final Project/RAW_recipes.csv/"
recipes = pd.read_csv(path+"RAW_recipes.csv")

In [47]:
# Check for missing values in the dataset
recipes.isnull().sum()

name                 1
id                   0
minutes              0
contributor_id       0
submitted            0
tags               109
nutrition            0
n_steps              0
steps                1
description       4979
ingredients          0
n_ingredients        0
dtype: int64

In [48]:
# drop rows with missing values
clean_recipes = recipes.dropna(axis=0)

In [49]:
# Check for missing values after dropping rows
clean_recipes.isnull().sum()

name              0
id                0
minutes           0
contributor_id    0
submitted         0
tags              0
nutrition         0
n_steps           0
steps             0
description       0
ingredients       0
n_ingredients     0
dtype: int64

# IBM Watson Assitant Chatbot

In [51]:
import json
import ibm_watson
from ibm_watson import AssistantV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

In [52]:
def ChefSourWhiskersBot():
    # Create an IAM authenticator
    authenticator = IAMAuthenticator('B2I0oCVKm-5ZZsLEF5RGB9EqMrVC6w677RitlxRY4G3C') # put our API key here

    # Create session with the Watson Assistant
    assistant = AssistantV2(
        version='2024-06-18',
        authenticator=authenticator
    )

    # Set the service URL
    assistant.set_service_url('https://api.us-east.assistant.watson.cloud.ibm.com/instances/77c47b93-e26c-420b-bfde-64d0142d7ca9')  # Adjust the URL to the correct region of our chatbot

    # Assign our assitant ID
    assistant_id = 'fe0c833f-6d69-4eff-9cb5-6b39faad49a8' # put our assitant id here

    # Create a session id
    session = assistant.create_session(assistant_id=assistant_id).get_result()
    session_id = session['session_id']
    
    # Pull the first prompt from the Dialog
    response = assistant.message(
            assistant_id=assistant_id,
            session_id=session_id).get_result()

    # Continue prompting the user and getting their input, until they indicate
    # it's time to quit
    while True:

        # Get the text of the prompt
        prompt = response.get("output").get("text")
  
        # Display all of the text provided in the prompt
        for text in prompt:
            print(text)
 
        # Get the user's next utterance
        utterance = input("==> ")

        # Invoke Watson to assess the intent of the utterance and determine how
        # to respond to the user
        response = assistant.message(
                workspace_id='...', # put specific workspace_id here
                input={'text': utterance},
                context=response.get("context")).get_result()

        # Ensure there are intents in the response.
        if len(response.get("intents")) > 0:
            
            #Check whether the dialog indicates an end to the conversation
            if response["intents"][0]["intent"] == "General_Ending":
                if len(response.get("output").get("text")) > 0:
                    # If there are any remaining messages in the response then
                    # print them out.
                    print(response.get("output").get("text")[0] + '\n')
                    # And terminate the conversation.
                    break

In [38]:
# Call ChefBot
ChefSourWhiskersBot()

ApiException: Error: Resource not found, Status code: 404 , X-global-transaction-id: 48058510-2530-4b53-b146-749414480439

# Main Code

In [59]:
# split ingedients column into separate ingedients
## RENAME SO THAT IT APPLIES TO MULTIPLE COLUMNS NOT JUST INGREDIENTS
def split_ingredients(ingredients_string):
    return re.split(r'\s*\,\s*', ingredients_string)

In [61]:
# create new column with list of ingredients
clean_recipes['ingredient_list'] = clean_recipes['ingredients'].apply(split_ingredients)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_recipes['ingredient_list'] = clean_recipes['ingredients'].apply(split_ingredients)


In [62]:
# create new column with list of ingredients
clean_recipes['tag_list'] = clean_recipes['tags'].apply(split_ingredients)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_recipes['tag_list'] = clean_recipes['tags'].apply(split_ingredients)


In [63]:
# dictionary to store ingredient counts
ingredient_counts = {}
# dictionary to store recipes containing each ingredient
recipes_with_ingredient = {}

for index, row in clean_recipes.iterrows():
    recipe_name = row['name']
    # Split the ingredient string into a list
    ingredients = [ing.strip() for ing in row['ingredients'].split(',')]
    
    for ingredient in ingredients:
        # Update ingredient count
        ingredient = ingredient.lower().strip()
        ingredient_counts[ingredient] = ingredient_counts.get(ingredient, 0) + 1
        
        # Update recipes containing this ingredient
        if ingredient not in recipes_with_ingredient:
            recipes_with_ingredient[ingredient] = set()
        recipes_with_ingredient[ingredient].add(recipe_name)

In [65]:
# dictionary to store ingredient counts
tag_counts = {}
# dictionary to store recipes containing each ingredient
recipes_with_tags = {}

for index, row in clean_recipes.iterrows():
    recipe_name = row['name']
    # Split the ingredient string into a list
    tags = [tag.strip() for tag in row['tags'].split(',')]
    
    for tag in tags:
        # Update ingredient count
        tag = tag.lower().strip()
        tag_counts[tag] = tag_counts.get(tag, 0) + 1
        
        # Update recipes containing this ingredient
        if tag not in recipes_with_tags:
            recipes_with_tags[tag] = set()
        recipes_with_tags[tag].add(recipe_name)

In [66]:
def find_recipes_with_ingredients(ingredients, tags, max_cooking_time, similarity_threshold=70):
    if not ingredients or not tags:
        print("Both ingredients and tags must be provided.")
        return pd.DataFrame()

    ingredients = [ingredient.lower() for ingredient in ingredients]
    tags = [tag.lower() for tag in tags]

    def fuzzy_match(item, choices, threshold):
        matches = process.extractBests(item, choices, score_cutoff=threshold)
        return set(match[0] for match in matches)

    matching_recipes = set(clean_recipes['name'])
    matching_tags = set(clean_recipes['name'])

    # Find recipes with all ingredients
    if ingredients:
        all_ingredients = set(recipes_with_ingredient.keys())
        for ingredient in ingredients:
            fuzzy_matches = fuzzy_match(ingredient, all_ingredients, similarity_threshold)
            ingredient_recipes = set().union(*(recipes_with_ingredient.get(match, set()) for match in fuzzy_matches))
            matching_recipes = matching_recipes.intersection(ingredient_recipes)

    # Find recipes with all tags
    if tags:
        all_tags = set(recipes_with_tags.keys())
        for tag in tags:
            fuzzy_matches = fuzzy_match(tag, all_tags, similarity_threshold)
            tag_recipes = set().union(*(recipes_with_tags.get(match, set()) for match in fuzzy_matches))
            matching_tags = matching_tags.intersection(tag_recipes)

    # Find the intersection of recipes with matching ingredients and tags
    final_matches = matching_recipes.intersection(matching_tags)
    if not final_matches:
        print("No recipes match both ingredients and tags criteria.")
        return pd.DataFrame()
    print(f'Number of matching recipes: {len(final_matches)}')
    # Filter by cooking time using the DataFrame
    matching_df = clean_recipes[
        (clean_recipes['name'].isin(final_matches)) & 
        (clean_recipes['minutes'] <= max_cooking_time)
    ]

    # Print results
    if not matching_df.empty:
        print(f"\nRecipes containing {', '.join(ingredients)} meeting requirements {', '.join(tags)} and cooking time <= {max_cooking_time} minutes:")
        for _, row in matching_df.iterrows():
            print(f"{row['name']} (Cooking time: {row['minutes']} minutes)")
    else:
        print(f"\nNo recipes found containing all of these ingredients: {', '.join(ingredients)} with these requirements: {', '.join(tags)} and cooking time <= {max_cooking_time} minutes")

    return matching_df

In [91]:
#Example usage:
all_matching_recipes = find_recipes_with_ingredients(['chicken breast','broccoli'],['dinner'],60)

Number of matching recipes: 33

Recipes containing chicken breast, broccoli meeting requirements dinner and cooking time <= 60 minutes:
basil chicken with broccoli (Cooking time: 15 minutes)
broccoli   chicken with hoisin sauce (Cooking time: 10 minutes)
broccoli chicken dish  gluten free (Cooking time: 55 minutes)
cheesy broccoli bacon chicken casserole (Cooking time: 45 minutes)
cheesy chicken   broccoli casserole (Cooking time: 35 minutes)
chicken   broccoli casserole    all time fav (Cooking time: 45 minutes)
chicken and broccoli pasta (Cooking time: 50 minutes)
chicken and broccoli rice casserole (Cooking time: 45 minutes)
chicken tortellini casserole (Cooking time: 38 minutes)
chicken  broccoli   angel hair pasta (Cooking time: 40 minutes)
chicken vegetable casserole (Cooking time: 50 minutes)
christopher s teriyaki stir fry (Cooking time: 40 minutes)
creamy chicken noodle casserole (Cooking time: 60 minutes)
easy chicken gumbo (Cooking time: 25 minutes)
garlic chicken   voila co

In [93]:
suggested_recipes = all_matching_recipes.sort_values(by=['n_steps']).head(3)

In [95]:
suggested_recipes

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,ingredient_list,tag_list
46105,chicken tortellini casserole,232122,38,485637,6/4/2007,"60-minutes-or-less, time-to-make, course, main...","816.1, 47.0, 11.0, 25.0, 134.0, 53.0, 22.0",4,"cook tortellini for 8 minutes, combine all ing...",simple dinner for a busy life.,"tortellini, broccoli, alfredo sauce, chicken b...",4,"[tortellini, broccoli, alfredo sauce, chicken ...","[60-minutes-or-less, time-to-make, course, mai..."
40167,cheesy chicken broccoli casserole,157120,35,115853,2/22/2006,"60-minutes-or-less, time-to-make, course, main...","260.3, 26.0, 8.0, 29.0, 34.0, 40.0, 3.0",5,"preheat oven to 350, place all ingredients in ...",just threw this together one night with what i...,"chicken breast, cream of mushroom soup, brocco...",4,"[chicken breast, cream of mushroom soup, brocc...","[60-minutes-or-less, time-to-make, course, mai..."
158388,perfect pasta,215664,20,341170,3/8/2007,"30-minutes-or-less, time-to-make, course, main...","458.7, 17.0, 81.0, 23.0, 49.0, 13.0, 22.0",6,"cook pasta according to package directions, ad...",taken from a mom and me cookbook. i haven't tr...,"pasta, broccoli, chicken breasts, corn, green ...",9,"[pasta, broccoli, chicken breasts, corn, green...","[30-minutes-or-less, time-to-make, course, mai..."
