In [1]:
import pandas as pd
import numpy as np

### mapping of ingredients to its base ingredient 

In [2]:
import pickle

# Load the dictionary from the file
with open('mapping_dict.pkl', 'rb') as f:
    mapping_dict = pickle.load(f)

mapping_dict


{'lime': 'lime',
 'pickled ginger': 'pickled ginger',
 'pickled ginger juice': 'pickled ginger juice',
 'pickled jalapeño': 'pickled jalapeño',
 'pickle brine': 'pickle brine',
 'pickle juice': 'pickle juice',
 'jalapeño': 'jalapeño',
 'jalapeńo': 'jalapeño',
 'jalapeno': 'jalapeño',
 'jalopeno': 'jalapeño',
 'ginger': 'ginger',
 'garlic': 'garlic',
 'peppers': 'bell pepper',
 'bell pepper': 'bell pepper',
 'honey': 'honey',
 'chili flake': 'chili flake',
 'cucumber': 'cucumber',
 'basil': 'basil',
 'avocado': 'avocado',
 'avocdos': 'avocado',
 'peanut': 'peanut',
 'vinegar': 'vinegar',
 'thyme': 'thyme',
 'arugula': 'arugula',
 'tacos': 'taco shells',
 'lettuce': 'lettuce',
 'Greek yogurt': 'yogurt',
 'mayo': 'mayo',
 'buttermilk': 'buttermilk',
 'parsley': 'parsley',
 'dill': 'dill',
 'garlic powder': 'garlic powder',
 'onion powder': 'onion powder',
 'tortilla': 'tortilla',
 'apple cider vinegar': 'apple cider vinegar',
 'apple cider': 'apple cider vinegar',
 'balsamic': 'vinegar',


### list of non-vegan ingredients in the database

In [3]:
with open('non_vegan_ingredients.txt', 'r') as file:
    non_vegan_ingr = [line.strip() for line in file]

non_vegan_ingr

['anchovy',
 'bbq sauce',
 'beer',
 'buffalo sauce',
 'butter',
 'buttermilk',
 'cheese',
 'chocolate',
 'chocolate ice cream',
 'chocolate malt powder',
 'egg',
 'fish sauce',
 'heavy cream',
 'honey',
 'marsala wine',
 'marshmallow',
 'mayo',
 'meat - beef - beef',
 'meat - beef - brisket',
 'meat - beef - chuck roast',
 'meat - beef - ribs',
 'meat - beef - steak',
 'meat - fish',
 'meat - fish - mahi mahi',
 'meat - fish - salmon',
 'meat - fish - shellfish - clams',
 'meat - fish - shellfish - lobster',
 'meat - fish - shellfish - mussel',
 'meat - fish - shellfish - scallops',
 'meat - fish - shellfish - shrimp',
 'meat - fish - tuna',
 'meat - other meat - bison',
 'meat - other meat - lamb',
 'meat - pork - bacon',
 'meat - pork - ham',
 'meat - pork - pancetta',
 'meat - pork - pork',
 'meat - pork - pork chop',
 'meat - poultry - chicken',
 'meat - poultry - turkey',
 'meat - processed meat - deli meat',
 'meat - processed meat - prosciutto',
 'meat - processed meat - salami'

### alternatives to non-vegan ingredients

In [4]:
df_non_vegan_alter = pd.read_csv('nonvegan-alternatives.csv')
df_non_vegan_alter


Unnamed: 0,BaseIngredient,VeganAlternative
0,anchovy,miso paste
1,bbq sauce,Hoisin sauce
2,beer,sparkling water
3,buffalo sauce,hot sauce
4,butter,coconut oil
5,buttermilk,non-dairy milk with lemon juice
6,cheese,"Cashew Cheese, vegan cheese"
7,chocolate,cocoa powder
8,chocolate ice cream,Non-dairy chocolate ice cream
9,chocolate malt powder,cocoa powder


## Function mapping ingredients to base ingredient

In [5]:
def extract_base_ingr(ingredients_list, instructions):
    base_ingr = []
    for ingr in ingredients_list:
        ingr = ingr.lower()
        found = False
        for key in mapping_dict:
            if found == True:
                break
            if key.lower() in ingr:
                found = True
                base_ingr.append(mapping_dict[key].lower())
              
    # remove duplicates
    seen = set()
    base_ingr = [x for x in base_ingr if not (x in seen or seen.add(x))]
    return base_ingr
    
    


## Function mapping ingredients to base ingredient in INSTRUCTIONS

In [6]:
def base_instructions(instructions,ingredients_list):
    for ingr in ingredients_list:
        ingr = ingr.lower()
        found = False
        for key in mapping_dict:
            if found == True:
                break
            if key.lower() in ingr:
                found = True
                instructions = instructions.replace(key.lower(),mapping_dict[key].lower())
    return instructions



## Function - Mapping Non-Vegan ingredient to Vegan Ing.

In [7]:
def conver_to_vegan(ingr_list):
    vegan_list = []
    for i in ingr_list:
        if i in non_vegan_ingr:
            filtered_df = df_non_vegan_alter[df_non_vegan_alter['BaseIngredient'] == i]
            # Get the comma-separated strings from column 'VeganAlternative'
            vegan_alternatives = filtered_df['VeganAlternative']
            # Split the strings by comma and flatten the list
            result_list = [item.strip() for sublist in vegan_alternatives.str.split(',') for item in sublist]
            for alter in result_list:
                vegan_list.append(alter)
        else:
            vegan_list.append(i)
            
    return vegan_list

## Function - Mapping Non-Vegan ingredient to Available Vegan Ing. in INSTRUCTIONS

In [8]:
def instructions_with_vegan_available(instructions, ingr_list):
    for i in ingr_list:
        if i in non_vegan_ingr:
            filtered_df = df_non_vegan_alter[df_non_vegan_alter['BaseIngredient'] == i]
            vegan_alternatives = filtered_df['VeganAlternative']
            result_list = [item.strip() for sublist in vegan_alternatives.str.split(',') for item in sublist]
            found = False
            for alter in result_list:
                if alter in user_ingrs:
                    instructions = instructions.replace(i, alter)
                    found = True
                    break
            if not found:
                instructions = instructions.replace(i, result_list[0])
    # return instructions


## Function - Recipes with atleast 3 ingredients available

In [9]:
def check_ingredients(recipe_ingredients, user_ingredients):
    recipe_ingredients_set = set(recipe_ingredients)
    common_ingredients = recipe_ingredients_set.intersection(user_ingredients)
    return len(common_ingredients) >= 3



## Function - Creating Tags for vectiorization

In [10]:
def tags_vegan_available(ingr_list,user_ingrs):
    vegan_list = []
    for i in ingr_list:
        if i in non_vegan_ingr:
            filtered_df = df_non_vegan_alter[df_non_vegan_alter['BaseIngredient'] == i]
            vegan_alternatives = filtered_df['VeganAlternative']
            result_list = [item.strip() for sublist in vegan_alternatives.str.split(',') for item in sublist]
            
            found = False
            for alter in result_list:
                if alter in user_ingrs:
                    vegan_list.append(alter)
                    found = True
                    break
            if not found:
                vegan_list.append(result_list[0])
        else:
            vegan_list.append(i)
    result_string = ', '.join(vegan_list)
    return str(result_string)


In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
def recommend_recipes(user_ingrs):
    # Step 1: Convert ingredients to base ingredients
    df['base_ingredients'] = df.apply(lambda row: extract_base_ingr(row['ingredients'], row['instructions']), axis=1)
    
    # Step 2: Replace instructions with base ingredients
    df['instructions'] = df.apply(lambda row: base_instructions(row['instructions'], row['ingredients']), axis=1)
    
    # Step 3: Convert ingredients to vegan ingredients
    df['vegan_ingredients'] = df['base_ingredients'].apply(conver_to_vegan)
    
    # Step 4: Filter recipes with at least 3 matching ingredients
    df['HasEnoughIngredients'] = df['vegan_ingredients'].apply(lambda x: check_ingredients(x, user_ingrs))
    suitable_recipes = df[df['HasEnoughIngredients']]
    
    # Step 5: Adjust vegan ingredients with available user ingredients
    suitable_recipes['tags'] = suitable_recipes['base_ingredients'].apply(lambda x: tags_vegan_available(x, user_ingrs))
    
    # Step 6: Replace instructions with vegan alternatives available to the user
    suitable_recipes['instructions'] = suitable_recipes.apply(lambda row: instructions_with_vegan_available(row['instructions'], row['base_ingredients']), axis=1)
    
    # Step 7: Create ingredient matrix for similarity calculation
    vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '))
    X = vectorizer.fit_transform(suitable_recipes['tags'])
    
    # Step 8: Calculate cosine similarity between user ingredients and recipes
    user_vector = vectorizer.transform([', '.join(user_ingrs)])
    similarities = cosine_similarity(user_vector, X).flatten()
    
    # Step 9: Add similarity scores and sort recipes by similarity
    suitable_recipes['Similarity'] = similarities
    recommended_recipes = suitable_recipes.sort_values(by='Similarity', ascending=False)
    
    # Step 10: Return recommended recipes
    return recommended_recipes[['title', 'Similarity', 'instructions']]


In [13]:
# user_ingrs = ['lime', 'sesame oil','garlic', 'fish sauce', 'ginger','bell pepper','tofu','cashew cheese','honey','butter','chili paste']

In [14]:
df = pd.read_csv('recipes_final.csv')

In [15]:
# Example usage:
user_ingrs = ['lime', 'sesame oil','garlic', 'fish sauce', 'ginger','bell pepper','tofu','cashew cheese','honey','butter','chili paste']
recommended = recommend_recipes(user_ingrs)
print("Recommended Recipes:")
print(recommended)

ValueError: Columns must be same length as key

TypeError: recommend_recipes() missing 1 required positional argument: 'user_ingrs'