In [1]:
import random
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
import json

In [2]:
with open('all_ingredients_list.txt', 'r') as f:
    all_ingredients_list = [line.strip() for line in f]

In [3]:
len(all_ingredients_list)

13732

In [4]:
ingredients_at_home =   ['olive oil','thai green chili','chicken drumstick','serrano pepper','soymilk', 'coconut milk', 'fatty bacon', 'brown sugar', 'onion', 'garlic',
                         'tomato', 'parsley', 'mint', 'lemon','yogurt', 'red pepper', 'black pepper', 'eggplant', 'green pepper', 'cucumber', 'sumac', 'dill', 'cumin', 
                         'thyme', 'rice', 'bulgur','lamb', 'chicken', 'beef', 'paprika', 'cinnamon', 'clove', 'oregano', 'chickpeas', 'lentils', 'pomegranate', 'apricots',
                         'figs', 'grape leaves', 'walnuts', 'hazelnuts', 'pine nuts', 'pistachios', 'honey', 'molasses', 'rose water', 'orange blossom water', 'vinegar',
                         'butter', 'feta cheese', 'kasseri cheese', 'haloumi cheese', 'phyllo dough', 'red lentils', 'black olives', 'green olives', 'bay leaves', 'coriander',
                         'caper', 'allspice', 'star anise', 'cardamom', 'fenugreek', 'turmeric', 'ginger', 'saffron', 'sesame seeds', 'tahini', 'pomegranate molasses','cured beef',
                         'fresh beans', 'green lentils', 'red cabbage', 'white cabbage', 'turnips', 'artichokes', 'okra', 'zucchini', 'pumpkin', 'cauliflower']

In [5]:
#!pip uninstall inflect typeguard typing-extensions -y
#!pip install typing-extensions==4.7.1
#!pip install inflect==5.3.0

In [6]:
import inflect
p = inflect.engine()

In [7]:
def to_singular(ingredient):
    return p.singular_noun(ingredient) or ingredient

# Define a function to process a list of ingredients
def process_ingredient_list(ingredient_list):
    return [to_singular(ingredient) for ingredient in ingredient_list]

In [8]:
ingredients_at_home=process_ingredient_list(ingredients_at_home)

In [9]:
ingredients_at_home_appended=[" ".join(ingredients_at_home)]

In [10]:
cuisine_ingredient_model= joblib.load('cuisine_prediction.joblib')

In [11]:
ingredients_at_home_cuisine_type=cuisine_ingredient_model.predict(ingredients_at_home_appended)[0]

In [12]:
print(f"According to our algorithm, the ingredients you have at home are similar to the ingredients used in {ingredients_at_home_cuisine_type} cuisine. Our algorithm is going to recommend you recipes from the cuisines similar to {ingredients_at_home_cuisine_type} cuisine.")

According to our algorithm, the ingredients you have at home are similar to the ingredients used in lebanese cuisine. Our algorithm is going to recommend you recipes from the cuisines similar to lebanese cuisine.


In [58]:
def generate_random_date():
    today = datetime.now()
    random_days = random.randint(1, 100)
    expiration_date = today + timedelta(days=random_days)
    return expiration_date.strftime("%Y-%m-%d")

# Associate each selected ingredient with a random expiration date
ingredients_with_dates = {ingredient: generate_random_date() for ingredient in ingredients_at_home}

# Convert to a DataFrame for easier handling and potential saving
df_ingredients = pd.DataFrame(list(ingredients_with_dates.items()), columns=['Ingredient', 'Expiration_Date'])

# Print the DataFrame
print(df_ingredients)

           Ingredient Expiration_Date
0           olive oil      2024-07-08
1    thai green chili      2024-08-21
2   chicken drumstick      2024-09-05
3      serrano pepper      2024-07-07
4             soymilk      2024-06-14
..                ...             ...
75          artichoke      2024-05-31
76               okra      2024-07-20
77           zucchini      2024-07-31
78            pumpkin      2024-07-28
79        cauliflower      2024-08-30

[80 rows x 2 columns]


In [59]:
# Convert the 'Expiration_Date' column to datetime format
df_ingredients['Expiration_Date'] = pd.to_datetime(df_ingredients['Expiration_Date'])

# Get the current date as a pandas Timestamp
today = pd.Timestamp(datetime.now())

# Calculate the number of days left until expiration
df_ingredients['Days_Left'] = (df_ingredients['Expiration_Date'] - today).dt.days

In [60]:
# Ensure the 'Ingredient' column is set as the index
df_ingredients.set_index('Ingredient', inplace=True)

In [61]:
df_ingredients.head(50)

Unnamed: 0_level_0,Expiration_Date,Days_Left
Ingredient,Unnamed: 1_level_1,Unnamed: 2_level_1
olive oil,2024-07-08,38
thai green chili,2024-08-21,82
chicken drumstick,2024-09-05,97
serrano pepper,2024-07-07,37
soymilk,2024-06-14,14
coconut milk,2024-06-15,15
fatty bacon,2024-06-03,3
brown sugar,2024-06-11,11
onion,2024-07-14,44
garlic,2024-06-28,28


In [62]:
# Step 2: Create the weighted pantry vector in a sparse way
weighted_pantry_vector = np.array([1/(0.01+df_ingredients.at[ingredient, 'Days_Left']) if ingredient in df_ingredients.index else 0 for ingredient in all_ingredients_list])

In [63]:
Recipes=pd.read_csv('RAW_recipes_with_clusters.csv')

In [64]:
Recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,5_Clusters,10_Clusters,15_Clusters,20_Clusters,30_Clusters
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,['mexican'],3,5,9,12,20
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,['northeastern-united-states'],4,9,13,18,27
2,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,['northeastern-united-states'],4,9,13,18,27
3,aww marinated olives,25274,15,21730,2002-04-14,"['15-minutes-or-less', 'time-to-make', 'course...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,['toast the fennel seeds and lightly crush the...,my italian mil was thoroughly impressed by my ...,"['fennel seed', 'green olive', 'ripe olive', '...",9,['canadian'],4,9,13,18,27
4,chile rellenos,43026,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"['egg roll wrap', 'whole green chili', 'cheese...",5,['southwestern-united-states'],3,5,9,12,20


In [65]:
openness_to_different_cuisines=3

In [66]:
if openness_to_different_cuisines==1:
    with open('cuisine_clusters30.json', 'r') as f:
        clusters_for_recipes = json.load(f) 
elif openness_to_different_cuisines==2:
    with open('cuisine_clusters20.json', 'r') as f:
        clusters_for_recipes = json.load(f)
elif openness_to_different_cuisines==3:
    with open('cuisine_clusters15.json', 'r') as f:
        clusters_for_recipes = json.load(f)
elif openness_to_different_cuisines==4:
    with open('cuisine_clusters10.json', 'r') as f:
        clusters_for_recipes = json.load(f)
else:
    with open('cuisine_clusters5.json', 'r') as f:
        clusters_for_recipes = json.load(f)

In [67]:
cuisine_to_cluster = {cuisine: cluster for cluster, cuisines in clusters_for_recipes.items() for cuisine in cuisines}
def get_cluster_number(cuisine_tags):
    return np.int64(cuisine_to_cluster.get(cuisine_tags[0], None))

In [68]:
Preferred_cuisine_number=get_cluster_number([ingredients_at_home_cuisine_type])

In [69]:
if openness_to_different_cuisines==1:
    Recipes_Preffered = Recipes[Recipes['30_Clusters'] == Preferred_cuisine_number]
elif openness_to_different_cuisines==2:
    Recipes_Preffered = Recipes[Recipes['20_Clusters'] == Preferred_cuisine_number]
elif openness_to_different_cuisines==3:
    Recipes_Preffered = Recipes[Recipes['15_Clusters'] == Preferred_cuisine_number]
elif openness_to_different_cuisines==4:
    Recipes_Preffered = Recipes[Recipes['10_Clusters'] == Preferred_cuisine_number]
else:
    Recipes_Preffered = Recipes[Recipes['5_Clusters'] == Preferred_cuisine_number]

In [70]:
Recipes_Preffered.reset_index(drop=True, inplace=True)

In [71]:
len(Recipes_Preffered)

38904

In [72]:
import ast
ingredients_of_recipes=Recipes_Preffered['ingredients'].apply(ast.literal_eval)
Recipes_Preffered['Cuisine_Tags']=Recipes_Preffered['Cuisine_Tags'].apply(ast.literal_eval)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Recipes_Preffered['Cuisine_Tags']=Recipes_Preffered['Cuisine_Tags'].apply(ast.literal_eval)


In [73]:
len(ingredients_of_recipes)

38904

In [74]:
from scipy.sparse import lil_matrix, csr_matrix
from scipy.sparse import save_npz, load_npz

In [75]:
num_recipes = len(ingredients_of_recipes)
num_ingredients = len(all_ingredients_list)

In [76]:
# Step 2: Create a mapping from ingredient to index
ingredient_to_index = {ingredient: idx for idx, ingredient in enumerate(all_ingredients_list)}

In [77]:
# Initialize a sparse matrix with the shape (num_recipes, num_ingredients)
binary_matrix = lil_matrix((num_recipes, num_ingredients), dtype=int)

# Fill the sparse matrix
for i, recipe_ingredients in enumerate(ingredients_of_recipes):
    for ingredient in recipe_ingredients:
        if ingredient in ingredient_to_index:
            j = ingredient_to_index[ingredient]
            binary_matrix[i, j] = 1


In [78]:
# Convert to CSR format for efficient arithmetic and matrix operations
binary_matrix_csr = binary_matrix.tocsr()

In [79]:
weighted_pantry_vector_sparse = csr_matrix(weighted_pantry_vector)

In [80]:
recipe_scores=binary_matrix_csr.dot(weighted_pantry_vector_sparse.T)

In [81]:
recipe_scores=np.array(recipe_scores.toarray().flatten().tolist())

In [82]:
np.argsort(recipe_scores)

array([19451,  6502, 20569, ...,  2270, 26886, 34439], dtype=int64)

In [83]:
# Find the indices of the 10 largest entries
indices_of_largest_entries = np.argsort(recipe_scores)[-10:]

# Get the 10 largest entries using the indices
recipe_scores[indices_of_largest_entries]

array([101.07828793, 101.1410682 , 101.14815559, 101.23724926,
       101.23801984, 101.26578443, 101.28006811, 101.40413173,
       101.56938981, 101.58118637])

In [84]:
indices_of_largest_entries[9]

34439

In [85]:
Recipes_Preffered['name'][indices_of_largest_entries[9]]

'spicy lamb meatballs and lentils'

In [86]:
Recipes_Preffered['steps'][indices_of_largest_entries[9]]

"['in a bowl , combine lamb , bacon , water , garlic , mint , parsley , spices and seasoning', 'mix well', 'divide the mixture into about 25-30 small meatballs- flatten a little to a disc shape', 'place on tray and refrigerate until ready to use', 'to prepare the lentils , heat the olive oil in pan and add the onion , carrot and cloves of garlic and saute for 5 minutes until the onions have become transparent', 'add the lentils and stir to coat with the oil , about 2-3 minutes', 'add 1 cup tomatoes , chicken stock , seasoning and bouquet garni', 'bring to the boil and turn down to a simmer', 'cover and cook until the lentils are tender', 'discard the bouquet garni', 'heat extra olive oil in a pan and saute the meatballs in batches in a single layer', 'remove and add to the lentils', 'continue to simmer the meatballs and lentils for 15 minutes or until the meatballs are cooked through', 'add the remaining cup of tomatoes and cook a further 10 minutes', 'taste and adjust seasoning', 'ser

In [87]:
Recipes_Preffered['ingredients'][indices_of_largest_entries[9]]

"['ground lamb', 'bacon', 'water', 'garlic', 'mint', 'parsley', 'paprika', 'cayenne', 'cumin', 'coriander', 'cinnamon', 'oregano', 'thyme', 'clove', 'cardamom', 'fresh ground black pepper', 'salt', 'olive oil', 'onion', 'carrot', 'lentil', 'diced tomato', 'chicken stock', 'bay leaf']"

In [88]:
Recipes_Preffered['name'][indices_of_largest_entries[8]]

'moroccan b stella'

In [89]:
Recipes_Preffered['ingredients'][indices_of_largest_entries[8]]

"['extra virgin olive oil', 'onion', 'chicken breast', 'parsley', 'cilantro', 'black pepper', 'saffron', 'ginger', 'nutmeg', 'clove', 'salt', 'water', 'egg', 'phyllo dough', 'butter', 'cinnamon', 'sugar', 'almond', 'granulated sugar', 'powdered sugar']"

In [90]:
Recipes_Preffered['steps'][indices_of_largest_entries[8]]

'[\'heat olive oil in pan , then add onions , chicken pieces , parsley , cilantro , black pepper , saffron , ginger , cinnamon , nutmeg , clove , and salt\', \'stir well and add the 2 cups of water\', \'cover with a lid and cook for 20 minutes\', \'remove the chicken from pan and set aside\', \'allow the sauce to cool slightly then add the beaten eggs to the sauce in the pan\', "cook the eggs with the sauce stirring continually so eggs won\'t stick", \'when eggs are cooked , remove the pan from the heat and drain the eggs well in a colander\', \'set he eggs aside to cool\', \'open the phyllo and cover with a damp towel so it will not dry out\', \'take an 8-10 inch round baking pan / dish and cover generously with butter\', \'very gently , spread 5 sheets of phyllo dough alternately criss-cross over the pan , leaving some phyllo dough draped around the edges of the pan\', \'add another sheet of phyllo crunched in the middle of the pan\', \'mix together the 1 / 2 teaspoon cinnamon & 1 / 

In [91]:
Recipes_Preffered['name'][indices_of_largest_entries[7]]

'copycat tgi friday s lemon chicken scaloppine'

In [92]:
Recipes_Preffered['ingredients'][indices_of_largest_entries[7]]

"['chicken breast', 'olive oil', 'sliced mushroom', 'lemon', 'heavy whipping cream', 'artichoke', 'parsley', 'angel hair pasta', 'pancetta', 'caper', 'fresh lemon juice', 'chabli', 'butter', 'whipping cream', 'thyme', 'salt', 'pepper']"

In [93]:
Recipes_Preffered['steps'][indices_of_largest_entries[7]]

"['for the chicken: heat a saut pan over medium heat', 'add oil and heat', 'add chicken pieces to the saut pan and saut on each side for one minute', 'add the sliced mushrooms to the saut pan and saut with the chicken for an additional minute', 'when the mushrooms are cooked , squeeze the juice from the lemons into the saut pan and coat the chicken with the juice', 'add the cream to the pan and stir to incorporate', 'add the lemon sauce and stir to incorporate- do not return to heat / flame !', 'for the lemon sauce: boil the chablis to reduce to 2 cups', 'add the lemon juice and butter and melt slowly', 'add the whipping cream and simmer on low heat until thickened', 'add the spices and cool to room temperature', 'to serve: in a large bowl , twirl the pasta into a nest', 'sprinkle the chicken pieces on the pasta and pour the remaining contents of the pan on and around the chicken', 'sprinkle the artichokes , pancetta and capers over the entire dish', 'garnish with chopped parsley']"