# Recipes dataset preprocessing


In [1]:
import numpy as np
import pandas as pd
from annoy import AnnoyIndex

In [2]:
# Load the two source tables: the recipe details file and the
# recipe-ingredient alias file (one row per recipe/ingredient pair).
db_recipes, db_details = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/01_Recipe_Details.csv',
        'data/04_Recipe-Ingredients_Aliases.csv',
    )
)

In [3]:
# Build the ingredient vocabulary: one vector dimension per distinct aliased name.
# Names are stripped BEFORE de-duplication so that two spellings differing only
# in surrounding whitespace collapse into a single entry instead of producing
# duplicate dimensions. Missing names are dropped because they can neither be
# stripped nor matched against a recipe's ingredients.
# `unique()` keeps first-appearance order, so dimension order is stable.
ingredients = list(
    db_details['Aliased Ingredient Name'].str.strip().dropna().unique()
)
num_dims = len(ingredients)

# Map each ingredient name to its position (column index) in a recipe vector.
dict_ingredients = {name: position for position, name in enumerate(ingredients)}

In [27]:
# Encode every recipe as a binary ingredient-presence vector.
# Entry `recipe_idx` of `vectors_ingr` describes the recipe whose 'Recipe ID'
# equals recipe_idx + 1; a recipe with no rows in `db_details` gets an all-zero
# vector.
# NOTE(review): this relies on 'Recipe ID' values running 1..num_recipes — confirm.
num_recipes = len(db_recipes)

# Group the alias table ONCE into {Recipe ID: set of stripped ingredient names}.
# Filtering the whole table with a boolean mask for each recipe rescans every
# row per recipe, which is needlessly slow; a set also makes the membership
# test below constant-time.
names_by_recipe = (
    db_details['Aliased Ingredient Name']
    .str.strip()
    .groupby(db_details['Recipe ID'])
    .agg(set)
    .to_dict()
)

vectors_ingr = []
no_ingredients = frozenset()
# `recipe_idx` rather than `id`, so the builtin `id` is not shadowed for the
# rest of the notebook session.
for recipe_idx in range(num_recipes):
    recipe_ingr = names_by_recipe.get(recipe_idx + 1, no_ingredients)
    vectors_ingr.append([1 if ingr in recipe_ingr else 0 for ingr in ingredients])

In [25]:
# Build an Annoy nearest-neighbour index over the recipe vectors using
# Euclidean distance, then persist it to 'base.tree'.
# Item `item_id` in the index is entry `item_id` of `vectors_ingr`.
ANNOY_SEED = 42   # fixed seed: Annoy's tree construction is randomised
ANNOY_TREES = 40  # number of trees passed to build()

t = AnnoyIndex(num_dims, 'euclidean')
# The seed must be set before items are added; it has no effect after build().
t.set_seed(ANNOY_SEED)
# `item_id` rather than `id`, so the builtin `id` is not shadowed notebook-wide.
for item_id in range(num_recipes):
    t.add_item(item_id, vectors_ingr[item_id])
t.build(ANNOY_TREES)
t.save('base.tree')

True

In [24]:
# Look up the nearest neighbours of one recipe and print those that are at
# most the mean neighbour distance away.
# Index item `i` corresponds to 'Recipe ID' i + 1, hence the `+ 1` in every
# table lookup below.
rec_id = 1
rec_ingr = list(db_details[db_details['Recipe ID'] == rec_id + 1]['Aliased Ingredient Name'])

neighbors = t.get_nns_by_item(rec_id, 10)

# Distance to every neighbour except the query item itself, computed once and
# reused for both the mean and the per-neighbour filter/report.
neighbor_dists = [
    (neighbor_id, t.get_distance(rec_id, neighbor_id))
    for neighbor_id in neighbors
    if neighbor_id != rec_id
]

# With no other neighbours there is nothing to average; use NaN explicitly
# instead of letting np.mean([]) emit a RuntimeWarning.
if neighbor_dists:
    meand = np.mean([dist for _, dist in neighbor_dists])
else:
    meand = float('nan')

for neighbor_id, dist in neighbor_dists:
    if dist > meand:
        continue  # keep only neighbours no farther than the mean distance
    name = db_recipes.loc[db_recipes['Recipe ID'] == neighbor_id + 1, 'Title'].iloc[0]
    ingr = list(db_details[db_details['Recipe ID'] == neighbor_id + 1]['Aliased Ingredient Name'])
    # Count the query recipe's ingredients that also appear in the neighbour.
    n_common = len([item for item in rec_ingr if item in ingr])
    print(name, '({}) ingr'.format(len(ingr)), '| Common Ingredients:', n_common, '| Dist:', dist)

achaari dip (8) ingr | Common Ingredients: 6 | Dist: 5.0
dill khakhra with aachari dip (10) ingr | Common Ingredients: 7 | Dist: 5.0
instant handva (4) ingr | Common Ingredients: 4 | Dist: 5.0


In [105]:
# Display the recipe-ingredient alias table for inspection (bare last
# expression, so the notebook renders the frame; long frames are truncated).
db_details

Unnamed: 0,Recipe ID,Original Ingredient Name,Aliased Ingredient Name,Entity ID
0,1,capsicum,capsicum,362
1,1,green bell pepper,pepper bell,362
2,1,soy sauce,soy sauce,291
3,1,sunflower oil,sunflower,426
4,2,buttermilk,buttermilk,61
...,...,...,...,...
456274,45772,1/2 tsp salt,salt,778
456275,45772,"1 yellow squash, diced",squash yellow,493
456276,45772,1/2 tsp dried thyme,thyme,269
456277,45772,1 tsp Worcestershire sauce,worcestershire sauce,2031
