# Finding similar recipes using pairwise-scoring
Aug 2021

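The goal is simple: for each recipe pair (a, b), we compute the number of ingredients that a and b have in common, divided by the number of ingredients in recipe a. Note that this score is asymmetric: because the denominator depends only on a, score(a, b) generally differs from score(b, a).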

Feel free to try the UI at the end and let me know if this returns similar recipes!


In [1]:
# Importing libraries

import numpy as np
import pandas as pd
import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

In [2]:
# Load the recipe dataset from disk and preview its first rows
df = pd.read_csv(filepath_or_buffer='./data/indian_food.csv')
df.head(n=20)

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region
0,Balu shahi,"Maida flour, yogurt, oil, sugar",vegetarian,45,25,sweet,dessert,West Bengal,East
1,Boondi,"Gram flour, ghee, sugar",vegetarian,80,30,sweet,dessert,Rajasthan,West
2,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins",vegetarian,15,60,sweet,dessert,Punjab,North
3,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su...",vegetarian,15,30,sweet,dessert,Rajasthan,West
4,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,...",vegetarian,15,40,sweet,dessert,West Bengal,East
5,Imarti,"Sugar syrup, lentil flour",vegetarian,10,50,sweet,dessert,West Bengal,East
6,Jalebi,"Maida, corn flour, baking soda, vinegar, curd,...",vegetarian,10,50,sweet,dessert,Uttar Pradesh,North
7,Kaju katli,"Cashews, ghee, cardamom, sugar",vegetarian,10,20,sweet,dessert,-1,-1
8,Kalakand,"Milk, cottage cheese, sugar",vegetarian,20,30,sweet,dessert,West Bengal,East
9,Kheer,"Milk, rice, sugar, dried fruits",vegetarian,10,40,sweet,dessert,-1,-1


In [3]:
## Viewing unique ingredients to make sure that the data is clean

# One raw, comma-separated ingredient string per recipe
ingredients_list = df['ingredients'].tolist()

# Flatten every recipe's string into a single list of lower-cased, trimmed
# ingredient names. The iteration variable must not be called `list`:
# that would replace the builtin `list` for the rest of the kernel session.
ingredients = [
    ingredient.lower().strip()
    for recipe_ingredients in ingredients_list
    for ingredient in recipe_ingredients.split(', ')
]

# Sorted, de-duplicated view for eyeballing typos and near-duplicates
unique_ingredients = sorted(set(ingredients))
#print(unique_ingredients)


In [4]:
## Cleaning ingredients column

# Lower-case every ingredient string, then trim leading/trailing whitespace,
# in a single chained pass over the column
df['ingredients'] = df['ingredients'].str.lower().str.strip()

def comma_space(x):
    """Return ``x`` with all whitespace around its comma separators removed.

    Each comma-separated item is trimmed individually, so irregular spacing
    such as ``"a ,b"`` or ``"a,  b"`` is normalised as well as the usual
    ``"a, b"``. Otherwise the padded names would survive a later
    ``split(",")`` and never match their clean counterparts.

    Parameters
    ----------
    x : str
        Comma-separated ingredient string.

    Returns
    -------
    str
        The same items joined by bare commas, e.g. ``"a,b,c"``.
    """
    return ','.join(item.strip() for item in x.split(','))

# Run the separator clean-up on each recipe's ingredient string
df['ingredients'] = df['ingredients'].map(comma_space)
#df.head(10)

In [5]:
# Creating key-value pairs (name: ingredients)
# Note: recipes that share a name collapse to a single entry (the last one wins).
recipe_dict = dict(zip(df['name'], df['ingredients']))

# Split each comma-separated ingredient string into a list of ingredient names.
# A comprehension keeps the iteration variables local, so the module-level
# `ingredients` list built earlier is not overwritten by a loop variable.
clean_recipe_dict = {
    recipe_name: ingredient_string.split(",")
    for recipe_name, ingredient_string in recipe_dict.items()
}

#print(clean_recipe_dict)

In [6]:
# Creating recipe pairs
#
# The similarity score is asymmetric (it is normalised by the ingredient count
# of the FIRST recipe) and the lookup further down filters on the first element
# of each pair. Every ordered pair (a, b) is therefore needed: with unordered
# combinations, a recipe would only ever be compared with recipes that come
# after it in the dataset, and the last recipe would get no results at all.

from itertools import combinations, permutations  # `combinations` kept in case other cells use it

recipe_combinations = list(permutations(clean_recipe_dict, 2))

In [17]:
# Determining similarity score per recipe pair
# Score is determined as follows:
# num of ingredients in both a and b / num of ingredients in a

def similarity_score(a, b, recipes=None):
    """Return the fraction of recipe ``a``'s ingredients that also appear in recipe ``b``.

    Both ingredient lists are de-duplicated before counting, so an ingredient
    listed twice in ``a`` does not deflate the score and a perfect overlap
    always yields 1.0. The score is asymmetric: it is normalised by ``a`` only.

    Parameters
    ----------
    a, b : str
        Recipe names; both must be keys of ``recipes``.
    recipes : dict, optional
        Mapping of recipe name to list of ingredient names. Defaults to the
        module-level ``clean_recipe_dict``.

    Returns
    -------
    float
        Value in [0, 1]; 0.0 when recipe ``a`` has no ingredients.

    Raises
    ------
    KeyError
        If ``a`` or ``b`` is not a key of ``recipes``.
    """
    if recipes is None:
        recipes = clean_recipe_dict
    unique_a = set(recipes[a])
    unique_b = set(recipes[b])
    # Guard against an empty ingredient list instead of dividing by zero
    if not unique_a:
        return 0.0
    return len(unique_a & unique_b) / len(unique_a)

# Score every recipe pair, keyed by its (a, b) tuple
score_dict = {pair: similarity_score(*pair) for pair in recipe_combinations}

# print part of dictionary
n=5
# A plain loop is used rather than a list comprehension: a comprehension of
# print() calls builds a throwaway list of None that the notebook then
# displays as the cell's output.
for item in list(score_dict.items())[:n]:
    print(item)

(('Balu shahi', 'Boondi'), 0.25)
(('Balu shahi', 'Gajar ka halwa'), 0.25)
(('Balu shahi', 'Ghevar'), 0.25)
(('Balu shahi', 'Gulab jamun'), 0.25)
(('Balu shahi', 'Imarti'), 0.0)


[None, None, None, None, None]

### Below is the code used to actually interact with the recommendation system.

In [18]:
### UI: Choose a recipe to find the top 10 most similar recipes ###
# user_recipe = 'Kaju katli'
print("Enter a recipe name (case sensitive):")
user_recipe = input()

# Tolerate accidental leading/trailing whitespace, but only when the raw
# text is not itself a known recipe name.
if user_recipe not in clean_recipe_dict:
    user_recipe = user_recipe.strip()

# Tell the user why the result is empty instead of silently printing {}
if user_recipe not in clean_recipe_dict:
    print(f"No recipe named {user_recipe!r} was found in the dataset.")

# Keep only the pairs whose first recipe is the one the user asked for
results = {
    pair: score
    for pair, score in score_dict.items()
    if pair[0] == user_recipe
}

# Highest score first; sorted() is stable, so ties keep their dataset order
top_10_recipes = sorted(results, key=results.get, reverse=True)[:10]

results_with_scores = {pair: results[pair] for pair in top_10_recipes}

print(results_with_scores)

Enter a recipe name (case sensitive):


 Boondi


{('Boondi', 'Laddu'): 1.0, ('Boondi', 'Sohan papdi'): 1.0, ('Boondi', 'Gajar ka halwa'): 0.6666666666666666, ('Boondi', 'Ghevar'): 0.6666666666666666, ('Boondi', 'Gulab jamun'): 0.6666666666666666, ('Boondi', 'Kaju katli'): 0.6666666666666666, ('Boondi', 'Chhena jalebi'): 0.6666666666666666, ('Boondi', 'Ledikeni'): 0.6666666666666666, ('Boondi', 'Mihidana'): 0.6666666666666666, ('Boondi', 'Pantua'): 0.6666666666666666}
