In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

In [2]:
# Dataset location and the number of leading rows to load, named so they are
# easy to find and change in one place.
DATASET_PATH = "./Cleaned_Indian_Food_Dataset.csv"
N_ROWS = 5000

df = pd.read_csv(DATASET_PATH, nrows=N_ROWS)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 2 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   TranslatedRecipeName  5000 non-null   object
 1   ingredients_parsed    5000 non-null   object
dtypes: object(2)
memory usage: 78.2+ KB


In [3]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how="any")

In [4]:
# Pull the ingredient column out as a NumPy array of fixed-width Unicode strings.
df_ing_parsed = df["ingredients_parsed"].to_numpy().astype("U")

In [5]:
# Inspect the ingredient strings that are later passed to the vectorizer.
df_ing_parsed

array(['amchur karela chilli powder gram flour onion cumin coriander powder turmeric powder sunflower',
       'tomato chickpea lentils chilli rice mustard bc belle bhat powder chilli cashew peanuts oilasafoetida cumin urad dal',
       'rice vermicelli noodles asafoetida mustard ghee peas carrot curry urad dal onion lemon chillies',
       ...,
       'powder milk peanut buttersmooth peanut butter crunc peanut butter flour',
       'arhar dal coconut mustard mixed vegetables curry urad dal chillies cumin coconut turmeric powder',
       'tomato cumin powder arhar dal ginger pepper powder mustard ghee asafoetida jaggery mango curry coriander cumin lemon coriander powder turmeric powder chillies'],
      dtype='<U340')

In [6]:
# TF-IDF vectorizer created with no arguments, i.e. scikit-learn's default settings.
vectorizer = TfidfVectorizer()

In [7]:
# Fit the vectorizer on the ingredient strings and encode them in a single step;
# each row of the result corresponds to one entry of df_ing_parsed.
tfidf_recipe = vectorizer.fit_transform(df_ing_parsed)

In [8]:
# Persist the TF-IDF matrix to disk; joblib.dump returns the list of files written.
joblib.dump(tfidf_recipe, "./tfidf_recipe.pkl")

['./tfidf_recipe.pkl']

In [9]:
# Persist the fitted vectorizer to disk alongside the matrix.
joblib.dump(vectorizer, "./vectorizer.pkl")

['./vectorizer.pkl']

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# (number of recipes, number of terms in the learned vocabulary)
tfidf_recipe.shape

(5000, 1780)

In [12]:
# Pairwise cosine similarity between all recipes. Called with a single
# argument, scikit-learn compares the matrix against itself; the default
# dense_output=True makes the result a dense n_recipes x n_recipes array.
cosine_sim = cosine_similarity(tfidf_recipe)

In [13]:
# Print the similarity matrix; NumPy abbreviates large arrays with "...".
print(cosine_sim)

[[1.         0.06887159 0.024998   ... 0.04576724 0.1012628  0.29710631]
 [0.06887159 1.         0.11897308 ... 0.00366424 0.13787823 0.11843156]
 [0.024998   0.11897308 1.         ... 0.         0.26001135 0.27212061]
 ...
 [0.04576724 0.00366424 0.         ... 1.         0.00616631 0.02404231]
 [0.1012628  0.13787823 0.26001135 ... 0.00616631 1.         0.41333939]
 [0.29710631 0.11843156 0.27212061 ... 0.02404231 0.41333939 1.        ]]


In [14]:
import numpy as np

In [26]:
def recommend(strOfIngredients, top_n=50):
    """Return the names of the recipes most similar to a set of ingredients.

    The query string is encoded with the already-fitted ``vectorizer`` and
    compared, by cosine similarity, against every row of ``tfidf_recipe``.
    Matching rows are then looked up by position (``.iloc``) in ``df``.

    Parameters
    ----------
    strOfIngredients : str
        Ingredient names in a single string, e.g. ``"onion cumin tomato"``.
    top_n : int, optional
        Maximum number of recipe names to return. Defaults to 50, the value
        that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Up to ``top_n`` values of ``df['TranslatedRecipeName']``, ordered from
        most to least similar. No similarity threshold is applied, so recipes
        with a score of 0 can appear when fewer than ``top_n`` recipes share a
        term with the query.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    ing_v = vectorizer.transform([strOfIngredients])

    # A single row of scores: similarity of the query to every recipe.
    similarity_list = cosine_similarity(ing_v, tfidf_recipe)

    # argsort is ascending, so reverse it to put the best matches first.
    sorted_indexes = np.argsort(similarity_list[0])[::-1]

    # Slice the index array before the lookup so only the requested rows are
    # materialised; the result equals reordering everything and then slicing.
    return df['TranslatedRecipeName'].iloc[sorted_indexes[:top_n]].values

In [27]:
# Query with the ingredient string of the first recipe in the dataset.
sample_ingredients = "amchur karela chilli powder gram flour onion cumin coriander powder turmeric powder sunflower"
print(recommend(sample_ingredients))

['Masala Karela Recipe' 'Bitter Gourd Peel Puri Recipe'
 'Crispy Vegetable Recipe - Crispy Bhujia'
 'Bitter gourd spice vegetable recipe'
 'Karela Aur Baingan Ki Sabzi Recipe - Bitter Gourd Brinjal Sabzi'
 'Bihari Bitter Gourd Potato Vegetable Recipe' 'Bitter Gourd Chips Recipe'
 'Kare Badge Badge Papa Badge Badge Badge |'
 'Karela Thepla Recipe - Bitter Gourd Indian Flat Bread'
 'Bhopli Mirchi Chi Peeth Perun Bhaji Recipe'
 'Andhra Style Kakarakaya Vepudu Recipe - Bitter Gourd Stir Fry'
 'Vendakkai Varuval Recipe - Tamil Nadu Style Okra Stir Fry'
 'Kaju Karela Recipe - Bitter Gourd And Cashew Nuts Stir Fry'
 'Rajasthani Style Dried Gatta Capsicum Vegetable Recipe'
 'Rajasthani Ghasela recipe - Besan Ka Pitta (Recipe in Hindi)'
 'Stuffed Butter Recipe - Stuffed Spice Gravy'
 'Rajma Aloo Cutlet Recipe (Tikki / Patty)'
 'Beguni Recipe - Baingan/Eggplant Pakora'
 'Karela Muthia Recipe - Bitter Gourd Steamed Dumplings'
 'Karatyacho Fodi Recipe - Pan Fried Bitter Gourd'
 'Spinach Thalipeeth