In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import functions

# Set up for matplotlib inline
%matplotlib inline

# Load Data
rbdata_ar = pd.read_csv('CleanSrilankanRecipes.csv')
rbdata_ar.index = range(len(rbdata_ar))
data = rbdata_ar.loc[:, ['Title', 'Ingredients', 'Instructions','CleanIngredients']]
data = data.dropna(how='any')
data.index = range(len(data))

# Process ingredients column
data_ingr = data['CleanIngredients']

# Vectorize ingredients
corpus = data_ingr
ct_vectorizer = CountVectorizer(stop_words=stopwords_ingr)
ingr_ct = ct_vectorizer.fit_transform(corpus)

# Apply LSA
lsa_10 = TruncatedSVD(10)
ct_lsa_10 = lsa_10.fit_transform(ingr_ct)
functions.display_topics(lsa_10, ct_vectorizer.get_feature_names(), 20)

# Apply NMF
nmf_10 = NMF(n_components=10, init='nndsvda', max_iter=14000, tol=1e-4)
rb_nmf_10 = nmf_10.fit_transform(ingr_ct)
functions.display_topics(nmf_10, ct_vectorizer.get_feature_names(), 20)


Topic  0
curry, salt, coconut, leaves, chili, seeds, oil, onion, turmeric, garlic, milk, pep, cloves, water, cooking, ginger, black, red, thick, mustard

Topic  1
pep, chili, cut, ginger, black, ground, chicken, salt, akes, paste, cloves, garlic, minced, water, crushed, sugar, oil, butter, beef, sauce

Topic  2
seeds, cooking, mustard, fennel, cumin, paste, cori, ginger, black, dried, onion, oil, garlic, chicken, leaves, pep, cinnamon, cardamom, turmeric, tamarind

Topic  3
curry, leaves, cloves, leaf, beef, garlic, cinnamon, broken, minced, cut, fenugreek, long, roasted, cayenne, ginger, vegetable, ground, youre, chilies, making

Topic  4
pep, seeds, black, coconut, cumin, ground, cardamom, cloves, cut, cinnamon, fennel, freshly, cori, milk, sugar, grated, corns, vinegar, rice, pods

Topic  5
easter, cake, food, coloring, vanilla, recipe, yellow, curry, pink, buttercream, wilton, tip, green, eggs, ground, grass, batter, fect, bunnies, coloringvanilla

Topic  6
milk, ground, pep, suga

In [6]:
data_ingr

0      ['chicken potatoes cheddar cheese garlic onion...
1      ['chicken potatoes cheddar cheese habanero chi...
2      ['bone chicken curry red chili onion cloves ga...
3      ['beans chili cinnamon pearl onions Asian shal...
4      ['cooked rice eggs gheecooking oil onion ginge...
                             ...                        
374    [' Payatham Urundai Mung bean Green moong dhal...
375    ['Butter Granulated sugar Egg Vanilla extract ...
376    ['peanut Butter Brown sugar White sugar Vanill...
377      ['palmyra fruit juice Sugar Water Cooking oil']
378    ['Banana Wood apple Papaya Mango Raisins cashe...
Name: CleanIngredients, Length: 379, dtype: object

In [7]:
# Compute similarity matrix
similarity = cosine_similarity(ct_lsa_10)

In [8]:
pickle.dump(ct_vectorizer, open('Artifacts/sl_ct_vectorizer.pkl', 'wb'))
pickle.dump(lsa_10, open('Artifacts/sl_lsa_10.pkl', 'wb'))
pickle.dump(ct_lsa_10, open('Artifacts/sl_ct_lsa_10.pkl', 'wb'))

In [9]:
pickle.dump(data, open('Artifacts/sl_RecipeList.pkl', 'wb'))
pickle.dump(similarity, open('Artifacts/sl_similarity.pkl', 'wb'))

In [10]:
# Function to recommend recipes
def recommend(ingredients):
    # Load precomputed data and similarity matrix
    data = pickle.load(open('Artifacts/sl_RecipeList.pkl', 'rb'))
    similarity = pickle.load(open('Artifacts/sl_similarity.pkl', 'rb'))
    
    # Process the input ingredients
    input_ingr = functions.regex_nodigits_new(ingredients)
    input_vector = ct_vectorizer.transform([input_ingr])
    
    # Compute similarity scores
    input_lsa = lsa_10.transform(input_vector)
    scores = cosine_similarity(input_lsa, ct_lsa_10)
    
    # Get top 5 similar recipes
    indices = np.argsort(scores[0])[::-1][1:6]
    
    # Display the recommended recipes
    for idx in indices:
        print(f"Recipe Title: {data.iloc[idx]['Title']}")
        print(f"Ingredients: {data.iloc[idx]['Ingredients']}")
        print(f"Instructions: {data.iloc[idx]['Instructions']}\n")

# Example usage
recommend("chicken, potato, olive oil")

Recipe Title: Air-fried eggplant salad
Ingredients: ['To fry eggplants\n2 long eggplants\n1â\x81„4 tsp turmeric powder (optional)\noil, enough to coat eggplant slices\nTo make the salad\n1â\x81„4 cup thinly sliced shallots or red onions\n1 green chili sliced (adjust to your liking)\n1â\x81„2 cup cherry tomatoes/ sliced regular tomatoes\n1 tsp maldive fish flakes (optional)\n1 tsp lime juice\nsalt as needed\npepper as needed']
Instructions: ["Cut eggplants into long strips. About 1 cm thick. Use long Asian eggplants for this (not the big Mediterranean eggplants)\nPut all the eggplants into a bowl, add turmeric, some oil about 2 tsp and mix well so that each and every piece gets covered with oil. Add more or less according to the amount of eggplant you use. The only requirement here is to coat all the eggplant pieces with a very thin layer of oil. Turmeric is totally optional. If you want you can add salt too. But I like to add salt at the end. That way it's easier to can control the sal