In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import functions

# Set up for matplotlib inline
%matplotlib inline

# Load Data
rbdata_ar = pd.read_csv('CleanSrilankanRecipes.csv')
rbdata_ar.index = range(len(rbdata_ar))
data = rbdata_ar.loc[:, ['Title', 'Ingredients', 'Instructions','CleanIngredients']]
data = data.dropna(how='any')
data.index = range(len(data))

# Process ingredients column
data_ingr = data['CleanIngredients']

# Vectorize ingredients
corpus = data_ingr
ct_vectorizer = CountVectorizer(stop_words=stopwords_ingr)
ingr_ct = ct_vectorizer.fit_transform(corpus)

# Apply LSA
lsa_10 = TruncatedSVD(10)
ct_lsa_10 = lsa_10.fit_transform(ingr_ct)
functions.display_topics(lsa_10, ct_vectorizer.get_feature_names(), 20)

# Apply NMF
nmf_10 = NMF(n_components=10, init='nndsvda', max_iter=14000, tol=1e-4)
rb_nmf_10 = nmf_10.fit_transform(ingr_ct)
functions.display_topics(nmf_10, ct_vectorizer.get_feature_names(), 20)


Topic  0
curry, salt, coconut, leaves, chili, seeds, oil, onion, turmeric, garlic, milk, pep, cloves, water, cooking, ginger, black, red, thick, mustard

Topic  1
pep, chili, cut, ginger, black, ground, chicken, salt, akes, paste, cloves, garlic, minced, water, crushed, sugar, oil, butter, beef, sauce

Topic  2
seeds, cooking, mustard, fennel, cumin, paste, cori, ginger, black, dried, onion, oil, garlic, chicken, leaves, pep, cinnamon, cardamom, turmeric, tamarind

Topic  3
curry, leaves, cloves, leaf, beef, garlic, cinnamon, broken, minced, cut, fenugreek, long, roasted, cayenne, ginger, vegetable, ground, youre, chilies, making

Topic  4
pep, seeds, black, coconut, cumin, ground, cardamom, cloves, cut, cinnamon, fennel, freshly, cori, milk, sugar, grated, corns, vinegar, rice, pods

Topic  5
easter, cake, food, coloring, vanilla, recipe, yellow, curry, pink, buttercream, wilton, tip, green, eggs, ground, grass, batter, fect, bunnies, coloringvanilla

Topic  6
milk, ground, pep, suga

In [6]:
data_ingr

0      ['chicken potatoes cheddar cheese garlic onion...
1      ['chicken potatoes cheddar cheese habanero chi...
2      ['bone chicken curry red chili onion cloves ga...
3      ['beans chili cinnamon pearl onions Asian shal...
4      ['cooked rice eggs gheecooking oil onion ginge...
                             ...                        
374    [' Payatham Urundai Mung bean Green moong dhal...
375    ['Butter Granulated sugar Egg Vanilla extract ...
376    ['peanut Butter Brown sugar White sugar Vanill...
377      ['palmyra fruit juice Sugar Water Cooking oil']
378    ['Banana Wood apple Papaya Mango Raisins cashe...
Name: CleanIngredients, Length: 379, dtype: object

In [7]:
# Compute similarity matrix
similarity = cosine_similarity(ct_lsa_10)

In [8]:
pickle.dump(ct_vectorizer, open('Artifacts/sl_ct_vectorizer.pkl', 'wb'))
pickle.dump(lsa_10, open('Artifacts/sl_lsa_10.pkl', 'wb'))
pickle.dump(ct_lsa_10, open('Artifacts/sl_ct_lsa_10.pkl', 'wb'))

In [9]:
pickle.dump(data, open('Artifacts/sl_RecipeList.pkl', 'wb'))
pickle.dump(similarity, open('Artifacts/sl_similarity.pkl', 'wb'))

In [1]:
# Function to recommend recipes
def recommend(ingredients):
    # Load precomputed data and similarity matrix
    data = pickle.load(open('Artifacts/sl_RecipeList.pkl', 'rb'))
    similarity = pickle.load(open('Artifacts/sl_similarity.pkl', 'rb'))
    
    # Process the input ingredients
    input_ingr = functions.regex_nodigits_new(ingredients)
    input_vector = ct_vectorizer.transform([input_ingr])
    
    # Compute similarity scores
    input_lsa = lsa_10.transform(input_vector)
    scores = cosine_similarity(input_lsa, ct_lsa_10)
    
    # Get top 5 similar recipes
    indices = np.argsort(scores[0])[::-1][1:6]
    
    # Display the recommended recipes
    for idx in indices:
        print(f"Recipe Title: {data.iloc[idx]['Title']}")
        print(f"Ingredients: {data.iloc[idx]['Ingredients']}")
        print(f"Instructions: {data.iloc[idx]['Instructions']}\n")

# Example usage
recommend("chicken, potato, olive oil")

Recipe Title: Brown Bag Chicken
Ingredients: ['One 3- to 3 1/2-pound chicken', 'Sea salt and freshly ground black pepper', '1 onion, cut in half', '4 sprigs fresh rosemary', '1 tablespoon ground hot Hungarian paprika']
Instructions: Preheat the oven to 400°F.
Rinse the chicken, pat dry, and remove any excess fat. Sprinkle generously with salt and pepper to taste inside and out. Place the onion and rosemary inside the cavity and rub the outside of the chicken with the paprika.
Lay a standard-size brown paper grocery bag on its side and place the chicken inside, tying the top of the bag closed with kitchen string. Place the bagged chicken on a rimmed baking sheet in the lower third of the oven, so you have plenty of room at the top. Cook for 1 1/2 hours, remove the chicken from the oven, and carefully open the bag to release the steam. Check for doneness by inserting an internal thermometer in the thickest part of the thigh; it should read about 165°F and the juices should run clear when

In [2]:
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import functions

def recommend_with_ingredient_match(ingredients):
    # Load precomputed data and models
    data = pickle.load(open('Artifacts/sl_RecipeList.pkl', 'rb'))
    ct_vectorizer = pickle.load(open('Artifacts/sl_ct_vectorizer.pkl', 'rb'))
    lsa_10 = pickle.load(open('Artifacts/sl_lsa_10.pkl', 'rb'))
    ct_lsa_10 = pickle.load(open('Artifacts/sl_ct_lsa_10.pkl', 'rb'))
    
    # Process the input ingredients
    input_ingr = functions.regex_nodigits_new(ingredients)
    input_vector = ct_vectorizer.transform([input_ingr])
    input_lsa = lsa_10.transform(input_vector)
    
    # Compute similarity scores
    scores = cosine_similarity(input_lsa, ct_lsa_10)
    
    # Get top 5 similar recipes
    indices = np.argsort(scores[0])[::-1][1:6]
    
    input_ingredients_set = set(input_ingr.split(", "))
    
    # Display the recommended recipes with ingredient match percentage
    for idx in indices:
        cleaned_recipe_ingredients = data.iloc[idx]['CleanIngredients']
        recipe_ingredients = data.iloc[idx]['Ingredients']
        
        cleaned_recipe_vector = ct_vectorizer.transform([cleaned_recipe_ingredients])
        cleaned_recipe_lsa = lsa_10.transform(cleaned_recipe_vector)
        
        matching_score = cosine_similarity(input_lsa, cleaned_recipe_lsa)[0][0]
        match_percentage = matching_score * 100
        
        print(f"Recipe Title: {data.iloc[idx]['Title']}")
        print(f"Ingredients: {recipe_ingredients}")
        print(f"Instructions: {data.iloc[idx]['Instructions']}")
        print(f"Match Percentage: {match_percentage:.2f}%\n")

# Example usage
recommend_with_ingredient_match("eggs, salt, flour, pepper, onion, chilies, chicken")

Recipe Title: DEEP FRYING SHRIMP WITH CORNSTARCH | UNIQUE PRAWNS FRY
Ingredients: ['-----For Shrimp-----\nShrimp/Prawn - 250g (after clean)\nGarlic paste - Â½ tablespoon\nGinger paste - Â½ teaspoon\nSemi thick tamarind juice - 2 tablespoons\nTurmeric powder - Â¼ teaspoon\nChili powder - Â½ teaspoon\nWater - 100ml\nSalt - As you want\n-----For Coating Batter-----\nAll-purpose flour - 75g\nCornstarch - 2 tablespoons\nGarlic paste - 1 teaspoon\nGinger paste - Â½ teaspoon\nTurmeric powder - Â¼ teaspoon\nBaking powder - Â¼ teaspoon\nSugar - Â¼ teaspoon\nWater - 150ml\nSalt - As you need']
Instructions: ['Rinse the shrimps well enough and peel off its skins except the tail portion.\nPrepare the ginger garlic paste and tamarind juice. Besides, get ready with water, turmeric powder, chili powder, and salt.\nPlace the cooking pan on the stove and add shrimps. On top of that, add turmeric powder, chili powder, ginger-garlic paste, tamarind juice, water, and salt. Give a mix and switch on the fla