In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import functions

# Set up for matplotlib inline
%matplotlib inline

# Load Data
rbdata_ar = pd.read_csv('CleanedRecipes.csv')
rbdata_ar.index = range(len(rbdata_ar))
data = rbdata_ar.loc[:, ['Title', 'Ingredients', 'Instructions','CleanIngredients','Image_Name']]
data = data.dropna(how='any')
data.index = range(len(data))

# Define stopwords
stopwords_nltk = stopwords.words('english')
#stopwords_nltk.extend(['ADVERTISEMENT', 'advertisement'])

# Process ingredients column
data_ingr = data['CleanIngredients'].apply(functions.regex_nodigits_new)
stopwords_ingr = stopwords_nltk + ['tablespoon', 'teaspoon', 'cup', 'ounce', 'pound', 'tablespoons', 'teaspoons', 'cups', 'ounces', 'pounds', 'inch', 'inches']

# Vectorize ingredients
corpus = data_ingr
ct_vectorizer = CountVectorizer(stop_words=stopwords_ingr)
ingr_ct = ct_vectorizer.fit_transform(corpus)

# Apply LSA
lsa_10 = TruncatedSVD(10)
ct_lsa_10 = lsa_10.fit_transform(ingr_ct)
functions.display_topics(lsa_10, ct_vectorizer.get_feature_names(), 20)

# Apply NMF
nmf_10 = NMF(n_components=10, init='nndsvda', max_iter=14000, tol=1e-4)
rb_nmf_10 = nmf_10.fit_transform(ingr_ct)
functions.display_topics(nmf_10, ct_vectorizer.get_feature_names(), 20)


Topic  0
salt, oil, pepper, kosher, olive, sugar, garlic, black, butter, juice, red, lemon, unsalted, thinly, leaves, cloves, white, vinegar, onion, vegetable

Topic  1
sugar, butter, unsalted, vanilla, cream, extract, baking, temperature, room, egg, salt, chocolate, eggs, milk, heavy, granulated, water, chilled, cinnamon, pure

Topic  2
juice, lemon, sugar, lime, oil, orange, thinly, zest, leaves, sauce, water, ginger, cilantro, peel, minced, mint, green, halved, seeds, soy

Topic  3
lemon, juice, salt, olive, pepper, kosher, zest, black, extravirgin, unsalted, sea, parsley, freshly, butter, orange, peel, atleaf, parmesan, coarse, chives

Topic  4
olive, oil, extravirgin, cheese, butter, parsley, garlic, unsalted, temperature, room, tomatoes, vanilla, cloves, extract, dry, italian, cream, baking, chocolate, wine

Topic  5
oil, kosher, salt, olive, vinegar, thinly, seeds, sugar, vegetable, extravirgin, sesame, baking, toasted, extract, vanilla, coconut, removed, spray, torn, chocolate

In [7]:
data_ingr

0        ['chicken kosher salt acorn sage rosemary unsa...
1        ['egg whites potatoes kosher salt black pepper...
2        ['milk milk garlic onion paprika black pepper ...
3        ['Italian loaf olive oil sweet Italian sausage...
4        ['sugar water oz bon oz lemon juice apple butt...
                               ...                        
13483    ['cocoa doubleacting baking salt eggs granulat...
13484    ['lemon butternut dice olive oil onion Israeli...
13485    ['Leftover katsuo bushi from making katsuo bus...
13486    ['unsalted butter baby spinach feta crumbled n...
13487    ['poblano chiles tomatoes juice garlic cloves ...
Name: CleanIngredients, Length: 13488, dtype: object

In [8]:
# Compute similarity matrix
similarity = cosine_similarity(ct_lsa_10)

In [9]:
pickle.dump(ct_vectorizer, open('Artifacts/ct_vectorizer.pkl', 'wb'))
pickle.dump(lsa_10, open('Artifacts/lsa_10.pkl', 'wb'))
pickle.dump(ct_lsa_10, open('Artifacts/ct_lsa_10.pkl', 'wb'))

In [10]:
pickle.dump(data, open('Artifacts/RecipeList.pkl', 'wb'))
pickle.dump(similarity, open('Artifacts/similarity.pkl', 'wb'))

In [13]:
# Function to recommend recipes
def recommend(ingredients):
    # Load precomputed data and similarity matrix
    data = pickle.load(open('Artifacts/RecipeList.pkl', 'rb'))
    similarity = pickle.load(open('Artifacts/similarity.pkl', 'rb'))
    
    # Process the input ingredients
    input_ingr = functions.regex_nodigits_new(ingredients)
    input_vector = ct_vectorizer.transform([input_ingr])
    
    # Compute similarity scores
    input_lsa = lsa_10.transform(input_vector)
    scores = cosine_similarity(input_lsa, ct_lsa_10)
    
    # Get top 5 similar recipes
    indices = np.argsort(scores[0])[::-1][1:6]
    
    # Display the recommended recipes
    for idx in indices:
        print(f"Recipe Title: {data.iloc[idx]['Title']}")
        print(f"Ingredients: {data.iloc[idx]['Ingredients']}")
        print(f"Instructions: {data.iloc[idx]['Instructions']}\n")

# Example usage
recommend("eggs, salt, flour, pepper, onion, chilies, chicken")

Recipe Title: Brown Bag Chicken
Ingredients: ['One 3- to 3 1/2-pound chicken', 'Sea salt and freshly ground black pepper', '1 onion, cut in half', '4 sprigs fresh rosemary', '1 tablespoon ground hot Hungarian paprika']
Instructions: Preheat the oven to 400°F.
Rinse the chicken, pat dry, and remove any excess fat. Sprinkle generously with salt and pepper to taste inside and out. Place the onion and rosemary inside the cavity and rub the outside of the chicken with the paprika.
Lay a standard-size brown paper grocery bag on its side and place the chicken inside, tying the top of the bag closed with kitchen string. Place the bagged chicken on a rimmed baking sheet in the lower third of the oven, so you have plenty of room at the top. Cook for 1 1/2 hours, remove the chicken from the oven, and carefully open the bag to release the steam. Check for doneness by inserting an internal thermometer in the thickest part of the thigh; it should read about 165°F and the juices should run clear when