In [31]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import joblib

from surprise import Dataset, Reader, accuracy, NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering
from surprise.accuracy import rmse
from surprise.prediction_algorithms import SVD, SVDpp, NMF, BaselineOnly, NormalPredictor
from IPython.core.display import HTML
from surprise.model_selection.split import train_test_split as surprise_train_test_split
from surprise.model_selection import GridSearchCV, cross_validate

%matplotlib inline

In [3]:
user_ratings_df = pd.read_csv("./Data/user_reviews_no_zero.csv", index_col=[0])

In [37]:
recipes_df = pd.read_csv("./Data/recipes_subcat_cleaned 2.csv", index_col=[0])

In [38]:
recipes_df.head()

Unnamed: 0,name,recipe_id,minutes,description,ingredients,calories,total_fat_pdv,sugar_pdv,sodium_pdv,protein_pdv,saturated_fat_pdv,carbs_pdv,recipe_type
0,arriba baked winter squash mexican style,137739,55,"autumn is my favorite time of year to cook! this recipe \r\ncan be prepared either spicy or sweet, your choice!\r\ntwo of my posted mexican-inspired seasoning mix recipes are offered as suggestions.","['winter squash', 'mexican seasoning', 'mixed spice', 'honey', 'butter', 'olive oil', 'salt']",51.5,0.0,13.0,0.0,2.0,0.0,4.0,"['vegetarian', 'any', 'low cal', 'low carb']"
1,a bit different breakfast pizza,31490,30,this recipe calls for the crust to be prebaked a bit before adding ingredients. feel free to change sausage to ham or bacon. this warms well in the microwave for those late risers.,"['prepared pizza crust', 'sausage patty', 'eggs', 'milk', 'salt and pepper', 'cheese']",173.4,18.0,0.0,17.0,22.0,35.0,1.0,"['any', 'low cal', 'low carb', 'sugar free', 'not vegetarian']"
2,all in the kitchen chili,112140,130,this modified version of 'mom's' chili was a hit at our 2004 christmas party. we made an extra large pot to have some left to freeze but it never made it to the freezer. it was a favorite by all. perfect for any cold and rainy day. you won't find this one in a cookbook. it is truly an original.,"['ground beef', 'yellow onions', 'diced tomatoes', 'tomato paste', 'tomato soup', 'rotel tomatoes', 'kidney beans', 'water', 'chili powder', 'ground cumin', 'salt', 'lettuce', 'cheddar cheese']",269.8,22.0,32.0,48.0,39.0,27.0,5.0,"['any', 'low cal', 'low carb', 'not vegetarian']"
3,alouette potatoes,59389,45,"this is a super easy, great tasting, make ahead side dish that looks like you spent a lot more time preparing than you actually do. plus, most everything is done in advance. the times do not reflect the standing time of the potatoes.","['spreadable cheese with garlic and herbs', 'new potatoes', 'shallots', 'parsley', 'tarragon', 'olive oil', 'red wine vinegar', 'salt', 'pepper', 'red bell pepper', 'yellow bell pepper']",368.1,17.0,10.0,2.0,14.0,8.0,20.0,"['vegetarian', 'any', 'low cal']"
4,amish tomato ketchup for canning,44061,190,"my dh's amish mother raised him on this recipe. he much prefers it over store-bought ketchup. it was a taste i had to acquire, but now my ds's also prefer this type of ketchup. enjoy!","['tomato juice', 'apple cider vinegar', 'sugar', 'salt', 'pepper', 'clove oil', 'cinnamon oil', 'dry mustard']",352.9,1.0,337.0,23.0,3.0,0.0,28.0,"['vegetarian', 'any', 'low cal']"


In [7]:
user_ratings_df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
10,56680,79222,2006-11-11,5.0,"Oh, This was wonderful! Had a soup and salad ..."
11,183565,79222,2006-02-13,5.0,Wow! My family loves this recipe and it is a ...
12,101823,79222,2006-03-21,5.0,Excellent chowder. This was the perfect warm-...
13,446143,79222,2008-02-01,4.0,"Oh, how wonderful! I doubled the crab, and ad..."
14,226989,79222,2008-03-07,4.0,DH and I enjoyed this. However I used it only ...


In [39]:
#Clean the ingredients column so each row is a list of strings
recipes_df["ingredients"] = recipes_df["ingredients"].str.replace("'", "")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("[")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("]")
recipes_df["ingredients"] = recipes_df["ingredients"].str.split(", ")

In [40]:
#Clean the recipe_type column so each row is a list of strings
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.replace("'", "")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("[")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("]")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.split(", ")

## Setting up Surprise

In [41]:
user_ratings_df[["user_id", "recipe_id"]] = user_ratings_df[["user_id", "recipe_id"]].astype(int)

In [42]:
rating_surprise_df = user_ratings_df[["user_id", "recipe_id", "rating"]]

In [43]:
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(rating_surprise_df, reader)

In [44]:
trainset_full = surprise_data.build_full_trainset()

In [45]:
best_model = SVD(n_factors = 3,  n_epochs= 35, lr_all = .0025)
best_model.fit(trainset_full)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fd0aaa004c0>

In [32]:
#best_model_file = open("best_model_file.pkl", "wb")
#joblib.dump(best_model, best_model_file)
#best_model_file.close()

In [16]:
# Pickle the Model
#filename = "best_model.sav"
#pickle.dump(best_model, open(filename, "wb"))

In [46]:
## Subset data frame to show reviewers the products they have rated 

prior_ratings_df = pd.DataFrame(user_ratings_df.set_index("user_id"))
prior_ratings_df.drop(columns=["date", "rating", "review"], inplace=True)
prior_ratings_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 537267 entries, 56680 to 1122988
Data columns (total 1 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   recipe_id  537267 non-null  int64
dtypes: int64(1)
memory usage: 8.2 MB


## Creating Recommender Function 

In [47]:
# Create a new column that converts the list of ingredients to a set to improve efficiencies in the function
recipes_df["set_ingredients"] = recipes_df["ingredients"].apply(lambda x: set(x))


In [48]:
# Sanity check
type(recipes_df["set_ingredients"][0])

set

In [88]:
def recommended_recipes():
    #set up to not truncate display
    pd.set_option("display.max_colwidth", None)
    
    # Set up user inputs 
    user = int(input("user_id: "))
    ingredient_request = input("What ingredients do you need to use?  ")
    sub_cat_request = input("Recipe type? ")
    num = int(input("How many recipes would you like to see? "))
    
    # Set up dictionary to account for variations of ingredients 
    ingredient_variations = {"chicken" : ["chicken", "chicken breast"],
                        "tomato": ["tomato", "diced tomato", "crushed tomato", "chopped tomato", "heirloom tomato"],
                        "onion": ["onion","yellow onion", "red onion", "sweet onion", "vidailia onion", "chopped onion", "diced onion"],
                        "garlic": ["garlic", "peeled garlic", "crushed garlic", "garlic clove", "garlic cloves", "head of garlic", "minced garlic"],
                        "lettuce": ["lettuce", "lettuce leaf", "romaine lettuce", "red leaf lettuce", "garden lettuce"],
                        "shallot": ["shallot", "diced shallot", "quartered shallot", "chopped shallot"],
                        "potato": ["potato", "diced potato", "quartered potato", "mashed potato", "red potato", "yellow potato", "yukon gold potato", "gold potato"],
                        "shrimp": ["shrimp","raw shrimp", "deveined shrimp", "peeled shrimp"],
                        "celery": ["celery", "rib of celery", "chopped celery"],
                        "bell pepper": ["bell pepper", "green bell pepper", "red bell pepper", "diced pepper", "chopped pepper"]
                        }
    
    
    # Set up a list for each user of recipes alredy reviewed 
    reviewed = list(prior_ratings_df.loc[user,"recipe_id"])
    
    # Create a data frame that does not include recipes already reviewed 
    not_reviewed = recipes_df.copy()
    not_reviewed = not_reviewed[not_reviewed.recipe_id.isin(reviewed) == False]
    not_reviewed.reset_index(inplace=True)
            
            
    # Iterate through each row using a for loop to create list that contains recipe_ids tht match ingredient and type
    # Create the empty list of recipes
    recipes_w_ingredient_subcat = []
    #Set ingredient_request equal to the values in dict if ingredient_request is a key, if not set equal to itself
    #ingredient_request = ingredient_variations.get(ingredient_request, str(ingredient_request))
    #ingredient_request = set(ingredient_request)
    ingredient_request = ingredient_variations.get(ingredient_request, set([ingredient_request]))
    if type(ingredient_request) != set: 
        ingredient_request = set(ingredient_request)
    #loop through recipes finding where ingredient_request matches with set_ingredients and recipe_type
    for index, row in recipes_df.iterrows():
        if ingredient_request.intersection(row["set_ingredients"]) and sub_cat_request in row["recipe_type"]:
            recipes_w_ingredient_subcat.append(row["recipe_id"])
    
    # Create a copy of not_reviewed df that only contains recipe_ids that are in the recipe_w_ingredient_subcat list
    ingredient_recipes = not_reviewed.copy()
    ingredient_recipes = ingredient_recipes[ingredient_recipes.recipe_id.isin(recipes_w_ingredient_subcat)]
    ingredient_recipes.reset_index(inplace=True)
    

    # Create predicted rating for each recipe_id using the best_model, sort by highest rating
    ingredient_recipes["predicted_rating"] = ingredient_recipes["recipe_id"].apply(lambda x: best_model.predict(int(user), x).est)
    ingredient_recipes.sort_values(by="predicted_rating", ascending=False, inplace=True)
    ingredient_recipes = ingredient_recipes[["name", "minutes", "ingredients", "recipe_type", "predicted_rating"]] 
    
    return ingredient_recipes.head(num)
    
    

Now let's test out the function using the same user with a user_id = 462571 and see the different results for the ingredient tomato based on the recipe type. Note that the dictionary of ingredient_variations has worked as the results include recipes with chopped tomato, diced tomoato, etc. 

In [50]:
recommended_recipes()

user_id: 462571
What ingredients do you need to use?  tomato
Recipe type? vegetarian
How many recipes would you like to see? 4


Unnamed: 0,name,minutes,ingredients,recipe_type,predicted_rating
285,three cheese enchiladas,40,"[monterey jack cheese, cheddar cheese, cream cheese, pace picante sauce, red bell pepper, green onion, cumin, flour tortillas, lettuce, chopped tomato]","[vegetarian, any]",4.513537
149,layered hummus dip,15,"[hummus, cumin, chopped tomato, garlic salt, cucumber, fresh ground pepper, feta cheese, kalamata olives, parsley]","[vegetarian, any, low cal, low carb]",4.508063
113,garbanzo bean salad with feta cheese,15,"[garbanzo beans, seedless cucumber, chopped tomato, red onion, olive oil, red wine vinegar, capers, fresh parsley, dried oregano, salt and pepper, feta cheese, kalamata olive]","[vegetarian, any, low cal, low carb]",4.485478
57,chili cheese omelette burritos,15,"[chili, cheddar cheese, eggs, water, salt and pepper, butter, flour tortillas, onion, diced tomato, sour cream, salsa]","[vegetarian, any, low carb]",4.483624


In [51]:
recommended_recipes()

user_id: 462571
What ingredients do you need to use?  tomato
Recipe type? not vegetarian
How many recipes would you like to see? 4


Unnamed: 0,name,minutes,ingredients,recipe_type,predicted_rating
152,dorito taco salad,20,"[ground beef, lettuce, chopped tomato, cheddar cheese, red onion, taco seasoning, ranch style beans, catalina dressing, doritos]","[any, not vegetarian]",4.514272
363,quesadillas for one or two,15,"[oil, flour tortillas, cheddar cheese, chopped tomato, green chili, guacamole, green onion, green bell pepper, black olives, sour cream, salsa, refried beans, chicken, turkey, ham, beef, crabmeat, shrimp]","[any, low cal, low carb, not vegetarian]",4.502716
215,great white chili supposed to be by willie nelson,270,"[white beans, chicken broth, chicken base, onions, oil, garlic, diced green chilies, ground cumin, dried oregano leaves, cayenne pepper, cilantro, cooked chicken, sour cream, monterey jack cheese, green onion, chopped tomato]","[any, not vegetarian]",4.495441
438,steak or chicken fajitas,20,"[top sirloin steak, olive oil, lime juice, garlic clove, chili powder, cumin, hot pepper flakes, black pepper, salt, flour tortillas, onion, sweet peppers, salsa, sour cream, cheese, chopped tomato]","[any, low cal, low carb, not vegetarian]",4.488345


We can also run the function with different recipe ingredients for this user and see how the recommendations change. Let's try with chicken and beef and allow the subcategory to be equal to "any" - meaning any recipe type is allowed. 

In [52]:
recommended_recipes()

user_id: 462571
What ingredients do you need to use?  chicken
Recipe type? any
How many recipes would you like to see? 4


Unnamed: 0,name,minutes,ingredients,recipe_type,predicted_rating
399,chicken and cheese french bread pizza,30,"[butter, cheddar cheese, parmesan cheese, garlic, dried italian seasoning, french bread, chicken, salt, pepper, mozzarella cheese, red bell pepper, green onion]","[any, not vegetarian]",4.566741
976,couscous jambalaya,40,"[raw shrimp, chicken breast, creole seasoning, olive oil, andouille sausages, onion, green bell pepper, celery, garlic, italian plum tomato, bay leaves, worcestershire sauce, hot pepper sauce, chicken stock, salt, fresh ground black pepper, couscous, paprika, garlic powder, black pepper, onion powder, cayenne pepper, dried oregano leaves, dried leaf thyme]","[any, low cal, not vegetarian]",4.549372
96,autumn chicken salad,20,"[chicken, red seedless grapes, celery ribs, red delicious apple, pecan halves, mayonnaise, honey mustard, salt, pepper, lettuce leaf]","[any, low cal, low carb, not vegetarian]",4.528772
107,b stilla,130,"[butter, chicken, parsley, onion, saffron thread, whole almond, icing sugar, eggs, pepper, cinnamon, phyllo pastry]","[any, not vegetarian]",4.51894


In [87]:
recommended_recipes()

user_id: 462571
What ingredients do you need to use?  beef
Recipe type? any
How many recipes would you like to see? 4


Unnamed: 0,name,minutes,ingredients,recipe_type,predicted_rating
316,homemade stock,125,"[water, bay leaf, peppercorns, oregano, thyme, basil, dill, salt, onion, carrot, broccoli stem, spinach leaves, fresh parsley, celery, green beans, tomatoes, mushroom, garlic cloves, shallot, leek, turnip, chicken piece, beef]","[any, low cal, low carb, not vegetarian]",4.529465
88,beef ramen stir fry,20,"[beef, red bell pepper, green bell pepper, ramen noodles, snow peas, broccoli, teriyaki sauce, vegetable oil, garlic powder, ginger powder]","[any, low carb, not vegetarian]",4.527217
458,quesadillas for one or two,15,"[oil, flour tortillas, cheddar cheese, chopped tomato, green chili, guacamole, green onion, green bell pepper, black olives, sour cream, salsa, refried beans, chicken, turkey, ham, beef, crabmeat, shrimp]","[any, low cal, low carb, not vegetarian]",4.502716
304,hash,20,"[potatoes, onion, beef, salt and pepper, vegetable oil, butter]","[any, not vegetarian]",4.494467
