In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


from surprise import Dataset, Reader, accuracy, NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering
from surprise.accuracy import rmse
from surprise.prediction_algorithms import SVD, SVDpp, NMF, BaselineOnly, NormalPredictor
from IPython.core.display import HTML
from surprise.model_selection.split import train_test_split as surprise_train_test_split
from surprise.model_selection import GridSearchCV, cross_validate

%matplotlib inline

In [2]:
user_ratings_df = pd.read_csv("./Data/user_reviews_no_zero.csv", index_col=[0])

In [3]:
recipes_df = pd.read_csv("./Data/recipes_subcat_cleaned.csv", index_col=[0])

In [4]:
recipes_df.head()

Unnamed: 0,name,recipe_id,minutes,description,ingredients,calories,total_fat_pdv,sugar_pdv,sodium_pdv,protein_pdv,saturated_fat_pdv,carbs_pdv,recipe_type
0,arriba baked winter squash mexican style,137739,55,autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",51.5,0.0,13.0,0.0,2.0,0.0,4.0,"['vegetarian', 'low cal', 'low carb']"
1,a bit different breakfast pizza,31490,30,this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",173.4,18.0,0.0,17.0,22.0,35.0,1.0,"['low cal', 'low carb', 'sugar free', 'not veg..."
2,all in the kitchen chili,112140,130,this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",269.8,22.0,32.0,48.0,39.0,27.0,5.0,"['low cal', 'low carb', 'not vegetarian']"
3,alouette potatoes,59389,45,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",368.1,17.0,10.0,2.0,14.0,8.0,20.0,"['vegetarian', 'low cal']"
4,amish tomato ketchup for canning,44061,190,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",352.9,1.0,337.0,23.0,3.0,0.0,28.0,"['vegetarian', 'low cal']"


In [16]:
user_ratings_df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
10,56680,79222,2006-11-11,5.0,"Oh, This was wonderful! Had a soup and salad ..."
11,183565,79222,2006-02-13,5.0,Wow! My family loves this recipe and it is a ...
12,101823,79222,2006-03-21,5.0,Excellent chowder. This was the perfect warm-...
13,446143,79222,2008-02-01,4.0,"Oh, how wonderful! I doubled the crab, and ad..."
14,226989,79222,2008-03-07,4.0,DH and I enjoyed this. However I used it only ...


In [5]:
#Clean the ingredients column so each row is a list of strings
recipes_df["ingredients"] = recipes_df["ingredients"].str.replace("'", "")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("[")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("]")
recipes_df["ingredients"] = recipes_df["ingredients"].str.split(", ")

In [6]:
#Clean the recipe_type column so each row is a list of strings
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.replace("'", "")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("[")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("]")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.split(", ")

## Setting up Surprise

In [7]:
user_ratings_df[["user_id", "recipe_id"]] = user_ratings_df[["user_id", "recipe_id"]].astype(int)

In [8]:
rating_surprise_df = user_ratings_df[["user_id", "recipe_id", "rating"]]

In [9]:
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(rating_surprise_df, reader)

In [10]:
trainset_full = surprise_data.build_full_trainset()

In [11]:
best_model = SVD(n_factors = 3,  n_epochs= 35, lr_all = .0025)
best_model.fit(trainset_full)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f8acfb6be50>

In [12]:
## Subset data frame to show reviewers the products they have rated 

prior_ratings_df = pd.DataFrame(user_ratings_df.set_index("user_id"))
prior_ratings_df.drop(columns=["date", "rating", "review"], inplace=True)
prior_ratings_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 537267 entries, 56680 to 1122988
Data columns (total 1 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   recipe_id  537267 non-null  int64
dtypes: int64(1)
memory usage: 8.2 MB


## Creating Recommender Function 

In [14]:
# Create a new column that converts the list of ingredients to a set to improve efficiencies in the function
recipes_df["set_ingredients"] = recipes_df["ingredients"].apply(lambda x: set(x))


In [14]:
# Sanity check
type(recipes_df["set_ingredients"][0])

set

In [44]:
def recommended_recipes():
    #set up to not truncate display
    pd.set_option("display.max_colwidth", None)
    
    # Set up user inputs 
    user = int(input("user_id: "))
    ingredient_request = input("What ingredients do you need to use?  ")
    sub_cat_request = input("Recipe type? ")
    num = int(input("How many recipes would you like to see? "))
    
    # Set up dictionary to account for variations of ingredients 
    ingredient_variations = {"chicken" : ["chicken", "chicken breast"],
                        "tomato": ["tomato", "diced tomato", "crushed tomato", "chopped tomato", "heirloom tomato"],
                        "onion": ["onion","yellow onion", "red onion", "sweet onion", "vidailia onion", "chopped onion", "diced onion"],
                        "garlic": ["garlic", "peeled garlic", "crushed garlic", "garlic clove", "garlic cloves", "head of garlic", "minced garlic"],
                        "lettuce": ["lettuce", "lettuce leaf", "romaine lettuce", "red leaf lettuce", "garden lettuce"],
                        "shallot": ["shallot", "diced shallot", "quartered shallot", "chopped shallot"],
                        "potato": ["potato", "diced potato", "quartered potato", "mashed potato", "red potato", "yellow potato", "yukon gold potato", "gold potato"],
                        "shrimp": ["shrimp","raw shrimp", "deveined shrimp", "peeled shrimp"],
                        "celery": ["celery", "rib of celery", "chopped celery"],
                         }
    
    
    # Set up a list for each user of recipes alredy reviewed 
    reviewed = list(prior_ratings_df.loc[user,"recipe_id"])
    
    # Create a data frame that does not include recipes already reviewed 
    not_reviewed = recipes_df.copy()
    not_reviewed = not_reviewed[not_reviewed.recipe_id.isin(reviewed) == False]
    not_reviewed.reset_index(inplace=True)
            
            
    # Iterate through each row using a for loop to create list that contains recipe_ids tht match ingredient and type
    # Create the empty list of recipes
    recipes_w_ingredient_subcat = []
    #Set ingredient_request equal to the values in dict if ingredient_request is a key, if not set equal to itself
    ingredient_request = ingredient_variations.get(ingredient_request, str(ingredient_request))
    ingredient_request = set(ingredient_request)
    #loop through recipes finding where ingredient_request matches with set_ingredients and recipe_type
    for index, row in recipes_df.iterrows():
        if ingredient_request.intersection(row["set_ingredients"]) and sub_cat_request in row["recipe_type"]:
            recipes_w_ingredient_subcat.append(row["recipe_id"])
    
    # Create a copy of not_reviewed df that only contains recipe_ids that are in the recipe_w_ingredient_subcat list
    ingredient_recipes = not_reviewed.copy()
    ingredient_recipes = ingredient_recipes[ingredient_recipes.recipe_id.isin(recipes_w_ingredient_subcat)]
    ingredient_recipes.reset_index(inplace=True)
    

    # Create predicted rating for each recipe_id using the best_model, sort by highest rating
    ingredient_recipes["predicted_rating"] = ingredient_recipes["recipe_id"].apply(lambda x: best_model.predict(int(user), x).est)
    ingredient_recipes.sort_values(by="predicted_rating", ascending=False, inplace=True)
    ingredient_recipes = ingredient_recipes[["name", "minutes", "ingredients", "recipe_type", "predicted_rating"]] 
    
    return ingredient_recipes.head(num)
    
    

In [45]:
recommended_recipes()

user_id: 446143
What ingredients do you need to use?  tomato
Recipe type? low carb
How many recipes would you like to see? 20


Unnamed: 0,name,minutes,ingredients,recipe_type,predicted_rating
92,chili cheese omelette burritos,15,"[chili, cheddar cheese, eggs, water, salt and pepper, butter, flour tortillas, onion, diced tomato, sour cream, salsa]","[vegetarian, low carb]",5.0
246,layered hummus dip,15,"[hummus, cumin, chopped tomato, garlic salt, cucumber, fresh ground pepper, feta cheese, kalamata olives, parsley]","[vegetarian, low cal, low carb]",5.0
346,quesadillas for one or two,15,"[oil, flour tortillas, cheddar cheese, chopped tomato, green chili, guacamole, green onion, green bell pepper, black olives, sour cream, salsa, refried beans, chicken, turkey, ham, beef, crabmeat, shrimp]","[low cal, low carb, not vegetarian]",5.0
233,italian nachos,25,"[wonton wrappers, egg white, olive oil, oregano, italian sausage, four cheese pasta sauce, chopped tomato, green onion, sliced ripe olives, pickled jalapeno peppers, mozzarella cheese, red pepper flakes]","[low cal, low carb, not vegetarian]",5.0
415,steak or chicken fajitas,20,"[top sirloin steak, olive oil, lime juice, garlic clove, chili powder, cumin, hot pepper flakes, black pepper, salt, flour tortillas, onion, sweet peppers, salsa, sour cream, cheese, chopped tomato]","[low cal, low carb, not vegetarian]",5.0
62,caponata eggplant and lots of good things,50,"[rosemary oil, eggplant, celery, onion, garlic cloves, diced tomato, sun-dried tomato packed in oil, black olives, capers, salt, sugar, pine nuts, balsamic vinegar, fresh basil, fresh oregano]","[vegetarian, low cal, low carb]",4.99467
186,garbanzo bean salad with feta cheese,15,"[garbanzo beans, seedless cucumber, chopped tomato, red onion, olive oil, red wine vinegar, capers, fresh parsley, dried oregano, salt and pepper, feta cheese, kalamata olive]","[vegetarian, low cal, low carb]",4.993823
172,feta marinated,20,"[feta cheese, garlic cloves, peppercorn, coriander seeds, capers, bay leaf, olive oil, fresh thyme sprig, toast, chopped tomato]","[vegetarian, low cal, low carb]",4.992957
120,crock pot steak fajitas,195,"[boneless sirloin, vegetable oil, lemon juice, garlic clove, ground cumin, seasoning salt, chili powder, green bell pepper, onion, flour tortillas, cheddar cheese, salsa, guacamole, sour cream, lettuce, chopped tomato]","[vegetarian, low cal, low carb]",4.989981
175,fiery pork loin,350,"[boneless pork loin roast, chipotle chiles in adobo, adobo sauce, ground cumin, ground black pepper, dried onion flakes, diced tomato, garlic cloves, orange peel, fresh orange juice, fresh lime juice, white vinegar, dried oregano, salt]","[low carb, not vegetarian]",4.97496
