In [2]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


from surprise import Dataset, Reader, accuracy, NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering
from surprise.accuracy import rmse
from surprise.prediction_algorithms import SVD, SVDpp, NMF, BaselineOnly, NormalPredictor
from IPython.core.display import HTML
from surprise.model_selection.split import train_test_split as surprise_train_test_split
from surprise.model_selection import GridSearchCV, cross_validate

%matplotlib inline

In [3]:
user_ratings_df = pd.read_csv("./Data/user_reviews_no_zero.csv", index_col=[0])

In [4]:
recipes_df = pd.read_csv("./Data/recipes_subcat_cleaned.csv", index_col=[0])

In [None]:
recipes_df.head()

In [5]:
#Clean the ingredients column so each row is a list of strings
recipes_df["ingredients"] = recipes_df["ingredients"].str.replace("'", "")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("[")
recipes_df["ingredients"] = recipes_df["ingredients"].str.strip("]")
recipes_df["ingredients"] = recipes_df["ingredients"].str.split(", ")

In [6]:
#Clean the recipe_type column so each row is a list of strings
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.replace("'", "")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("[")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.strip("]")
recipes_df["recipe_type"] = recipes_df["recipe_type"].str.split(", ")

## Setting up Surprise

In [7]:
user_ratings_df[["user_id", "recipe_id"]] = user_ratings_df[["user_id", "recipe_id"]].astype(int)

In [8]:
rating_surprise_df = user_ratings_df[["user_id", "recipe_id", "rating"]]

In [9]:
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(rating_surprise_df, reader)

In [10]:
trainset_full = surprise_data.build_full_trainset()

In [11]:
best_model = SVD(n_factors = 3,  n_epochs= 35, lr_all = .0025)
best_model.fit(trainset_full)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fa8ef530370>

In [12]:
## Subset data frame to show reviewers the products they have rated 

prior_ratings_df = pd.DataFrame(user_ratings_df.set_index("user_id"))
prior_ratings_df.drop(columns=["date", "rating", "review"], inplace=True)
prior_ratings_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 537267 entries, 56680 to 1122988
Data columns (total 1 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   recipe_id  537267 non-null  int64
dtypes: int64(1)
memory usage: 8.2 MB


## Creating Recommender Function 

In [None]:
recipes_df.head(1)

In [None]:
not_reviewed = recipes_df.copy()
not_reviewed = not_reviewed[not_reviewed.recipe_id.isin(reviewed) == False]
not_reviewed.reset_index(inplace=True)

In [13]:
# Create a new column that converts the list of ingredients to a set to improve efficiencies in the function
recipes_df["set_ingredients"] = recipes_df["ingredients"].apply(lambda x: set(x))


In [14]:
# Sanity check
type(recipes_df["set_ingredients"][0])

set

In [None]:
not_reviewed = recipes_df.copy()

In [None]:
not_reviewed = not_reviewed[not_reviewed.recipe_id.isin(reviewed) == False]
not_reviewed.reset_index(inplace=True)
    

In [None]:
user = 183565
ingredient_request = "onions"
sub_cat_request = "vegetarian"

In [None]:
recipes_w_ingredient = []
for index, row in recipes_df.iterrows():
    if ingredient_request in row["set_ingredients"]:
        recipes_w_ingredient.append(row["recipe_id"])
 

In [None]:
recipes_w_ingredient_subcat = []
for index, row in recipes_df.iterrows():
    if ingredient_request in row["set_ingredients"] and sub_cat_request in row["recipe_type"]:
        recipes_w_ingredient_subcat.append(row["recipe_id"])

In [None]:
len(recipes_w_ingredient_subcat)

In [None]:
ingredient_recipes = not_reviewed.copy()
#loc, iloc statement (conditional with loc where value is in the list)
#ingredient_recipes = 
ingredient_recipes = ingredient_recipes[ingredient_recipes.recipe_id.isin(recipes_w_ingredient)] #== True]
ingredient_recipes.reset_index(inplace=True)
    

In [None]:
ingredient_recipes

In [None]:
ingredient_recipes["predicted_rating"] = ingredient_recipes["recipe_id"].apply(lambda x: best_model.predict(user, x).est)
ingredient_recipes.sort_values(by="predicted_rating", ascending=False, inplace=True)
ingredient_recipes = ingredient_recipes[["name", "minutes", "description", "ingredients", "recipe_type", "predicted_rating"]] 

In [None]:
predictions = best_model.predict(183565, 99024)

In [None]:
predictions

In [None]:
user1 = user_ratings_df[user_ratings_df["user_id"] == "827374"]


In [None]:
user1["rating"].mode()

In [None]:
user_ratings_df["rating"].mean()

In [None]:
user_ratings_df.head()

In [None]:
def recommended_recipes():
    #set up to not truncate display
    pd.set_option("display.max_colwidth", None)
    
    # Set up user inputs 
    user = int(input("user_id: "))
    ingredient_request = input("What ingredients do you need to use?  ")
    sub_cat_request = input("Recipe type? ")
    num = int(input("How many recipes would you like to see? "))
    
    # Set up dictionary to account for variations of ingredients 
    
    
    
    # Set up a list for each user of recipes alredy reviewed 
    reviewed = list(prior_ratings_df.loc[user,"recipe_id"])
    
    # Create a data frame that does not include recipes already reviewed 
    not_reviewed = recipes_df.copy()
    not_reviewed = not_reviewed[not_reviewed.recipe_id.isin(reviewed) == False]
    not_reviewed.reset_index(inplace=True)
            
            
    # Iterate through each row using a for loop to create list that contains recipe_ids tht match ingredient and type
    recipes_w_ingredient_subcat = []
    for index, row in recipes_df.iterrows():
        if ingredient_request in row["set_ingredients"] and sub_cat_request in row["recipe_type"]:
            recipes_w_ingredient_subcat.append(row["recipe_id"])
    
    # Create a copy of not_reviewed df that only contains recipe_ids that are in the recipe_w_ingredient_subcat list
    ingredient_recipes = not_reviewed.copy()
    ingredient_recipes = ingredient_recipes[ingredient_recipes.recipe_id.isin(recipes_w_ingredient_subcat)]
    ingredient_recipes.reset_index(inplace=True)
    

    # Create predicted rating for each recipe_id using the best_model, sort by highest rating
    ingredient_recipes["predicted_rating"] = ingredient_recipes["recipe_id"].apply(lambda x: best_model.predict(int(user), x).est)
    ingredient_recipes.sort_values(by="predicted_rating", ascending=False, inplace=True)
    ingredient_recipes = ingredient_recipes[["name", "minutes", "ingredients", "recipe_type", "predicted_rating"]] 
    
    return ingredient_recipes.head(num)
    
    

In [None]:
recommended_recipes()

In [None]:
recommended_recipes()

## Working on Function

In [None]:
print(sorted(recipes_df["ingredients"][0:50]))

In [23]:
ingredient_request = "chicken"
sub_cat_request = "non vegetarian"
recipes_w_ingredient_subcat = []

In [15]:
# Setting up dictionary to account for variations of ingredients 

ingredient_variations = {"chicken" : ["chicken", "chicken breast"],
                        "tomato": ["tomato", "diced tomato", "crushed tomato", "chopped tomato", "heirloom tomato"],
                        "onion": ["onion","yellow onion", "red onion", "sweet onion", "vidailia onion", "chopped onion", "diced onion"],
                        "garlic": ["garlic", "peeled garlic", "crushed garlic", "garlic clove", "garlic cloves", "head of garlic", "minced garlic"],
                        "lettuce": ["lettuce", "lettuce leaf", "romaine lettuce", "red leaf lettuce", "garden lettuce"],
                        "shallot": ["shallot", "diced shallot", "quartered shallot", "chopped shallot"],
                        "potato": ["potato", "diced potato", "quartered potato", "mashed potato", "red potato", "yellow potato", "yukon gold potato", "gold potato"],
                        "shrimp": ["shrimp","raw shrimp", "deveined shrimp", "peeled shrimp"],
                        "celery": ["celery", "rib of celery", "chopped celery"],
                         }

In [24]:
for index, row in recipes_df.iterrows():
    if ingredient_request in ingredient_variations.keys() 
    
    if set(ingredient_request).intersection(ingredient_variations) in row["set_ingredients"] and sub_cat_request in row["recipe_type"]:
        recipes_w_ingredient_subcat.append(row["recipe_id"])

In [25]:
recipes_w_ingredient_subcat

[]