<h1 style = "text-align: center">Collaborative Filtering Data Model</h1>

<h3 style = "text-align: center">Food.com Recipe Recommender - SOEN 471 (Big Data Analytics)</h3>

## Objective:
The objective of this notebook is to create a recommender system data model that recommends recipes based on user preferences using collaborative filtering.

In [1]:
import os
import json
import seaborn
import numpy as np
import pandas as pd 
import dask.array as da
import dask.dataframe as dd
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# List every file under ./clean_data so the reader can confirm which input
# files are available before they are loaded. (Previously the joined path was
# built and thrown away, so this loop had no visible effect.)
for dirname, _, filenames in os.walk('./clean_data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

## Reading files:

In [2]:
# Load the three cleaned CSVs lazily with dask; each one carries a column
# titled "Unnamed: 0" that is not needed, so it is dropped at load time.
training = dd.read_csv("./clean_data/interactions_TRAIN.csv").drop(columns=["Unnamed: 0"])
testing = dd.read_csv("./clean_data/interactions_TEST.csv").drop(columns=["Unnamed: 0"])
recipes = dd.read_csv("./clean_data/recipes.csv").drop(columns=["Unnamed: 0"])

## Print Training Data

In [3]:
# Preview the first rows of the training interactions.
training.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,59801,8060,2007-12-11,5,This really is a great basic meatball recipe. ...
1,104295,38353,2005-02-08,5,Very yummy. I quartered the recipe and theref...
2,58104,162999,2007-03-05,5,OK this may be a great Canadian drink! But whe...
3,2000207498,2886,2015-05-16,5,Great recipe. I made a lot of changes though a...
4,41695,27082,2002-06-10,4,"Very easy to make and I liked it, my kids didn..."


In [4]:
# Select the recipes whose recipe_id equals 0 and materialize the lazy result
# with .compute(); the saved output below shows only the header, i.e. no match.
recipes[recipes['recipe_id'] == 0].compute()

Unnamed: 0,name,recipe_id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,Calories,Total_fat_PDV,Sugar_PDV,Sodium_PDV,Protein_PDV,Saturated_fat_PDV,Carbohydrates_PDV


## Print Recipes Data

In [5]:
# Preview the first rows of the recipes table.
recipes.head()

Unnamed: 0,name,recipe_id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,Calories,Total_fat_PDV,Sugar_PDV,Sodium_PDV,Protein_PDV,Saturated_fat_PDV,Carbohydrates_PDV
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


## Add user rating column per recipe

In [6]:
# Attach each training interaction (user_id, date, rating, review) to the
# recipe it refers to. The join key is spelled out rather than left to
# "whatever column names the two frames happen to share", so adding a column
# to either CSV cannot silently change how rows are matched.
recipes_ratings = dd.merge(recipes, training, on="recipe_id", how="inner")
recipes_ratings.head(5)

Unnamed: 0,name,recipe_id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,...,Total_fat_PDV,Sugar_PDV,Sodium_PDV,Protein_PDV,Saturated_fat_PDV,Carbohydrates_PDV,user_id,date,rating,review
0,rita s chex peanut butter candy,128930,15,203467,2005-07-07,"['15-minutes-or-less', 'time-to-make', 'course...",5,"['in a large pot melt butter , peanut butter c...",another recipe from a co-worker. it's peanut b...,"['corn chex', 'butter', 'peanut butter chips',...",...,33.0,162.0,21.0,20.0,48.0,23.0,235468,2005-10-19,5,These are great! My mom used to make these wh...
1,rita s chex peanut butter candy,128930,15,203467,2005-07-07,"['15-minutes-or-less', 'time-to-make', 'course...",5,"['in a large pot melt butter , peanut butter c...",another recipe from a co-worker. it's peanut b...,"['corn chex', 'butter', 'peanut butter chips',...",...,33.0,162.0,21.0,20.0,48.0,23.0,254633,2005-10-26,0,I have this recipe and i made it last yr for c...
2,rita s chicken piccata,311085,35,32058,2008-07-01,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['season with salt / pepper', 'dust chicken wi...",great recipe to have for company.,"['boneless skinless chicken breasts', 'flour',...",...,57.0,3.0,20.0,64.0,61.0,1.0,461834,2010-08-11,5,Delicious chicken piccata!!! I made exactly a...
3,rita s christmas cheese logs,142518,1470,86764,2005-10-24,"['time-to-make', 'course', 'main-ingredient', ...",4,['allow all ingredients to come to room temper...,"pop's secretary, rita, is one of the best cook...","['cream cheese', 'mustard', 'worcestershire sa...",...,51.0,3.0,17.0,22.0,81.0,1.0,239758,2007-06-18,5,Rita is a fantastic cook if this recipe is any...
4,rita s famous dressing balls,137336,750,110135,2005-09-13,"['time-to-make', 'course', 'preparation', 'occ...",9,['place bread in a large bowl and let sit over...,these are a tradition in my mother-in-law's fa...,"['stale bread', 'butter', 'onions', 'celery', ...",...,20.0,9.0,14.0,7.0,38.0,6.0,201581,2005-10-10,5,I love stuffing and I will make these again. I...


## Making User-Item Matrix

In [7]:
# Number of interactions used to build the (dense) rating matrix.
N_INTERACTIONS = 90000

# Take the sample into its own variable instead of rebinding `training`:
# `training` stays the lazy dask frame that earlier cells expect, and this cell
# gives the same result no matter how many times it is re-run.
# NOTE: dask's head() only reads the first partition by default, so fewer than
# N_INTERACTIONS rows are returned if that partition is smaller (dask warns).
training_sample = training.head(N_INTERACTIONS)

# Rows are recipes, columns are users, cells hold the rating (averaged if a
# user/recipe pair occurs more than once). Missing pairs are filled with 0.
# NOTE(review): 0 also appears as a real rating in the data, so "not rated"
# and "rated 0" are indistinguishable in this matrix.
ratings_matrix = (
    training_sample
    .pivot_table(index='recipe_id', columns='user_id', values='rating')
    .fillna(0)
)
ratings_matrix.head(15)

user_id,1533,1535,1634,1773,1792,1891,2046,2059,2178,2310,...,2002361851,2002362510,2002362634,2002365467,2002366577,2002366607,2002368308,2002368940,2002371792,2002372706
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
46,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Performing Cosine Similarity

In [None]:
# Pairwise cosine similarity between the rows of ratings_matrix. The matrix was
# pivoted with index='recipe_id', so entry [i, j] compares the rating vectors
# of recipe i and recipe j.
recipe_similarity = cosine_similarity(ratings_matrix)
# Zero the diagonal in place (each row's similarity with itself) — presumably
# so a recipe is never picked as its own nearest neighbour.
np.fill_diagonal(recipe_similarity, 0) 
recipe_similarity

## Distribution of n_ingredients and n_steps

In [None]:
# seaborn operates on in-memory data, so materialise just the two plotted
# columns from the lazy dask frame once instead of handing it dask Series.
distribution_columns = ["n_ingredients", "n_steps"]
distribution_data = recipes[distribution_columns].compute()

fig, ax = plt.subplots(1, 2, figsize=(15, 4))
for axis, column in zip(ax, distribution_columns):
    # distplot is deprecated; histplot with kde=True and stat="density" is the
    # replacement seaborn documents for it (kde_kws cut=3 matches distplot).
    seaborn.histplot(distribution_data[column], kde=True, stat="density", kde_kws=dict(cut=3), ax=axis)
    axis.set_title(f"Distribution of {column}")

## Relationship between different variables in a dataset:

In [None]:
# recipes_ratings is a lazy dask frame; compute only the four columns the two
# plots need, once, and give seaborn the resulting pandas DataFrame.
relationship_columns = ["n_ingredients", "Calories", "n_steps", "rating"]
relationship_data = recipes_ratings[relationship_columns].compute()

fig, ax = plt.subplots(1, 2, figsize=(15, 4))
seaborn.scatterplot(data=relationship_data, x="n_ingredients", y="Calories", hue="rating", ax=ax[0])
ax[0].set_title("Calories vs. number of ingredients")
seaborn.scatterplot(data=relationship_data, x="n_ingredients", y="n_steps", hue="rating", ax=ax[1])
ax[1].set_title("Number of steps vs. number of ingredients");

## Item-Item Recommendation Class:

In [None]:
class ItemItem_Recommendations:
    """Recommend recipes whose nutritional profile is closest to a given recipe.

    Similarity between two recipes is the cosine similarity of their values in
    ``NUTRITIONAL_COLUMNS`` (missing values are treated as 0).

    Parameters
    ----------
    recipes : pandas.DataFrame
        In-memory recipe table. It must contain the columns ``recipe_id``,
        ``name`` and every column listed in ``NUTRITIONAL_COLUMNS``. The frame
        is only read, never modified.
    """

    # Columns that form a recipe's feature vector for the similarity search.
    NUTRITIONAL_COLUMNS = (
        'Calories', 'Total_fat_PDV', 'Sugar_PDV', 'Sodium_PDV',
        'Protein_PDV', 'Saturated_fat_PDV', 'Carbohydrates_PDV',
    )

    # Maximum number of recommendations returned per query.
    N_RECOMMENDATIONS = 5

    def __init__(self, recipes):
        self.recipes = recipes

    def recommend_five_recipes(self, recipe_id):
        """Return up to five recipes most similar to ``recipe_id``.

        The query recipe's name and nutritional values are printed as JSON,
        followed by a banner line; the recommendations themselves are returned.

        Parameters
        ----------
        recipe_id : int
            Value to look up in the ``recipe_id`` column.

        Returns
        -------
        list of dict
            One ``{'name': ..., 'nutritional_info': {...}}`` entry per
            recommended recipe, most similar first. Empty (after printing
            "recipe_id not found") when ``recipe_id`` is not in the table.
            Fewer than five entries are returned when the table is too small.
        """
        allRecipes = self.recipes
        nutritional_columns = list(self.NUTRITIONAL_COLUMNS)

        # Boolean mask over row *positions*; positions (not index labels) are
        # used throughout so the frame may carry any index.
        is_query = (allRecipes['recipe_id'] == recipe_id).to_numpy()
        if not is_query.any():
            print("recipe_id not found")
            return []
        query_position = int(np.flatnonzero(is_query)[0])

        # Work on a filled copy so the caller's DataFrame is left untouched.
        nutrition = allRecipes[nutritional_columns].fillna(0)

        # Print the information of the passed recipe_id
        recipe_info = {
            'name': allRecipes['name'].iloc[query_position],
            **nutrition.iloc[[query_position]].to_dict('records')[0],
        }
        print(json.dumps(recipe_info, indent=4))

        # Similarity of every recipe to the query recipe (one column, flattened).
        features = nutrition.to_numpy(dtype=float)
        similarity_scores = cosine_similarity(features, features[[query_position]]).ravel()

        # Rank by descending similarity (stable, so ties keep table order) and
        # drop the query itself explicitly: it is not guaranteed to sort first
        # when another recipe has an identical/proportional nutrition vector.
        ranked_positions = np.argsort(-similarity_scores, kind='stable')
        top_positions = ranked_positions[~is_query[ranked_positions]][:self.N_RECOMMENDATIONS]

        # One dictionary per recommended recipe: its name and nutritional values.
        top_names = allRecipes['name'].iloc[top_positions].tolist()
        top_nutrition = nutrition.iloc[top_positions].to_dict('records')
        recommended_recipes = [
            {'name': name, 'nutritional_info': info}
            for name, info in zip(top_names, top_nutrition)
        ]

        # Return the recommended recipes
        print("_____ The Recommmended Top 5 Most Similar Recipes _____")
        return recommended_recipes

## Testing the model given a random recipe_id:

In [None]:
# Build the recommender on the first 10,000 recipes (materialised as a pandas
# frame by head()) and query it with one recipe_id.
recipes_sample = recipes.head(10000)
model = ItemItem_Recommendations(recipes_sample)
model.recommend_five_recipes(137739)