#### Library Imports
---

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import scipy.sparse as sp
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
recipes = pd.read_csv("recipes_w_keywords.csv")
reviews = pd.read_csv("reviews_trimmed.csv")

In [None]:
reviews

In [None]:
recipes

In [None]:
def add_recipes(df, name, keywords):
    if name not in df["Name"]:
        print("Recipe name already exists, please use a different name")
        return
    
    cols = {key: 0 for key in df.iloc[:, 1:].columns}
    new_recipe_number = df["RecipeId"].tail(1).iloc[0]
    
    cols["Name"] = name
    cols["RecipeId"] = new_recipe_number+1

    for word in keywords:
        if word in cols:
            cols[word] = 1

    df.loc[len(df)] = cols
    return df

In [None]:
recipes = add_recipes(recipes, "Birthday Cake", ["Birthday"])

In [None]:
recipes

In [None]:
def add_review(df, recipe, author, rating):
    possible_ratings = [1, 2, 3, 4, 5]
    
    if recipe not in df["RecipeId"]:
        print("Recipe not found")
        return

    if rating not in possible_ratings:
        print("Rating not possible, please input 1, 2, 3, 4 or 5")
        return

    if len(df[(df['RecipeId'] == recipe) & (df['AuthorId'] == author)]) > 0:
        index = df[(df['RecipeId'] == recipe) & (df['AuthorId'] == author)].index
        print(index[0])
        df.at[index[0], "Rating"] = rating
        return df

    df.loc[len(df)] = {"RecipeId": recipe, "AuthorId": author, "Rating": rating}

In [None]:
reviews

#### Recipes with many reviews
---

##### Limiting recipe to be used in the recommendation system to those that have at least 20 reviews from reviews that have made at least 15 reviews themselves

In [None]:
def min_review_reviewers(min_rev, min_auth):
    author_review_counts = reviews.groupby('AuthorId').size()
    author_review_count_greater_than = author_review_counts[author_review_counts > min_auth]
    
    recipe_review_count = reviews.groupby('RecipeId').size()
    recipe_review_count_greater_than = recipe_review_count[recipe_review_count > min_rev]
    
    recipes_with_many_reviews = reviews[reviews['AuthorId'].isin(author_review_count_greater_than.index)]
    recipes_with_many_reviews = recipes_with_many_reviews[recipes_with_many_reviews['RecipeId'].isin(recipe_review_count_greater_than.index)]
    
    recipes_with_many_reviews.reset_index(inplace=True)
    
    recipes_with_many_reviews = recipes_with_many_reviews.drop('index', axis=1)
    return recipes_with_many_reviews

In [None]:
recipes_with_many_reviews = min_review_reviewers(19, 9)

#### Cold-start solution
---

##### To aid easing in the cold start problem we have decided to randomly select a subset of 10 recipes for the user from the top 10% reviewd items on the app. We will then start incorporating a more personalized recommendation when we have enough information about the user

In [None]:
def obtain_top_recipes(percentage):
    average_rating = recipes_with_many_reviews.groupby("RecipeId").mean("Rating")
    percentile = average_rating['Rating'].quantile(percentage)
    top_n_percent_recipes = average_rating[average_rating['Rating'] >= percentile]
    return top_n_percent_recipes

In [None]:
top_recipes = obtain_top_recipes(.9)

In [None]:
top_recipes.sample(n=10)

### User-User 
---

#### A user-user collaborative system in which the similarity between users is found based on a cosin similarity. This similarity is then used to find similar intrest and find common recipies the user will like.

In [3]:
def compute_user_similarity():
    authorIds = recipes_with_many_reviews["AuthorId"].unique()
    sorted_authorIds = np.sort(authorIds)
    reviews_pivoted = recipes_with_many_reviews.pivot(index='AuthorId', columns='RecipeId', values='Rating').fillna(0)
    return pd.DataFrame(cosine_similarity(reviews_pivoted), index=reviews_pivoted.index, columns=reviews_pivoted.index)

In [4]:
def recommend_recipes(user_id,  top_n=10):
    similar_users = similarity_matrix.loc[user_id].sort_values(ascending=False)[1:10] 
    most_similar_users = similar_users[similar_users > .5]

    recommended_recipes = []
    for similar_user in most_similar_users.index:
      top_recipes = recipes_with_many_reviews[recipes_with_many_reviews['AuthorId'] == similar_user].sort_values('Rating', ascending=False)

    user_recipes = recipes_with_many_reviews[recipes_with_many_reviews["AuthorId"] == user_id]["RecipeId"]
    top_recipes = top_recipes[~top_recipes['RecipeId'].isin(list(user_recipes))]

    recommended_items = list(set(recommended_recipes))[:top_n]
    return recipes[recipes['RecipeId'].isin(list(top_recipes["RecipeId"]))]

In [None]:
similarity_matrix = compute_user_similarity()

In [None]:
similarity_matrix

##### Test
---

In [None]:
rando_df = recipes_with_many_reviews[recipes_with_many_reviews["AuthorId"]==1533].head(10)[["RecipeId", "Rating"]]

In [None]:
recipe_test = list(rando_df["RecipeId"])

In [None]:
rating_test = list(rando_df["Rating"])

In [None]:
for i in range(len(recipe_test)):
    add_review(reviews, recipe_test[i], 111111111111111, rating_test[i])

In [None]:
recipes_with_many_reviews[recipes_with_many_reviews["AuthorId"]==1533]

In [None]:
recommended_recipes = recommend_recipes(111111111111111, 10)
recommended_recipes

### Item-Item Recommendation
---

#### An item-item recomender used to be able to find similar recipes to one another. This can be used to recommend simmilar recipies to the ones users liked. 

In [5]:
recipes_w_keywords = recipes.loc[:, recipes.columns != 'Name']

In [56]:
def compute_recipe_similarity(recipes_w_keywords, lower, upper, intervals=10):
    part = len(recipes_w_keywords)//intervals
    
    if abs(lower - upper) != 1:
        print("lower and upper must be one away")
        return

    if lower < 0 or upper > 10:
        print("values for uper and lower are out of bound")
        return 
        
    partial_df = recipes_w_keywords.iloc[part*lower:part*upper]
    final_similarities = cosine_similarity(partial_df.iloc[:, 1:])

    return final_similarities

In [50]:
def recommend_recipes(recipe_id, recipe_sim, num_recommendations=5):
    idx = recipes_w_keywords[recipes_w_keywords['RecipeId'] == recipe_id].index[0]
    sim_scores = list(enumerate(recipe_sim[idx%len(recipe_sim)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]

    recipe_indices = [i[0] for i in sim_scores]
    return recipes_w_keywords['RecipeId'].iloc[recipe_indices], sim_scores


In [None]:
compute_recipe_similarity(recipes_w_keywords)

In [6]:
recipes_w_keywords

Unnamed: 0,RecipeId,Chicken Stews,Cucumber,Birthday,Toddler Friendly,Quail,Canning,Hanukkah,Weeknight,Vegetable,...,Steam,Bean Soup,For Large Groups Holiday/Event,Szechuan,Pennsylvania Dutch,Grapes,White Rice,Nigerian,Spicy,Pressure Cooker
0,38,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,39,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,40,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,41,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,42,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522512,541379,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
522513,541380,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
522514,541381,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
522515,541382,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


##### Test
---

In [7]:
part = len(recipes_w_keywords)//10

In [8]:
partial_df = recipes_w_keywords.iloc[:part]

In [9]:
partial_df

Unnamed: 0,RecipeId,Chicken Stews,Cucumber,Birthday,Toddler Friendly,Quail,Canning,Hanukkah,Weeknight,Vegetable,...,Steam,Bean Soup,For Large Groups Holiday/Event,Szechuan,Pennsylvania Dutch,Grapes,White Rice,Nigerian,Spicy,Pressure Cooker
0,38,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,39,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,40,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,41,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,42,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52246,56400,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
52247,56401,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
52248,56402,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
52249,56403,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
recipe_similarities_1 = cosine_similarity(partial_df.iloc[:, 1:])

In [13]:
recipe_similarities_1

array([[1.        , 0.11785113, 0.59628479, ..., 0.42163702, 0.        ,
        0.        ],
       [0.11785113, 1.        , 0.        , ..., 0.1118034 , 0.57735027,
        0.28867513],
       [0.59628479, 0.        , 1.        , ..., 0.56568542, 0.18257419,
        0.        ],
       ...,
       [0.42163702, 0.1118034 , 0.56568542, ..., 1.        , 0.38729833,
        0.12909944],
       [0.        , 0.57735027, 0.18257419, ..., 0.38729833, 1.        ,
        0.16666667],
       [0.        , 0.28867513, 0.        , ..., 0.12909944, 0.16666667,
        1.        ]])

In [22]:
recommended_recipes, scores = recommend_recipes(50, recipe_similarities_1, 10)
print(recommended_recipes)
print(scores)

326        393
31360    34989
46981    50998
363        434
3692      6433
5512      8481
7023     10144
16371    19702
17266    20614
17645    21000
Name: RecipeId, dtype: int64
[(326, 1.0), (31360, 1.0), (46981, 1.0), (363, 0.912870929175277), (3692, 0.912870929175277), (5512, 0.912870929175277), (7023, 0.912870929175277), (16371, 0.912870929175277), (17266, 0.912870929175277), (17645, 0.912870929175277)]


In [23]:
recipes.iloc[12]

RecipeId                          50
Name               Biscotti Di Prato
Chicken Stews                      0
Cucumber                           0
Birthday                           0
                         ...        
Grapes                             0
White Rice                         0
Nigerian                           0
Spicy                              0
Pressure Cooker                    0
Name: 12, Length: 316, dtype: object

In [24]:
recipes.iloc[recommended_recipes.index]

Unnamed: 0,RecipeId,Name,Chicken Stews,Cucumber,Birthday,Toddler Friendly,Quail,Canning,Hanukkah,Weeknight,...,Steam,Bean Soup,For Large Groups Holiday/Event,Szechuan,Pennsylvania Dutch,Grapes,White Rice,Nigerian,Spicy,Pressure Cooker
326,393,Chocolate and Vanilla Chip Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
31360,34989,Gourmet Cappuccino Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
46981,50998,Fig Newton Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
363,434,Cherry-Pistachio Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3692,6433,Spritzgeback (Spritz Cookies),0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5512,8481,Kahlua Almond Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7023,10144,Triple Chocolate Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
16371,19702,Kourabiedes (Greek Cookies),0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
17266,20614,Hazelnut Biscotti,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
17645,21000,Italian Wine Biscuits,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [25]:
partial_df_2 = recipes_w_keywords.iloc[part:part*2]

In [26]:
partial_df_2

Unnamed: 0,RecipeId,Chicken Stews,Cucumber,Birthday,Toddler Friendly,Quail,Canning,Hanukkah,Weeknight,Vegetable,...,Steam,Bean Soup,For Large Groups Holiday/Event,Szechuan,Pennsylvania Dutch,Grapes,White Rice,Nigerian,Spicy,Pressure Cooker
52251,56405,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
52252,56406,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
52253,56407,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
52254,56408,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
52255,56409,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104497,110180,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104498,110181,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104499,110182,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104500,110183,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [57]:
recipe_similarities_2 = compute_recipe_similarity(recipes_w_keywords, 1, 2, intervals=10)

In [58]:
recipe_similarities_2

array([[1.        , 0.        , 0.        , ..., 0.2236068 , 0.2236068 ,
        0.33333333],
       [0.        , 1.        , 0.        , ..., 0.31622777, 0.        ,
        0.23570226],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.2236068 , 0.31622777, 0.        , ..., 1.        , 0.        ,
        0.4472136 ],
       [0.2236068 , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.23570226, 0.        , ..., 0.4472136 , 0.        ,
        1.        ]])

In [28]:
recipe_similarities_2

array([[1.        , 0.        , 0.        , ..., 0.2236068 , 0.2236068 ,
        0.33333333],
       [0.        , 1.        , 0.        , ..., 0.31622777, 0.        ,
        0.23570226],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.2236068 , 0.31622777, 0.        , ..., 1.        , 0.        ,
        0.4472136 ],
       [0.2236068 , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.23570226, 0.        , ..., 0.4472136 , 0.        ,
        1.        ]])

In [59]:
recommended_recipes, scores = recommend_recipes(56408, recipe_similarities_2, 5)
print(recommended_recipes)
print(scores)

17812    21167
26059    29591
39258    43057
51902    56047
7737     10867
Name: RecipeId, dtype: int64
[(17812, 1.0000000000000002), (26059, 1.0000000000000002), (39258, 1.0000000000000002), (51902, 1.0000000000000002), (7737, 0.8660254037844388)]


In [60]:
recipes.iloc[52254]

RecipeId                    56408
Name               Prawn Mexicana
Chicken Stews                   0
Cucumber                        0
Birthday                        0
                        ...      
Grapes                          0
White Rice                      0
Nigerian                        0
Spicy                           1
Pressure Cooker                 0
Name: 52254, Length: 316, dtype: object

In [61]:
recipes.iloc[recommended_recipes.index+len(recipe_similarities_1)]

Unnamed: 0,RecipeId,Name,Chicken Stews,Cucumber,Birthday,Toddler Friendly,Quail,Canning,Hanukkah,Weeknight,...,Steam,Bean Soup,For Large Groups Holiday/Event,Szechuan,Pennsylvania Dutch,Grapes,White Rice,Nigerian,Spicy,Pressure Cooker
70063,74737,Barbecued Shrimp,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
78310,83281,Firecracker Shrimp,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
91509,96832,Boiled Shrimp With Spicy Butter Sauce,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
104153,109826,Barbecued Cajun Shrimp,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
59988,64339,Rellenos Jaibas,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


#### Extra Work (Not Pertinant as of now)
---

In [None]:
def density(df):
    total_cells = df.size
    cells_greater_than_zero = df[df > 0].count().sum()
    return (cells_greater_than_zero / total_cells) * 100

In [None]:
print(density(reviews_pivoted))