In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
embedder = SentenceTransformer("all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


# Generating Recommendations

The final model will consist of a pair of matrices $U, R$ where the dot product of the $i$ th row of $U$ and $j$ th row of $R$ is equal to our prediction for how likely user $i$ is to interact with recipe $j$.

Suppose we have a real user trying to obtain a recommendation. 

We will take the following into account to produce a list of recipes:
1) The user's history (a list of recipes they say they liked).
2) A user query. This will consist of two parts:
* A string $s$ - freeform text that describes what they want today.
* A real number $x$ between -1 and 1, where $x = -1$ indicates they want to try something different from recipes they've previously tried, $x = 0$ indicates they don't care whether the recipe is new or not, they just want it to match the query, and 1 indicates they want the recipe to be similar to recipes they like.
3) A list of filters (include/exclude certain keywords).

Given this data, we do the following:

1)  We use the filters (3) to discard a large chunk of recipes. This will speed things up.
2) We use the user's history (1) to obtain a vector $u$ that represents $U$ - that way, we can treat the user like one of the users from the dataset. We can do this using KNN. This allows to assign a score to each recipe that indicates how similar the recipe is to other recipes that user has interacted with.
3) We convert the string $s$ (first part of 2) to a vector using a sentence transformer. This allows us to assign a score to each recipe based on how similar it is to the query.
4) We obtain a final score by taking a weighted linear of the scores in 2), 3), where the weight is determined by $x$.

If $x = 0$ we only use 3); if $x = \pm 1$ we only use 2) to obtain the final ranking (if $x = -1$ the ranking is reversed).

In [45]:
recipes_full = pd.read_parquet('../data/recipes.parquet')
reviews = pd.read_pickle('../data/revdense.pk')

In [18]:
recipes=recipes.loc[recipes.index.isin(reviews.RecipeId)]

In [46]:
recipes_full=recipes_full.loc[recipes_full.index.isin(reviews.RecipeId)]

In [20]:
recipes.Description = 

56        I've made this one with stale toast bread it w...
62        This is popular with the kids especially. My h...
76        Make and share this Carrot Cake II recipe from...
153       Make and share this Chinese Plum Sauce recipe ...
198       Make and share this Sorrel Tarragon Sauce reci...
                                ...                        
486659    The tomato, native to the Americas, has made i...
494435    Make and share this Gorilla Punch Green Smooth...
494784    Lovely syrup for a light ginger ale. Recently ...
494972    From Cooking Light August 2008. The key to obt...
495275    From &quot;The Indian Slow Cooker&quot; by Anu...
Name: Description, Length: 5610, dtype: object

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
56,97.0,"Capsicum, Tomato and Crunchy Bread Salad",1543,Doreen Randal,PT40M,PT1H10M,PT1H50M,1999-08-24 04:44:00+00:00,I've made this one with stale toast bread it w...,[],...,3.6,0.0,11.1,12.4,3.2,7.4,2.3,6.0,,"[DRESSING:Separate garlic bulbs into cloves, d..."
62,104.0,Cheeseburger Casserole,1535,Marg CaymanDesigns,PT25M,PT15M,PT40M,1999-08-19 05:30:00+00:00,This is popular with the kids especially. My h...,[https://img.sndimg.com/food/image/upload/w_55...,...,11.3,71.9,1795.1,44.8,1.5,12.6,25.5,6.0,,[Combine ground beef and flour in skillet. Add...
76,120.0,Carrot Cake II,1538,Duckie067,PT1H,PT15M,PT1H15M,1999-09-19 06:21:00+00:00,Make and share this Carrot Cake II recipe from...,[],...,19.4,154.7,720.2,129.9,3.7,89.4,11.5,8.0,,"[Preheat oven to 325°., Add 1 egg at a time to..."
153,203.0,Chinese Plum Sauce,1533,Dancer,PT1H45M,PT10M,PT1H55M,1999-08-06 00:41:00+00:00,Make and share this Chinese Plum Sauce recipe ...,[https://img.sndimg.com/food/image/upload/w_55...,...,0.1,0.0,295.1,79.7,5.6,71.9,2.8,,4 cups,"[In large heavy saucepan, bring plums, onions,..."
198,252.0,Sorrel Tarragon Sauce,1554,Jacques Lorrain,PT4H,PT25M,PT4H25M,1999-10-03 23:28:00+00:00,Make and share this Sorrel Tarragon Sauce reci...,[https://img.sndimg.com/food/image/upload/w_55...,...,19.1,90.3,933.4,36.3,0.7,11.9,5.3,,1 1/2 cups,"[Mix all ingredients in medium bowl., Season w..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486659,504629.0,Florida Tomato Basil Linguini,2140614,Douglas Poe,PT15M,PT40M,PT55M,2013-07-25 23:31:00+00:00,"The tomato, native to the Americas, has made i...",[https://img.sndimg.com/food/image/upload/w_55...,...,4.1,0.0,19.4,96.0,6.8,9.4,17.2,4.0,,"[Wash and rinse tomatoes., Dry tomatoes, then ..."
494435,512614.0,Gorilla Punch Green Smoothie,37449,Sharon123,,PT10M,PT10M,2014-01-25 18:44:00+00:00,Make and share this Gorilla Punch Green Smooth...,[],...,0.9,0.0,65.3,49.2,8.2,24.6,7.1,2.0,4-5 cups,[. Blend kale and liquid together first (this ...
494784,512968.0,Ginger Ale Syrup for SodaStream,586037,lastrohm,PT10M,PT10M,PT20M,2014-02-03 22:17:00+00:00,Lovely syrup for a light ginger ale. Recently ...,[],...,0.0,0.0,10.0,164.3,0.4,160.3,0.4,,2 1/2 Cups,"[Put all ingredients into a sauce pan., Bring ..."
494972,513157.0,Seared Scallops With Fresh Linguine and Romano...,1436160,GibbyLou,PT15M,PT10M,PT25M,2014-02-09 20:18:00+00:00,From Cooking Light August 2008. The key to obt...,[],...,2.7,95.8,854.0,41.5,0.3,0.2,30.5,4.0,,"[Cook pasta according to package directions, o..."


In [21]:
recdescvecs = embedder.encode(recipes.Description.values)

In [70]:
recipes[]

Unnamed: 0_level_0,DescriptionVec
RecipeId,Unnamed: 1_level_1
97.0,"(-0.060451053, -0.0012286116, -0.01307298, -0...."
104.0,"(-0.006824123, -0.0066656657, 0.039665323, 0.0..."
120.0,"(-0.047035363, 0.0076129567, -0.0053549134, 0...."
203.0,"(-0.101459354, -0.05800118, 0.02029691, 0.0628..."
252.0,"(-0.09888437, 0.035358243, -0.07992559, 0.0474..."
...,...
504629.0,"(-0.07465568, -0.003981607, -0.057263907, 0.05..."
512614.0,"(-0.16249561, -0.053354904, 0.064926036, 0.093..."
512968.0,"(-0.07747272, -0.07021076, -0.014108114, -0.01..."
513157.0,"(-0.044779465, -0.043939754, 0.03749913, 0.052..."


In [22]:
rec_id_to_vec = {(recipes.RecipeId.values[i]):recdescvecs[i] for i in range(len(recipes))}

In [27]:
recipes.set_index('RecipeId',inplace=True)
recipes['DescriptionVec'] = [tuple(t) for t in recdescvecs]
recipes = recipes[['DescriptionVec']]

In [47]:
recipes_full['Name']

56                 Capsicum, Tomato and Crunchy Bread Salad
62                                   Cheeseburger Casserole
76                                           Carrot Cake II
153                                      Chinese Plum Sauce
198                                   Sorrel Tarragon Sauce
                                ...                        
486659                        Florida Tomato Basil Linguini
494435                         Gorilla Punch Green Smoothie
494784                      Ginger Ale Syrup for SodaStream
494972    Seared Scallops With Fresh Linguine and Romano...
495275                   Goan Black-Eyed Peas (Slow Cooker)
Name: Name, Length: 5610, dtype: object

In [30]:
recipes.to_parquet('../data/embedded_recipe_descriptions.parquet')

In [31]:
reviews['Dummy'] = 1
mat_df = pd.pivot_table(data=reviews,index='UserId',columns='RecipeId',values='Dummy',fill_value = 0)
user_ids_ordered = list(mat_df.index)
recipe_ids_ordered = list(mat_df.columns)
user_id_to_index = {user_ids_ordered[i]:i for i in range(len(user_ids_ordered))}
recipe_id_to_index = {recipe_ids_ordered[i]:i for i in range(len(recipe_ids_ordered))}
cf_mat = mat_df.values



In [34]:
def get_matrix_fac(mat,rank):
    U, D, Vh = np.linalg.svd([r-np.mean(r) for r in cf_mat],full_matrices=False)
    rtdiag = np.diag([np.sqrt(s) for s in D])
    U_mat = np.matmul(U,rtdiag)
    R_mat = np.matmul(rtdiag,Vh)
    newrmat = R_mat[:rank]
    newumat = np.transpose(np.transpose(U_mat)[:rank])
    return newumat,newrmat

In [35]:
U6mat,R6mat = get_matrix_fac(cf_mat,6)

In [65]:
np.transpose(R6mat)[0]

array([-0.05408717,  0.06129862,  0.02235218, -0.10842328, -0.0745006 ,
        0.01663549])

In [32]:
user_mean_mat = cf_mat-[r-np.mean(r) for r in cf_mat]
U, D, Vh = np.linalg.svd([r-np.mean(r) for r in cf_mat],full_matrices=False)
rtdiag = np.diag([np.sqrt(s) for s in D])
U_mat = np.matmul(U,rtdiag)
R_mat = np.matmul(rtdiag,Vh)

In [60]:
def get_ranking(desc_vec_df:pd.DataFrame,user_vec:list,Rmat:np.array,query:str,al:float):
    recipe_id_list = list(desc_vec_df.index)
    recipe_ids = [rid for rid in recipe_id_list if rid in recipe_id_to_index]
    recipe_cf_scores = np.array([
        np.dot(user_vec,Rmat[recipe_id_to_index[rid]]) 
        for rid in recipe_ids])
    query_vec = embedder.encode(query)
    recipe_qu_scores = np.array([
        np.dot(query_vec,desc_vec_df['DescriptionVec'][rid])
        for rid in recipe_ids
    ])
    recipe_cf_scores = recipe_cf_scores/np.std(recipe_cf_scores)
    recipe_qu_scores = recipe_qu_scores/np.std(recipe_qu_scores)
    combined_scores = al*recipe_cf_scores+(1-abs(al))*recipe_qu_scores
    inds = [i for i in range(len(recipe_ids))]
    inds.sort(key=lambda x:combined_scores[x],reverse=True)
    return [int(recipe_ids[i] )for i in inds]
    

In [61]:
get_ranking(recipes,U6mat[23],np.transpose(R6mat),'Chocolate cake dessert',0.5)

[48241,
 29935,
 64015,
 45609,
 101041,
 11799,
 24672,
 29478,
 13745,
 30425,
 71876,
 12210,
 21178,
 33649,
 18053,
 14226,
 25969,
 25628,
 56407,
 59772,
 57771,
 12431,
 16438,
 109423,
 15668,
 12591,
 17509,
 43332,
 52285,
 34569,
 66032,
 46773,
 135123,
 39337,
 52206,
 14199,
 90032,
 32740,
 14026,
 34257,
 78237,
 37861,
 71109,
 17985,
 55262,
 13906,
 56366,
 43813,
 13546,
 44853,
 35593,
 146022,
 28501,
 52251,
 73011,
 17118,
 56260,
 26287,
 43115,
 14756,
 41218,
 43068,
 11633,
 96994,
 24799,
 13612,
 17309,
 26526,
 12509,
 46286,
 43936,
 18597,
 49596,
 19322,
 29493,
 26451,
 14398,
 30745,
 30325,
 35065,
 14681,
 94198,
 95312,
 87698,
 52348,
 24154,
 23052,
 23597,
 11657,
 23116,
 25625,
 29833,
 45070,
 16479,
 70179,
 29074,
 17823,
 248350,
 105830,
 67293,
 55768,
 93596,
 26168,
 29977,
 42071,
 5108,
 121064,
 14381,
 28559,
 18255,
 118880,
 25388,
 18200,
 17977,
 11424,
 70097,
 32817,
 13285,
 24175,
 31314,
 115110,
 32349,
 31300,
 22981,


In [48]:
def rec_ids_to_names(rid:int):
    return recipes_full['Name'][rid]

In [69]:
[rec_ids_to_names(rid) for rid in get_ranking(recipes,U6mat[23],np.transpose(R6mat),'Chocolate cake',0.1)]

['Red Curry with Vegetables',
 'Cherry Brandy',
 'Springtime Chicken Salad',
 'Bacon & Cheese Stuffed Chicken',
 'Meringue Cloud Hearts',
 'Fresh Tomato Salsa',
 'Creamy Orange Salad Dressing',
 'No Bake Cherry Cheesecake Pie',
 'Korean Roast Chicken Thighs',
 "Nadia's Quick Chicken Curry",
 'Mix Mushrooms Noodles',
 'Cranberry-Orange Trifle',
 'Mediterranean Lemon Chicken',
 'Mini Meat Loaves',
 'Quick Homemade Ravioli',
 'Halloween Cupcakes',
 'Cream Cheese Flan',
 'Zesty Caesar Mix',
 'Walnut Rum Pastry Cookies',
 'Cajun Style Blackened Catfish',
 'Frozen Daiquiri',
 'Best Ever Peanut Butter Fudge',
 "Easy Hershey's Bar Swirl Cake and Glaze",
 'Lemon Pepper Chicken Breasts on a Bed of Rice',
 'Pea Soup',
 'Kiwi Parfait',
 'Eggless Date and Walnut Cake',
 'Berginie Linguine Sauce for Two',
 'Chocolate Chip Cookies',
 'Zucchini with yoghurt',
 'Turkey Avocado Jalapeno Club',
 'Cranberry Butter',
 'chicken-jalapeno pizza',
 'Minestrone',
 'Apple Doughnuts (Baked)',
 'Party Favorite BLT