In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity 
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics.pairwise import euclidean_distances

%matplotlib inline

In [2]:
master_df = pd.read_csv('data/master_df_for_modeling.csv')

In [3]:
master_df.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2,33120,1,1,Organic Egg Whites,86,16,dairy eggs,eggs,202279,prior,3,Friday,9,8.0
1,2,28985,2,1,Michigan Organic Kale,83,4,produce,fresh vegetables,202279,prior,3,Friday,9,8.0
2,2,17794,6,1,Carrots,83,4,produce,fresh vegetables,202279,prior,3,Friday,9,8.0
3,2,9327,3,0,Garlic Powder,104,13,pantry,spices seasonings,202279,prior,3,Friday,9,8.0
4,2,45918,4,1,Coconut Butter,19,13,pantry,oils vinegars,202279,prior,3,Friday,9,8.0


In [4]:
master_df.shape

(32434489, 15)

## Building a Recommender System Using master_Df & cosine similarity

In [5]:
master_df['reordered'].value_counts()

1    19126536
0    13307953
Name: reordered, dtype: int64

In [6]:
master_reorders = master_df[master_df['reordered'] == 1]

In [7]:
master_reorders['volume_higher'] = (master_reorders['product_id'].value_counts().sort_values(ascending=False) > 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [8]:
master_reorders['volume_higher'].value_counts()

True     25731
False     1761
Name: volume_higher, dtype: int64

In [9]:
reorders = master_reorders[master_reorders['reordered'] == 1]
reorders.head()
print(reorders.shape)

(19126536, 16)


In [10]:
high_volume = reorders[reorders['volume_higher'] == True]


In [11]:
high_volume_users = high_volume.groupby(['user_id', 'product_name']).size().sort_values(ascending=False).unstack().fillna(0)

In [12]:
high_volume['reordered'].value_counts()

1    25731
Name: reordered, dtype: int64

In [13]:
# read in prior_orders_cleaned
prior_orders_clean = pd.read_csv('data/prior_orders_cleaned_df.csv')
prior_orders_clean.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle
0,2,33120,1,1,Organic Egg Whites,86,16,dairy eggs,eggs
1,22,23341,6,1,Large Grade AA Eggs,86,16,dairy eggs,eggs
2,25,6383,7,1,All Whites 100% Egg Whites,86,16,dairy eggs,eggs
3,26,33120,5,0,Organic Egg Whites,86,16,dairy eggs,eggs
4,55,11520,12,0,Large Alfresco Eggs,86,16,dairy eggs,eggs


In [14]:
orders = pd.read_csv('./data/orders.csv')

In [15]:
orders2 = orders[['order_id', 'user_id']]

In [16]:
user_orders = reorders.merge(orders2, on='order_id')

In [17]:
user_orders['high_volume'] = (user_orders['product_id'].value_counts().sort_values(ascending=False) > 1)
user_orders.shape

(19126536, 18)

In [18]:
high_volume = user_orders[user_orders['high_volume'] == True]
high_volume.shape
high_volume.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle,user_id_x,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,volume_higher,user_id_y,high_volume
1,2,28985,2,1,Michigan Organic Kale,83,4,produce,fresh vegetables,202279,prior,3,Friday,9,8.0,True,202279,True
2,2,17794,6,1,Carrots,83,4,produce,fresh vegetables,202279,prior,3,Friday,9,8.0,True,202279,True
3,2,45918,4,1,Coconut Butter,19,13,pantry,oils vinegars,202279,prior,3,Friday,9,8.0,True,202279,True
4,2,40141,7,1,Original Unflavored Gelatine Mix,105,13,pantry,doughs gelatins bake mixes,202279,prior,3,Friday,9,8.0,True,202279,True
5,2,1819,8,1,All Natural No Stir Creamy Almond Butter,88,13,pantry,spreads,202279,prior,3,Friday,9,8.0,True,202279,True


In [19]:
high_volume['reordered'].value_counts()

1    42377
Name: reordered, dtype: int64

In [20]:
high_volume_users = high_volume.groupby(['user_id_x', 'product_name']).size().sort_values(ascending=False).unstack().fillna(0)
high_volume_users

product_name,#2 Coffee Filters,0% Fat Black Cherry Greek Yogurt y,0% Fat Blueberry Greek Yogurt,0% Fat Free Organic Milk,0% Fat Organic Greek Vanilla Yogurt,0% Greek Yogurt Black Cherry on the Bottom,"0% Greek, Blueberry on the Bottom Yogurt",1 % Lowfat Milk,1 Apple + 1 Pear Fruit Bar,1 Liter,...,"Zucchini Squash, Baby Courgette",ZzzQuil Liquid Warming Berry Flavor Sleep-Aid,for Tots Apple Juice,smartwater® Electrolyte Enhanced Water,vitaminwater® XXX Acai Blueberry Pomegranate,with Crispy Almonds Cereal,with Olive Oil Mayonnaise,with Olive Oil Mayonnaise Dressing,with Xylitol Minty Sweet Twist 18 Sticks Sugar Free Gum,with a Splash of Mango Coconut Water
user_id_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
46,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
68,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
137,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206193,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
recommender_cosine = pairwise_distances(high_volume_users, metric='cosine')
recommender_cosine.shape

(4833, 4833)

In [22]:
recommender_corr = pairwise_distances(high_volume_users, metric='correlation')
recommender_corr.shape

(4833, 4833)

In [23]:
recommender_cosine_df = pd.DataFrame(recommender_cosine, columns=high_volume_users.index, index=high_volume_users.index)
recommender_cosine_df

user_id_x,7,46,68,137,140,150,155,226,396,417,...,206025,206038,206103,206104,206105,206165,206193,206206,206207,206208
user_id_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,3.330669e-16,1.000000e+00,1.000000e+00,1.000000e+00,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,1.0,1.000000,1.000000,1.000000e+00
46,1.000000e+00,3.330669e-16,1.000000e+00,1.000000e+00,1.000000,1.0,1.0,0.857143,1.000000,1.0,...,1.000000,0.866369,1.000000,1.000000,1.0,1.000000e+00,1.0,1.000000,1.000000,1.000000e+00
68,1.000000e+00,1.000000e+00,2.220446e-16,1.000000e+00,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,1.0,1.000000,1.000000,1.000000e+00
137,1.000000e+00,1.000000e+00,1.000000e+00,1.110223e-16,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,1.0,1.000000,1.000000,1.000000e+00
140,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,0.000000,1.0,1.0,1.000000,0.894591,1.0,...,0.903775,1.000000,1.000000,0.937006,1.0,1.000000e+00,1.0,0.882149,1.000000,9.254644e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206165,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,0.866369,1.000000,1.000000,1.0,3.330669e-16,1.0,1.000000,1.000000,1.000000e+00
206193,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,0.0,1.000000,1.000000,1.000000e+00
206206,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,0.882149,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,1.0,0.000000,1.000000,1.000000e+00
206207,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000,1.0,1.0,1.000000,1.000000,1.0,...,1.000000,1.000000,1.000000,1.000000,1.0,1.000000e+00,1.0,1.000000,0.000000,8.709006e-01


In [24]:
recommender_corr_df = pd.DataFrame(recommender_corr, columns=high_volume_users.index, index=high_volume_users.index)
recommender_corr_df

user_id_x,7,46,68,137,140,150,155,226,396,417,...,206025,206038,206103,206104,206105,206165,206193,206206,206207,206208
user_id_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,0.000000,1.000824,1.000440,1.000985,1.001322,1.000539,1.000881,1.000824,1.000696,1.000539,...,1.000763,1.000881,1.000539,1.001166,1.000824,1.000824,1.000763,1.000623,1.000763,1.000985
46,1.000824,0.000000,1.000440,1.000985,1.001322,1.000539,1.000881,0.857849,1.000696,1.000539,...,1.000763,0.867132,1.000539,1.001166,1.000824,1.000824,1.000763,1.000623,1.000763,1.000985
68,1.000440,1.000440,0.000000,1.000526,1.000707,1.000288,1.000471,1.000440,1.000372,1.000288,...,1.000408,1.000471,1.000288,1.000623,1.000440,1.000440,1.000408,1.000333,1.000408,1.000526
137,1.000985,1.000985,1.000526,0.000000,1.001581,1.000645,1.001053,1.000985,1.000833,1.000645,...,1.000912,1.001053,1.000645,1.001394,1.000985,1.000985,1.000912,1.000745,1.000912,1.001178
140,1.001322,1.001322,1.000707,1.001581,0.000000,1.000865,1.001414,1.001322,0.895565,1.000865,...,0.904863,1.001414,1.000865,0.938758,1.001322,1.001322,1.001224,0.882996,1.001224,0.926922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206165,1.000824,1.000824,1.000440,1.000985,1.001322,1.000539,1.000881,1.000824,1.000696,1.000539,...,1.000763,0.867132,1.000539,1.001166,1.000824,0.000000,1.000763,1.000623,1.000763,1.000985
206193,1.000763,1.000763,1.000408,1.000912,1.001224,1.000499,1.000816,1.000763,1.000645,1.000499,...,1.000706,1.000816,1.000499,1.001079,1.000763,1.000763,0.000000,1.000577,1.000706,1.000912
206206,1.000623,1.000623,1.000333,1.000745,0.882996,1.000408,1.000666,1.000623,1.000526,1.000408,...,1.000577,1.000666,1.000408,1.000881,1.000623,1.000623,1.000577,0.000000,1.000577,1.000745
206207,1.000763,1.000763,1.000408,1.000912,1.001224,1.000499,1.000816,1.000763,1.000645,1.000499,...,1.000706,1.000816,1.000499,1.001079,1.000763,1.000763,1.000706,1.000577,0.000000,0.871691


In [25]:
recommender = cosine_similarity(high_volume_users)

In [26]:
recommender_df = pd.DataFrame(recommender, columns=high_volume_users.index, index=high_volume_users.index)
recommender_df

user_id_x,7,46,68,137,140,150,155,226,396,417,...,206025,206038,206103,206104,206105,206165,206193,206206,206207,206208
user_id_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,1.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000
46,0.0,1.0,0.0,0.0,0.000000,0.0,0.0,0.142857,0.000000,0.0,...,0.000000,0.133631,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000
68,0.0,0.0,1.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000
137,0.0,0.0,0.0,1.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000
140,0.0,0.0,0.0,0.0,1.000000,0.0,0.0,0.000000,0.105409,0.0,...,0.096225,0.000000,0.000000,0.062994,0.0,0.0,0.0,0.117851,0.000000,0.074536
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206165,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.133631,0.000000,0.000000,0.0,1.0,0.0,0.000000,0.000000,0.000000
206193,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,1.0,0.000000,0.000000,0.000000
206206,0.0,0.0,0.0,0.0,0.117851,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,1.000000,0.000000,0.000000
206207,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,1.000000,0.129099


In [27]:
manhattan_dists = pd.DataFrame(manhattan_distances(high_volume_users),index=high_volume_users.index, columns=high_volume_users.index)
cosine_dists.head()

NameError: name 'cosine_dists' is not defined

In [None]:
cosine_dists = pd.DataFrame(cosine_similarity(high_volume_users),index=high_volume_users.index, columns=high_volume_users.index)
cosine_dists.head()

In [None]:
# function to enter a user_id and return a list of 10 recommended items
def product_recommender(user_id):
    user = high_volume.groupby(['user_id_x','product_name']).size().sort_values(ascending=False).unstack().fillna(0)
    user_similiartiy = pd.DataFrame(cosine_similarity(user), index=user.index, columns=user.index)

    products = high_volume.groupby(['product_name','user_id_x']).size().sort_values(ascending=False).unstack().fillna(0)
    
    recommended = pd.Series(np.dot(products.values,cosine_dists[user_id]), index=products.index)
    return recommended.sort_values(ascending=False)[0:11]

In [80]:
# function to enter a user_id and return a list of 10 recommended items
def product_recommender_tier2(user_id):
    user = high_volume.groupby(['user_id_x','product_name']).size().sort_values(ascending=False).unstack().fillna(0)
    user_similiartiy = pd.DataFrame(cosine_similarity(user), index=user.index, columns=user.index)

    products = high_volume.groupby(['product_name','user_id_x']).size().sort_values(ascending=False).unstack().fillna(0)
    
    recommended = pd.Series(np.dot(products.values,cosine_dists[user_id]), index=products.index)
    return recommended.sort_values(ascending=False)[22:31]

In [81]:
product_recommender_tier2(155)

product_name
Unsweetened Vanilla Almond Milk      0.823983
Organic Large Grade AA Brown Eggs    0.811911
Limes                                0.796422
Organic Whole Strawberries           0.774106
Sparkling Water Grapefruit           0.772956
Organic Unsweetened Almond Milk      0.765610
Organic Navel Orange                 0.749427
Sparkling Natural Mineral Water      0.741536
Yellow Onions                        0.735702
dtype: float64

In [None]:
user_orders.columns

In [63]:
product_recommender(155)

product_name
Organic Egg Whites                   30.202771
100% Raw Coconut Water                6.140192
Organic Baby Spinach                  3.951781
Banana                                3.699367
Bag of Organic Bananas                3.558407
Organic Hass Avocado                  2.550437
Organic Strawberries                  2.546313
Organic Raspberries                   1.855532
Organic Grape Tomatoes                1.812495
Organic Zucchini                      1.711898
Organic SprouTofu Super Firm Tofu     1.584164
dtype: float64

In [64]:
product_recommender(206206)

product_name
Large Alfresco Eggs       66.693176
Bag of Organic Bananas     8.485447
Organic Baby Spinach       5.484864
Organic Strawberries       5.418419
Banana                     4.719492
Organic Hass Avocado       4.704853
Organic Avocado            3.483861
Organic Yellow Onion       3.144441
Organic Whole Milk         3.097685
Organic Whole Cashews      2.983461
Organic Zucchini           2.392403
dtype: float64

In [66]:
users = high_volume.user_id_x.unique().tolist()

# calculate recall for 2500 users
def recommender_quality():
    recs = []
    for user in sorted(users)[:1000]:
        recommendations = product_recommender(user)
        top_20_products = [user_id == user].product_name.value_counts().head(20)
    
        recommendations_list = recommendations.index.tolist()
        top_20_items_list = top_20_products.index.tolist()
    
        res.append((len(set(recommendations_list) & set(top_20_items_list)))/5)
    return np.mean(res)
# get metric for the :1000 users
recommender_quality

<function __main__.recommender_quality()>

In [67]:
recommender_corr_df[206206].sort_values()[1:11]
#recommender_2_df['Matrix, The (1999)'].sort_values()[1:11] # top 10 movie recommendations for the matrix

user_id_x
140437    0.292947
144972    0.292947
66804     0.292947
40375     0.292947
201026    0.292947
47087     0.292947
89799     0.292947
151750    0.292947
18640     0.292947
59879     0.422796
Name: 206206, dtype: float64

In [68]:
def product_recommender_corr(user_id):
    user = high_volume.groupby(['user_id_x','product_name']).size().sort_values(ascending=False).unstack().fillna(0)
    user_similiartiy_corr = pd.DataFrame(pairwise_distances(user, metric='correlation'), index=user.index, columns=user.index)

    products = high_volume.groupby(['product_name','user_id_x']).size().sort_values(ascending=False).unstack().fillna(0)
    
    recommended = pd.Series(np.dot(products.values,cosine_dists[user_id]), index=products.index)
    return recommended.sort_values(ascending=False)[0:11]

In [69]:
product_recommender_corr(206206)

product_name
Large Alfresco Eggs       66.693176
Bag of Organic Bananas     8.485447
Organic Baby Spinach       5.484864
Organic Strawberries       5.418419
Banana                     4.719492
Organic Hass Avocado       4.704853
Organic Avocado            3.483861
Organic Yellow Onion       3.144441
Organic Whole Milk         3.097685
Organic Whole Cashews      2.983461
Organic Zucchini           2.392403
dtype: float64

In [70]:
def product_recommender_euclidean(user_id):
    user = high_volume.groupby(['user_id_x','product_name']).size().sort_values(ascending=False).unstack().fillna(0)
    user_similiartiy_corr = pd.DataFrame(pairwise_distances(user, metric='euclidean'), index=user.index, columns=user.index)

    products = high_volume.groupby(['product_name','user_id_x']).size().sort_values(ascending=False).unstack().fillna(0)
    
    recommended = pd.Series(np.dot(products.values,cosine_dists[user_id]), index=products.index)
    return recommended.sort_values(ascending=False)[0:11]

In [71]:
product_recommender_euclidean(206206)

product_name
Large Alfresco Eggs       66.693176
Bag of Organic Bananas     8.485447
Organic Baby Spinach       5.484864
Organic Strawberries       5.418419
Banana                     4.719492
Organic Hass Avocado       4.704853
Organic Avocado            3.483861
Organic Yellow Onion       3.144441
Organic Whole Milk         3.097685
Organic Whole Cashews      2.983461
Organic Zucchini           2.392403
dtype: float64

In [76]:
def product_recommender_manhattan(user_id):
    user = high_volume.groupby(['user_id_x','product_name']).size().sort_values(ascending=False).unstack().fillna(0)
    user_similiartiy_corr = pd.DataFrame(pairwise_distances(user, metric='manhattan'), index=user.index, columns=user.index)

    products = high_volume.groupby(['product_name','user_id_x']).size().sort_values(ascending=False).unstack().fillna(0)
    
    recommended = pd.Series(np.dot(products.values,manhattan_dists[user_id]), index=products.index)
    return recommended.sort_values(ascending=False)[0:11]

In [77]:
product_recommender_manhattan(206206)

product_name
Banana                                         5227.0
Bag of Organic Bananas                         4629.0
Organic Hass Avocado                           3044.0
Organic Baby Spinach                           2921.0
Organic Strawberries                           2803.0
Organic Raspberries                            2277.0
Organic Grade A Free Range Large Brown Eggs    1966.0
Organic Avocado                                1808.0
Organic Large Brown Grade AA Cage Free Eggs    1778.0
Organic Whole Milk                             1714.0
Limes                                          1464.0
dtype: float64