# Full credit to Jason Le / Alex Ellman 

https://towardsdatascience.com/instacart-market-basket-analysis-part-3-which-sets-of-products-should-be-recommended-to-shoppers-9651751d3cd3
from his blog at https://towardsdatascience.com/creating-a-grocery-product-recommender-for-instacart-c1b6bdf5ae13.  He created the functions for creating the Association Rules, and they were so clean and well organized that I couldn't imagine rewriting them myself.

I have used his functions with very few changes, apart from some of the variable names, print statements, and comments.  Our outputs differ because I clustered my users slightly differently and created 20 clusters instead of his 6.

## Dataset information

This data was retrieved from Kaggle and was provided by Instacart for a market basket analysis competition in 2018.  

The data is divided into 6 files:

- **_Aisles.csv_**: 134 Unique aisle numbers and descriptions
- **_Departments.csv_**: 21 Unique department numbers and descriptions
- **_Products.csv_**: 49,688 Unique product ids, with description, aisle id, and department id
- **_Orders.csv_**: 3,421,083 Unique order id, with user id, order number, order_dow, order_hour_of_day, days_since_prior_order, and eval_set indicating if the order is in train, prior, or test
- **_Order_products_train.csv_**: Order id, product id, add to cart order, and reorder indicator
- **_Order_products_prior.csv_**: Order id, product id, add to cart order, and reorder indicator


## Exploratory Data Analysis

### Import and basic info

**Output** - aisles, orders, products, departments, order_products_prior, order_products_train

In [1]:
import pandas as pd
import numpy as np
import itertools
import pickle
from collections import Counter
from itertools import combinations, groupby, chain
import warnings
warnings.filterwarnings('ignore')

## Prepare Data for Market Basket Analysis

In [76]:
# order_products_prior = pd.read_csv("Data/order_products__prior.csv")
# order_products_train = pd.read_csv("Data/order_products__train.csv")
# orders = pd.read_csv("Data/orders.csv")
aisles = pd.read_csv("Data/aisles.csv")
# grouped_users = pickle.load(open("Pickle/grouped_users.p", "rb"))
# orders_test = orders[orders['eval_set']=='test']

# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# pickle files that were produced by a trusted source.
# Each file is opened in a context manager so the handle is closed as soon as
# the object has been read.
with open("Pickle/products_desc.p", "rb") as fh:
    products_desc = pickle.load(fh)
with open("Pickle/merged_orders.p", "rb") as fh:
    merged_orders = pickle.load(fh)
with open("Pickle/clustered_users.p", "rb") as fh:
    clustered_users = pickle.load(fh)

In [3]:
# I want to calculate the association rules by each cluster.  It will make the data smaller.
# Sorting by order_id puts every order's rows next to each other; get_item_pairs groups
# consecutive rows by order id, so it presumably relies on this ordering surviving the later steps.
merged_orders = merged_orders.sort_values(['order_id','product_id'])
# Preview the first rows of the sorted frame
merged_orders.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department
18486555,1,112108,train,4,4,10,9.0,10246,3,0,Organic Celery Hearts,83,4,fresh vegetables,produce
18486554,1,112108,train,4,4,10,9.0,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,other creams cheeses,dairy eggs
18486558,1,112108,train,4,4,10,9.0,13176,6,0,Bag of Organic Bananas,24,4,fresh fruits,produce
18486560,1,112108,train,4,4,10,9.0,22035,8,1,Organic Whole String Cheese,21,16,packaged cheese,dairy eggs
18486557,1,112108,train,4,4,10,9.0,43633,5,1,Lightly Smoked Sardines in Olive Oil,95,15,canned meat seafood,canned goods


In [4]:
# Move the current index into an ordinary column (in place) and replace it with a default integer index.
# presumably this exposes 'user_id' as a column for the merge in the next cell; verify against the pickled frame
clustered_users.reset_index(inplace=True)

In [5]:
# Merging these to add the cluster to each order
# merge defaults to an inner join, so order rows whose user_id has no row in clustered_users are dropped
clustered_orders = merged_orders.merge(clustered_users[['user_id','cluster']], on = 'user_id')

## Market Basket Analysis

In [6]:
# Returns frequency counts for items and item pairs
def freq(iterable):
    """Count how often each distinct value occurs.

    Parameters
    ----------
    iterable : pandas.Series or any iterable of hashable values
        A Series is counted with ``value_counts``; anything else (an Index,
        a list, a generator of tuples, ...) is consumed once through
        ``collections.Counter``.

    Returns
    -------
    pandas.Series
        Counts named ``"freq"``, indexed by the distinct values.
    """
    # isinstance (rather than an exact type comparison) also accepts Series subclasses
    if isinstance(iterable, pd.Series):
        return iterable.value_counts().rename("freq")
    return pd.Series(Counter(iterable)).rename("freq")

    
# Returns number of unique orders
def order_count(order_item):
    """Return how many distinct index labels (order ids) ``order_item`` has."""
    distinct_order_ids = set(order_item.index)
    return len(distinct_order_ids)


# Returns generator that yields item pairs, one at a time
def get_item_pairs(order_item):
    """Lazily yield every 2-item combination found within each order.

    A generator produces one value at a time, either through ``next`` or
    implicitly in a ``for`` loop, instead of returning all values at once.
    Nothing has to be held in memory beyond the order currently being
    processed, which makes this a good fit for enumerating item pairs and
    counting how often they co-occur.

    ``order_item`` is a Series whose index holds the order id and whose values
    hold the item id.  Rows are grouped with ``itertools.groupby``, which only
    merges *consecutive* rows sharing an order id, so all rows of an order
    must be adjacent in ``order_item``.
    """
    rows = order_item.reset_index().to_numpy()
    # Column 0 is the former index (order id), column 1 the item id
    for _, rows_of_order in groupby(rows, key=lambda row: row[0]):
        items_in_order = [row[1] for row in rows_of_order]
        yield from combinations(items_in_order, 2)
            

# Returns frequency and support associated with item
def merge_item_stats(item_pairs, item_stats):
    """Attach per-item frequency and support to each item pair.

    ``item_stats`` is indexed by item id and has ``freq`` and ``support``
    columns.  Its values are joined onto ``item_pairs`` twice: once keyed on
    ``item_A`` (as ``freqA`` / ``supportA``) and once keyed on ``item_B``
    (as ``freqB`` / ``supportB``).
    """
    stats_for_a = item_stats.rename(columns={'freq': 'freqA', 'support': 'supportA'})
    stats_for_b = item_stats.rename(columns={'freq': 'freqB', 'support': 'supportB'})
    pairs_with_a = item_pairs.merge(stats_for_a, left_on='item_A', right_index=True)
    return pairs_with_a.merge(stats_for_b, left_on='item_B', right_index=True)


# Returns name associated with item
def merge_item_name(rules, item_name):
    """Replace item ids in an association-rule table with item names.

    Parameters
    ----------
    rules : pandas.DataFrame
        Rule table with ``item_A`` / ``item_B`` id columns plus the metric
        columns listed in ``columns`` below.
    item_name : pandas.DataFrame
        Lookup table with ``item_id`` and ``item_name`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per rule with the names in ``itemA`` / ``itemB`` followed by
        the metric columns.  The id and join-key columns are dropped by the
        final column selection.
    """
    columns = ['itemA','itemB','freqAB','supportAB','freqA','supportA','freqB','supportB',
               'confidenceAtoB','confidenceBtoA','lift']
    rules = (rules
                .merge(item_name.rename(columns={'item_name': 'itemA'}), left_on='item_A', right_on='item_id')
                .merge(item_name.rename(columns={'item_name': 'itemB'}), left_on='item_B', right_on='item_id'))
    # The original built `rules` but never returned it, so callers always got None
    return rules[columns]

In [7]:
def association_rules(order_item, min_support):
    """Compute pairwise association rules (support, confidence, lift) for a set of orders.

    Parameters
    ----------
    order_item : pandas.Series
        One row per (order, item): the index holds the order id and the values
        hold the item id.  Rows of the same order must be adjacent, because
        get_item_pairs groups consecutive rows by order id.
    min_support : float
        Minimum support (fraction of orders).  It is applied first to single
        items and later to item pairs.

    Returns
    -------
    pandas.DataFrame
        One row per surviving item pair with columns item_A, item_B, freqAB,
        supportAB, freqA, supportA, freqB, supportB, confidenceAtoB,
        confidenceBtoA and lift, sorted by lift in descending order.

    Progress counts are printed after each filtering step.
    """

#     print("Starting number of order_item pairs: {:22d}".format(len(order_item)))
    print("Starting number of orders, items, order_item pairs: {}, {}, {}".format(order_item.index.nunique(),
                                                                          order_item.nunique(),len(order_item))) 

    # Calculate item frequency and support
    # (support here is relative to the number of distinct orders before any filtering)
    item_stats             = freq(order_item).to_frame("freq")
    item_stats['support']  = item_stats['freq'] / order_count(order_item) 


    # Filter from order_item items below min support 
    qualifying_items       = item_stats[item_stats['support'] >= min_support].index
    order_item             = order_item[order_item.isin(qualifying_items)]

    print("Items with support >= {}: {:15d}".format(min_support, len(qualifying_items)))
#     print("Remaining order_item: {:21d}".format(len(order_item)))
    print("Remaning number of orders, items, order_item pairs: {}, {}, {}".format(order_item.index.nunique(),
                                                                          order_item.nunique(),len(order_item)))


    # Filter from order_item orders with less than 2 items
    # (an order with a single remaining item cannot contribute a pair)
    order_size             = freq(order_item.index)
    qualifying_orders      = order_size[order_size >= 2].index
    order_item             = order_item[order_item.index.isin(qualifying_orders)]

    print("Remaining orders with 2+ items: {:11d}".format(len(qualifying_orders)))
#     print("Remaining order_item: {:21d}".format(len(order_item)))
    print("Remaining number of orders, items, order_item pairs: {}, {}, {}".format(order_item.index.nunique(),
                                                                          order_item.nunique(),len(order_item)))


    # Recalculate item frequency and support
    # (now relative to the orders that survived both filters)
    item_stats             = freq(order_item).to_frame("freq")
    item_stats['support']  = item_stats['freq'] / order_count(order_item) 


    # Get item pairs generator
    item_pair_gen          = get_item_pairs(order_item)


    # Calculate item pair frequency and support
    # freq() consumes the generator through Counter; the (item, item) tuple keys become a two-level index
    item_pairs              = freq(item_pair_gen).to_frame("freqAB")
    item_pairs['supportAB'] = item_pairs['freqAB'] / len(qualifying_orders)

    print("Item pairs: {:31d}".format(len(item_pairs)))


    # Filter from item_pairs those below min support
    item_pairs              = item_pairs[item_pairs['supportAB'] >= min_support]

    print("Item pairs with support >= {}: {:10d}\n".format(min_support, len(item_pairs)))


    # Create table of association rules and compute relevant metrics
    # reset_index exposes the two unnamed index levels as 'level_0' / 'level_1', renamed here to item_A / item_B
    # NOTE(review): this presumably assumes at least one pair was counted; with no pairs the
    # index would not be two-level and these column names would be missing. TODO confirm
    item_pairs = item_pairs.reset_index().rename(columns={'level_0': 'item_A', 'level_1': 'item_B'})
    item_pairs = merge_item_stats(item_pairs, item_stats)
    
    # confidence(A->B) = support(A,B) / support(A); lift = support(A,B) / (support(A) * support(B))
    item_pairs['confidenceAtoB'] = item_pairs['supportAB'] / item_pairs['supportA']
    item_pairs['confidenceBtoA'] = item_pairs['supportAB'] / item_pairs['supportB']
    item_pairs['lift']           = item_pairs['supportAB'] / (item_pairs['supportA'] * item_pairs['supportB'])
    
    
    # Return association rules sorted by lift in descending order
    return item_pairs.sort_values('lift', ascending=False)

In [8]:
# Split the order lines by cluster (0-19).  Each cluster becomes a Series indexed by
# order_id whose values are product ids, named 'item_id' -- the shape association_rules expects.

cluster_orders = {
    cluster_id: (
        clustered_orders.loc[clustered_orders['cluster'] == cluster_id, ['order_id', 'product_id']]
        .set_index('order_id')['product_id']
        .rename('item_id')
    )
    for cluster_id in range(20)
}

In [9]:
# Compute item-level association rules for every cluster (min support 0.0001),
# keyed by cluster number

group_association_rules_dic = {}
for cluster_id in range(20):
    print('Cluster: ', cluster_id)
    cluster_rules = association_rules(cluster_orders[cluster_id], 0.0001)
    group_association_rules_dic[cluster_id] = cluster_rules

Cluster:  0
Starting number of orders, items, order_item pairs: 22099, 14626, 264115
Items with support >= 0.0001:            8717
Remaning number of orders, items, order_item pairs: 22078, 8717, 256351
Remaining orders with 2+ items:       21637
Remaining number of orders, items, order_item pairs: 21637, 8717, 255910
Item pairs:                          808257
Item pairs with support >= 0.0001:     178869

Cluster:  1
Starting number of orders, items, order_item pairs: 785071, 46426, 7788280
Items with support >= 0.0001:           10650
Remaning number of orders, items, order_item pairs: 781265, 10650, 7187429
Remaining orders with 2+ items:      746198
Remaining number of orders, items, order_item pairs: 746198, 10650, 7152362
Item pairs:                        10064495
Item pairs with support >= 0.0001:      54507

Cluster:  2
Starting number of orders, items, order_item pairs: 53338, 16897, 754548
Items with support >= 0.0001:            8319
Remaning number of orders, items, order

In [10]:
# Attach product names to both sides of every rule and fix the column order

# The two lookup frames do not depend on the cluster, so build them once
product_names = products_desc[['product_id', 'product_name']]
names_for_a = product_names.rename(columns={'product_id': 'item_A', 'product_name': 'product_name_A'})
names_for_b = product_names.rename(columns={'product_id': 'item_B', 'product_name': 'product_name_B'})
rule_columns = ['item_A', 'item_B', 'product_name_A', 'product_name_B', 'freqAB', 'supportAB',
                'freqA', 'supportA', 'freqB', 'supportB', 'confidenceAtoB', 'confidenceBtoA', 'lift']

for cluster_id, cluster_rules in group_association_rules_dic.items():
    named_rules = cluster_rules.merge(names_for_a, on='item_A').merge(names_for_b, on='item_B')
    group_association_rules_dic[cluster_id] = named_rules[rule_columns]

In [84]:
# Preview the product-name lookup as it is renamed for the item_A merge
products_desc[['product_id','product_name']].rename(columns = {'product_id':'item_A','product_name':'product_name_A'})

Unnamed: 0,item_A,product_name_A
0,1,Chocolate Sandwich Cookies
1,2,All-Seasons Salt
2,3,Robust Golden Unsweetened Oolong Tea
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...
4,5,Green Chile Anytime Sauce
...,...,...
49683,49684,"Vodka, Triple Distilled, Twist of Vanilla"
49684,49685,En Croute Roast Hazelnut Cranberry
49685,49686,Artisan Baguette
49686,49687,Smartblend Healthy Metabolism Dry Cat Food


In [85]:
# Inspect the item-level association rules stored for cluster 0
group_association_rules_dic[0]

Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,21206,39365,English Breakfast Black Tea,French Vanilla Syrup,3,0.000139,3,0.000139,3,0.000139,1.000000,1.000000,7212.333333
1,35749,39365,Ginger Tea,French Vanilla Syrup,3,0.000139,5,0.000231,3,0.000139,0.600000,1.000000,4327.400000
2,35102,39365,The Original Liquid Coffee Creamer,French Vanilla Syrup,3,0.000139,6,0.000277,3,0.000139,0.500000,1.000000,3606.166667
3,10312,39365,Zen Tea,French Vanilla Syrup,3,0.000139,14,0.000647,3,0.000139,0.214286,1.000000,1545.500000
4,21206,35749,English Breakfast Black Tea,Ginger Tea,3,0.000139,3,0.000139,5,0.000231,1.000000,0.600000,4327.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178864,13176,17619,Bag of Organic Bananas,Premium Fish Sauce,5,0.000231,3987,0.184268,16,0.000739,0.001254,0.312500,1.695902
178865,13176,45664,Bag of Organic Bananas,Onion Powder,3,0.000139,3987,0.184268,10,0.000462,0.000752,0.300000,1.628066
178866,13176,17592,Bag of Organic Bananas,Chinese Eggplant,3,0.000139,3987,0.184268,11,0.000508,0.000752,0.272727,1.480060
178867,13176,18272,Bag of Organic Bananas,Slices Cheddar Cheese,3,0.000139,3987,0.184268,12,0.000555,0.000752,0.250000,1.356722


In [11]:
# Persist the per-cluster item association rules; the context manager makes sure
# the file is flushed and closed once the dump has finished
with open("Pickle/group_association_rules_dic.p", "wb") as fh:
    pickle.dump(group_association_rules_dic, fh)

In [60]:
# Inspect the product description table
products_desc

Unnamed: 0,product_id,product_name,aisle_id,department_id,aisle,department,metadata,stemmed,new_stemmed
0,1,Chocolate Sandwich Cookies,61,19,cookies cakes,snacks,cookies cakes snacks Chocolate Sandwich Cookies,cake chocol cooki snack sandwich,cake chocol cooki snack sandwich
1,2,All-Seasons Salt,104,13,spices seasonings,pantry,spices seasonings pantry All-Seasons Salt,spice all-season season salt pantri,spice salt season pantri
2,3,Robust Golden Unsweetened Oolong Tea,94,7,tea,beverages,tea beverages Robust Golden Unsweetened Oolong...,robust oolong tea beverag golden unsweeten,robust oolong tea beverag golden unsweeten
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,frozen meals,frozen,frozen meals frozen Smart Ones Classic Favorit...,rigatoni cream meal mini vodka sauc frozen fav...,rigatoni cream meal mini vodka sauc frozen fav...
4,5,Green Chile Anytime Sauce,5,13,marinades meat preparation,pantry,marinades meat preparation pantry Green Chile ...,meat marinad chile prepar sauc green anytim pa...,meat marinad chile prepar sauc green anytim pa...
...,...,...,...,...,...,...,...,...,...
49683,49684,"Vodka, Triple Distilled, Twist of Vanilla",124,5,spirits,alcohol,"spirits alcohol Vodka, Triple Distilled, Twist...",distil vanilla of vodka spirit tripl alcohol t...,distil vanilla of vodka spirit tripl alcohol t...
49684,49685,En Croute Roast Hazelnut Cranberry,42,1,frozen vegan vegetarian,frozen,frozen vegan vegetarian frozen En Croute Roast...,en cranberri crout hazelnut frozen vegetarian ...,en cranberri hazelnut frozen vegetarian vegan ...
49685,49686,Artisan Baguette,112,3,bread,bakery,bread bakery Artisan Baguette,baguett bread bakeri artisan,baguett bread bakeri artisan
49686,49687,Smartblend Healthy Metabolism Dry Cat Food,41,8,cat food care,pets,cat food care pets Smartblend Healthy Metaboli...,metabol dri smartblend care healthi cat food pet,metabol dri smartblend care healthi cat food pet


In [59]:
# Inspect the named item-level association rules for cluster 0
group_association_rules_dic[0]

Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,21206,39365,English Breakfast Black Tea,French Vanilla Syrup,3,0.000139,3,0.000139,3,0.000139,1.000000,1.000000,7212.333333
1,35749,39365,Ginger Tea,French Vanilla Syrup,3,0.000139,5,0.000231,3,0.000139,0.600000,1.000000,4327.400000
2,35102,39365,The Original Liquid Coffee Creamer,French Vanilla Syrup,3,0.000139,6,0.000277,3,0.000139,0.500000,1.000000,3606.166667
3,10312,39365,Zen Tea,French Vanilla Syrup,3,0.000139,14,0.000647,3,0.000139,0.214286,1.000000,1545.500000
4,21206,35749,English Breakfast Black Tea,Ginger Tea,3,0.000139,3,0.000139,5,0.000231,1.000000,0.600000,4327.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178864,13176,17619,Bag of Organic Bananas,Premium Fish Sauce,5,0.000231,3987,0.184268,16,0.000739,0.001254,0.312500,1.695902
178865,13176,45664,Bag of Organic Bananas,Onion Powder,3,0.000139,3987,0.184268,10,0.000462,0.000752,0.300000,1.628066
178866,13176,17592,Bag of Organic Bananas,Chinese Eggplant,3,0.000139,3987,0.184268,11,0.000508,0.000752,0.272727,1.480060
178867,13176,18272,Bag of Organic Bananas,Slices Cheddar Cheese,3,0.000139,3987,0.184268,12,0.000555,0.000752,0.250000,1.356722


In [12]:
# group_association_rules_dic = pickle.load(open("Pickle/group_association_rules_dic.p", "rb"))

In [13]:
# example of association rule dataframe

# Take cluster 19's rules and display them ranked by lift, highest first
x = group_association_rules_dic[19]
x.sort_values('lift', ascending = False)

Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,12191,29169,Kettle Cooked Original Potato Chips,Sea Salt & Cracked Pepper Potato Chips,3,0.000122,3,0.000122,3,0.000122,1.000000,1.000000,8189.333333
1,2202,47716,98% Fat Free Condensed Soup Cream of Chicken,98% Fat Free Condensed Soup Cream Of Celery,3,0.000122,3,0.000122,3,0.000122,1.000000,1.000000,8189.333333
850,2753,21985,Blueberry Drinkable Whole Milk Yogurt,Vanilla Whole Milk Drinkable Yogurt,3,0.000122,3,0.000122,3,0.000122,1.000000,1.000000,8189.333333
823,6907,42569,Chips Ahoy! White Fudge Chunky Chocolate Chunk...,Cinnamon Bun Sandwich Cookies,3,0.000122,3,0.000122,3,0.000122,1.000000,1.000000,8189.333333
10,5909,42436,Lemon Verbena Hand Wash,"Hand Wash, Lavender Fields",3,0.000122,3,0.000122,3,0.000122,1.000000,1.000000,8189.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40530,13176,43965,Bag of Organic Bananas,Glazed Buttermilk Doughnuts,3,0.000122,3716,0.151254,117,0.004762,0.000807,0.025641,0.169523
29662,5450,47209,Small Hass Avocado,Organic Hass Avocado,5,0.000204,387,0.015752,1994,0.081162,0.012920,0.002508,0.159186
9782,6729,21137,Cookie Tray,Organic Strawberries,4,0.000163,302,0.012292,2496,0.101596,0.013245,0.001603,0.130370
9847,16797,21137,Strawberries,Organic Strawberries,11,0.000448,1069,0.043512,2496,0.101596,0.010290,0.004407,0.101284


In [14]:
# Same split as the item-level series, but at aisle level: one row per distinct
# (order, aisle) combination, indexed by order_id with the aisle id as the 'item_id' value

cluster_aisle_orders = {}
for cluster_id in range(20):
    in_cluster = clustered_orders['cluster'] == cluster_id
    # Grouping collapses several products from the same aisle in one order into a single row
    products_per_order_aisle = (
        clustered_orders.loc[in_cluster, ['order_id', 'product_id', 'aisle_id']]
        .groupby(['order_id', 'aisle_id'])['product_id']
        .count()
        .reset_index()
    )
    aisle_series = products_per_order_aisle.set_index('order_id')['aisle_id'].rename('item_id')
    cluster_aisle_orders[cluster_id] = aisle_series

In [15]:
# Inspect the aisle-level order/item series built for cluster 1
cluster_aisle_orders[1]

order_id
2           17
2           19
2           83
2           86
2           88
          ... 
3421083     52
3421083     74
3421083     78
3421083     92
3421083    117
Name: item_id, Length: 5650431, dtype: int64

In [16]:
# Compute aisle-level association rules for every cluster (min support 0.0001),
# keyed by cluster number

group_aisle_association_rules_dic = {}
for cluster_id in range(20):
    print("Cluster: ", cluster_id)
    aisle_rules = association_rules(cluster_aisle_orders[cluster_id], 0.0001)
    group_aisle_association_rules_dic[cluster_id] = aisle_rules

Cluster:  0
Starting number of orders, items, order_item pairs: 22099, 134, 190556
Items with support >= 0.0001:             134
Remaning number of orders, items, order_item pairs: 22099, 134, 190556
Remaining orders with 2+ items:       21561
Remaining number of orders, items, order_item pairs: 21561, 134, 190018
Item pairs:                            8092
Item pairs with support >= 0.0001:       7159

Cluster:  1
Starting number of orders, items, order_item pairs: 785071, 134, 5650431
Items with support >= 0.0001:             134
Remaning number of orders, items, order_item pairs: 785071, 134, 5650431
Remaining orders with 2+ items:      744957
Remaining number of orders, items, order_item pairs: 744957, 134, 5610317
Item pairs:                            8906
Item pairs with support >= 0.0001:       7324

Cluster:  2
Starting number of orders, items, order_item pairs: 53338, 134, 410177
Items with support >= 0.0001:             133
Remaning number of orders, items, order_item pairs:

In [17]:
# Persist the per-cluster aisle association rules; the context manager makes sure
# the file is flushed and closed once the dump has finished
with open("Pickle/group_aisle_association_rules_dic.p", "wb") as fh:
    pickle.dump(group_aisle_association_rules_dic, fh)

In [80]:
# Reload the per-cluster aisle association rules saved earlier.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open("Pickle/group_aisle_association_rules_dic.p", "rb") as fh:
    group_aisle_association_rules_dic = pickle.load(fh)

In [81]:
# Reformat the cluster aisle association dataframes: attach aisle names to both
# sides of each rule, fix the column order and rename the id columns to aisle_A / aisle_B

# The lookup frames are the same for every cluster, so build them once
aisle_names_for_a = aisles.rename(columns={'aisle_id': 'item_A', 'aisle': 'aisle_name_A'})
aisle_names_for_b = aisles.rename(columns={'aisle_id': 'item_B', 'aisle': 'aisle_name_B'})
aisle_rule_columns = ['item_A', 'item_B', 'aisle_name_A', 'aisle_name_B', 'freqAB', 'supportAB',
                      'freqA', 'supportA', 'freqB', 'supportB', 'confidenceAtoB', 'confidenceBtoA', 'lift']

for cluster_id, aisle_rules in group_aisle_association_rules_dic.items():
    named_rules = aisle_rules.merge(aisle_names_for_a, on='item_A').merge(aisle_names_for_b, on='item_B')
    group_aisle_association_rules_dic[cluster_id] = (
        named_rules[aisle_rule_columns].rename(columns={'item_A': 'aisle_A', 'item_B': 'aisle_B'})
    )

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [82]:
# Rank cluster 0's aisle-level rules by lift, highest first
x = group_aisle_association_rules_dic[0].sort_values('lift',ascending = False)
# Display the ranked frame (x is already sorted by lift, so this second sort repeats the first)
x.sort_values('lift',ascending = False)

Unnamed: 0,aisle_A,aisle_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,34,0.001577,92,0.004267,51,0.002365,0.369565,0.666667,156.239130
1,27,62,beers coolers,white wines,14,0.000649,117,0.005426,51,0.002365,0.119658,0.274510,50.587230
4551,82,118,baby accessories,first aid,3,0.000139,34,0.001577,45,0.002087,0.088235,0.066667,42.276471
3131,27,28,beers coolers,red wines,21,0.000974,117,0.005426,92,0.004267,0.179487,0.228261,42.064381
4552,80,118,deodorants,first aid,3,0.000139,50,0.002319,45,0.002087,0.060000,0.066667,28.748000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4622,41,92,cat food care,baby food formula,3,0.000139,161,0.007467,1551,0.071935,0.018634,0.001934,0.259031
3044,28,91,red wines,soy lactosefree,4,0.000186,92,0.004267,3856,0.178841,0.043478,0.001037,0.243111
3045,27,91,beers coolers,soy lactosefree,5,0.000232,117,0.005426,3856,0.178841,0.042735,0.001297,0.238955
6958,52,68,frozen breakfast,bulk grains rice dried goods,3,0.000139,1491,0.069153,221,0.010250,0.002012,0.013575,0.196300


## Recommender

In [None]:
# Inspect cluster 0's item-level rules, the table the recommender functions read from
group_association_rules_dic[0]

In [None]:
# Index products_desc by product_id so the recommender can use .at[product_id, column] lookups.
# NOTE: this mutates products_desc in place; afterwards 'product_id' is no longer a column, so the
# earlier cell that selects products_desc[['product_id','product_name']] cannot be rerun without reloading.
products_desc.set_index('product_id', inplace=True)

In [None]:
# returns top x items associated based on lift

def product_to_product(cluster, product_id, item_lift, product_name, num_products):
    """Return the names of the products most strongly associated with one product.

    Parameters
    ----------
    cluster : key into ``group_association_rules_dic`` selecting the rule table.
    product_id : id of the product to find partners for; it may appear on
        either side (``item_A`` or ``item_B``) of a rule.
    item_lift : only rules with ``lift`` strictly greater than this are used.
    product_name : name of ``product_id``; never included in the result.
    num_products : maximum number of rules (and so of names) to return.

    Returns
    -------
    list
        Partner product names ordered from highest to lowest lift.

    The original appended every ``product_name_A`` and then every
    ``product_name_B``, which lost the lift ranking whenever the product
    appeared on different sides of different rules.  Here the partner is
    picked rule by rule, so the ranking is preserved.
    """
    rules = group_association_rules_dic[cluster]
    involves_product = (rules['item_A'] == product_id) | (rules['item_B'] == product_id)
    top_rules = (rules[involves_product & (rules['lift'] > item_lift)]
                 .sort_values('lift', ascending=False)
                 .head(n=num_products))

    product_to_product_associations = []
    for item_a, name_a, name_b in zip(top_rules['item_A'],
                                      top_rules['product_name_A'],
                                      top_rules['product_name_B']):
        # The partner is whichever side of the rule is not the requested product
        partner_name = name_b if item_a == product_id else name_a
        # Keep the original guarantee that the requested product's own name is never returned
        if partner_name != product_name:
            product_to_product_associations.append(partner_name)
    return product_to_product_associations

In [None]:
# returns recommended products given inputs

def pdp_recommender(user_id, product_id, item_lift, num_products):
    """Recommend products for a user who is looking at one product.

    Looks up the product's name in ``products_desc`` and the user's cluster in
    ``clustered_users``, then delegates to ``product_to_product`` with that
    cluster's association rules.

    Parameters
    ----------
    user_id : label used to look up the ``cluster`` value in ``clustered_users``.
    product_id : index label of the product in ``products_desc``.
    item_lift : minimum lift (exclusive) a rule needs to be used.
    num_products : maximum number of recommendations.

    Returns
    -------
    list
        Recommended product names, as returned by ``product_to_product``.

    Raises
    ------
    KeyError
        If ``product_id`` or ``user_id`` is not a label in the respective index.
    """
    # The unused aisle_id / aisle_name lookups from the original were removed.
    product_name = products_desc.at[product_id, 'product_name']
    # NOTE(review): .at looks user_id up in the *index* of clustered_users.  An earlier cell
    # calls clustered_users.reset_index(inplace=True), so the index may be row numbers rather
    # than user ids -- confirm callers pass the matching kind of label.
    cluster = clustered_users.at[user_id, 'cluster']
    return product_to_product(cluster=cluster, product_id=product_id, item_lift=item_lift,
                              product_name=product_name, num_products=num_products)

In [None]:
# Look up a single product's name by its product_id index label.
# NOTE(review): product_id is not assigned at notebook top level in the visible cells; define it before running
products_desc.at[product_id,'product_name']

In [None]:
# Results for one randomly sampled row from each of the 20 clusters, for product 13176
# (Bag of Organic Bananas), lift > 1, up to 5 products

for cluster_id in range(20):
    cluster_members = clustered_users[clustered_users['cluster'] == cluster_id]
    # The sampled row's index label is what pdp_recommender uses to look the cluster up again
    user = cluster_members.sample().index[0]
    print('\ncluster', cluster_id, ': user ', user)
    print('\n'.join(pdp_recommender(user, 13176, 1, 5)))