In [33]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
cluster0 = pd.read_csv('cluster0.csv').drop('Unnamed: 0',axis=1)
cluster1 = pd.read_csv('cluster1.csv').drop('Unnamed: 0',axis=1)
cluster2 = pd.read_csv('cluster2.csv').drop('Unnamed: 0',axis=1)
cluster3 = pd.read_csv('cluster3.csv').drop('Unnamed: 0',axis=1)
cluster4 = pd.read_csv('cluster4.csv').drop('Unnamed: 0',axis=1)
cluster5 = pd.read_csv('cluster5.csv').drop('Unnamed: 0',axis=1)

In [3]:
products = pd.read_csv('products.csv')

In [4]:
cluster0.shape

(7354493, 13)

### Cluster 0 - Basket Analysis

In [4]:
prod_freq = cluster0.groupby('product_id')['order_id'].count().reset_index()
prod_freq = prod_freq.rename(columns = {'order_id':'frequency'})
prod_freq = prod_freq.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq = prod_freq.merge(products, on = 'product_id', how = 'left')
prod_freq


Unnamed: 0,product_id,frequency,product_name,aisle_id,department_id
0,24852,99641,Banana,24,4
1,21903,86029,Organic Baby Spinach,123,4
2,13176,84010,Bag of Organic Bananas,24,4
3,47209,66210,Organic Hass Avocado,24,4
4,21137,61536,Organic Strawberries,24,4
...,...,...,...,...,...
38480,16422,1,Cherry Limeade Flavored Fruit Drink,31,7
38481,16433,1,Ham & Cheese Loaf,96,20
38482,16438,1,Unsulphured Robust Molasses,29,13
38483,16443,1,Special K Chocolate Peanut Butter Bar,3,19


In [5]:
# only keeping the 100 most purchased products
prod_freq = prod_freq[0:100]
prod_freq_prod = list(prod_freq['product_id'])
len(prod_freq_prod)

100

In [6]:
# filter main cluster dataset only on the 100 most purchases products
cluster0 = cluster0[cluster0['product_id'].isin(prod_freq_prod)]
print(cluster0.shape)
cluster0['product_id'].nunique()

(2518676, 13)


100

In [7]:
cluster0.order_id.nunique()

593120

In [8]:
basket_0 = cluster0.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')
basket_0.head()

product_name,Apple Honeycrisp Organic,Asparagus,Bag of Organic Bananas,Banana,Boneless Skinless Chicken Breasts,Broccoli Crown,Brussels Sprouts,Bunched Cilantro,Carrots,Cucumber Kirby,...,Seedless Red Grapes,Shredded Parmesan,Small Hass Avocado,Sparkling Water Grapefruit,Spring Water,Strawberries,Unsalted Butter,Unsweetened Almondmilk,Yellow Bell Pepper,Yellow Onions
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
18,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# in order to use the the above frequency counts in the apriori algorithm I need to one hot encode them
def encode_basket(x):
    if x <= 0:
        return 0
    else:
        return 1

basket_0 = basket_0.applymap(encode_basket)

In [10]:
basket_0.head()

product_name,Apple Honeycrisp Organic,Asparagus,Bag of Organic Bananas,Banana,Boneless Skinless Chicken Breasts,Broccoli Crown,Brussels Sprouts,Bunched Cilantro,Carrots,Cucumber Kirby,...,Seedless Red Grapes,Shredded Parmesan,Small Hass Avocado,Sparkling Water Grapefruit,Spring Water,Strawberries,Unsalted Butter,Unsweetened Almondmilk,Yellow Bell Pepper,Yellow Onions
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
18,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
19,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
basket_0.shape

(593120, 100)

In [12]:
itemset0 = apriori(basket_0, use_colnames=True, verbose=1, min_support=0.021)

Processing 105 combinations | Sampling itemset size 32


In [13]:
itemset0

Unnamed: 0,support,itemsets
0,0.035829,(Apple Honeycrisp Organic)
1,0.055512,(Asparagus)
2,0.141641,(Bag of Organic Bananas)
3,0.167995,(Banana)
4,0.034276,(Boneless Skinless Chicken Breasts)
...,...,...
89,0.026189,"(Organic Avocado, Banana)"
90,0.029808,"(Organic Baby Spinach, Banana)"
91,0.021326,"(Organic Baby Spinach, Organic Avocado)"
92,0.021684,"(Organic Baby Spinach, Organic Hass Avocado)"


In [14]:
rules0 = association_rules(itemset0, metric="lift", min_threshold=1)
rules0.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
19,(Organic Garlic),(Organic Yellow Onion),0.100706,0.095731,0.02213,0.219752,2.295513,0.01249,1.158951
18,(Organic Yellow Onion),(Organic Garlic),0.095731,0.100706,0.02213,0.231173,2.295513,0.01249,1.169696
2,(Organic Hass Avocado),(Bag of Organic Bananas),0.11163,0.141641,0.029326,0.26271,1.854759,0.013515,1.164208
3,(Bag of Organic Bananas),(Organic Hass Avocado),0.141641,0.11163,0.029326,0.207047,1.854759,0.013515,1.120331
7,(Banana),(Cucumber Kirby),0.167995,0.072247,0.021062,0.12537,1.735304,0.008924,1.060738
6,(Cucumber Kirby),(Banana),0.072247,0.167995,0.021062,0.291522,1.735304,0.008924,1.174356
11,(Banana),(Organic Avocado),0.167995,0.099821,0.026189,0.15589,1.561687,0.009419,1.066423
10,(Organic Avocado),(Banana),0.099821,0.167995,0.026189,0.262355,1.561687,0.009419,1.127921
5,(Bag of Organic Bananas),(Organic Strawberries),0.141641,0.10375,0.02254,0.159136,1.533844,0.007845,1.065868
4,(Organic Strawberries),(Bag of Organic Bananas),0.10375,0.141641,0.02254,0.217255,1.533844,0.007845,1.096601


In [15]:
rules0.to_csv('basket0.csv')

### Cluster 1 - Basket Analysis

In [16]:
prod_freq1 = cluster1.groupby('product_id')['order_id'].count().reset_index()
prod_freq1 = prod_freq1.rename(columns = {'order_id':'frequency'})
prod_freq1 = prod_freq1.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq1 = prod_freq1.merge(products, on = 'product_id', how = 'left')
prod_freq1

Unnamed: 0,product_id,frequency,product_name,aisle_id,department_id
0,24852,54637,Banana,24,4
1,13176,50581,Bag of Organic Bananas,24,4
2,21137,32089,Organic Strawberries,24,4
3,47209,20625,Organic Hass Avocado,24,4
4,16797,19755,Strawberries,24,4
...,...,...,...,...,...
24809,31908,1,Beef Rib Roast,122,12
24810,31905,1,Single-Grain Rice Cereal,92,18
24811,31903,1,"Sweetener, Liquid Stevia, Chocolate",97,13
24812,31901,1,Mineral Sun Spray Lotion Broad Spectrum SPF 30...,25,11


In [17]:
# only keeping the 100 most purchased products
prod_freq1 = prod_freq1[0:100]
prod_freq_prod1 = list(prod_freq1['product_id'])

In [18]:
# filter main cluster dataset only on the 100 most purchases products
cluster1 = cluster1[cluster1['product_id'].isin(prod_freq_prod1)]
print(cluster1.shape)
cluster1['product_id'].nunique()

(645303, 13)


100

In [19]:
basket_1 = cluster1.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')


In [20]:
# in order to use the the above frequency counts in the apriori algorithm I need to one hot encode them

basket_1 = basket_1.applymap(encode_basket)

In [21]:
itemset1 = apriori(basket_1, use_colnames=True, verbose=1, min_support=0.01)

Processing 2451 combinations | Sampling itemset size 3


In [22]:
rules1 = association_rules(itemset1, metric="lift", min_threshold=1)
rules1.sort_values(['confidence','lift'], ascending=[False,False])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
60,(Organic Fuji Apple),(Banana),0.075415,0.279089,0.036926,0.489637,1.754411,0.015879,1.412545
35,"(Clementines, Bag)",(Banana),0.024279,0.279089,0.011422,0.470440,1.685625,0.004646,1.361339
36,(Cucumber Kirby),(Banana),0.032268,0.279089,0.014967,0.463828,1.661934,0.005961,1.344551
39,(Gala Apples),(Banana),0.023165,0.279089,0.010717,0.462624,1.657621,0.004252,1.341539
47,(Honeycrisp Apple),(Banana),0.059054,0.279089,0.027308,0.462417,1.656878,0.010826,1.341022
...,...,...,...,...,...,...,...,...,...
43,(Banana),(Grape White/Green Seedless),0.279089,0.034459,0.012668,0.045390,1.317232,0.003051,1.011451
117,(Bag of Organic Bananas),"(Organic Hass Avocado, Organic Strawberries)",0.258371,0.025418,0.010880,0.042111,1.656745,0.004313,1.017427
34,(Banana),"(Clementines, Bag)",0.279089,0.024279,0.011422,0.040925,1.685625,0.004646,1.017356
38,(Banana),(Gala Apples),0.279089,0.023165,0.010717,0.038399,1.657621,0.004252,1.015842


In [23]:
rules1.to_csv('basket1.csv',index=False)

### Cluster 2 - Basket Analysis

In [4]:
prod_freq2 = cluster2.groupby('product_id')['order_id'].count().reset_index()
prod_freq2 = prod_freq2.rename(columns = {'order_id':'frequency'})
prod_freq2 = prod_freq2.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq2 = prod_freq2.merge(products, on = 'product_id', how = 'left')

In [5]:
# only keeping the 100 most purchased products
prod_freq2 = prod_freq2[0:100]
prod_freq_prod2 = list(prod_freq2['product_id'])

In [6]:
# filter main cluster dataset only on the 100 most purchases products
cluster2 = cluster2[cluster2['product_id'].isin(prod_freq_prod2)]
print(cluster2.shape)
cluster2['product_id'].nunique()

(966162, 13)


100

In [7]:
basket_2 = cluster2.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')

In [8]:
def encode_basket(x):
    if x <= 0:
        return 0
    else:
        return 1

basket_2 = basket_2.applymap(encode_basket)


In [9]:
itemset2 = apriori(basket_2, use_colnames=True, verbose=1, min_support=0.0109)

Processing 8372 combinations | Sampling itemset size 2


In [10]:
itemset2

Unnamed: 0,support,itemsets
0,0.014306,(100% Raw Coconut Water)
1,0.035288,(100% Whole Wheat Bread)
2,0.040095,(2% Reduced Fat Milk)
3,0.071607,(Bag of Organic Bananas)
4,0.128736,(Banana)
...,...,...
87,0.015966,(Unsweetened Original Almond Breeze Almond Milk)
88,0.012194,(Unsweetened Vanilla Almond Milk)
89,0.014760,(Vanilla Almond Breeze Almond Milk)
90,0.028970,(Whole Milk)


In [11]:
rules2 = association_rules(itemset2, metric="lift", min_threshold=0.5)
rules2.sort_values(['confidence','lift'], ascending=[False,False])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


### Cluster 3 - Basket Analysis

In [48]:
prod_freq3 = cluster3.groupby('product_id')['order_id'].count().reset_index()
prod_freq3 = prod_freq3.rename(columns = {'order_id':'frequency'})
prod_freq3 = prod_freq3.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq3 = prod_freq3.merge(products, on = 'product_id', how = 'left')

In [49]:
# only keeping the 100 most purchased products
prod_freq3 = prod_freq3[0:100]
prod_freq_prod3 = list(prod_freq3['product_id'])

In [50]:
# filter main cluster dataset only on the 100 most purchases products
cluster3 = cluster3[cluster3['product_id'].isin(prod_freq_prod3)]
print(cluster3.shape)
cluster3['product_id'].nunique()

(127749, 13)


100

In [51]:
basket_3 = cluster3.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')

In [52]:
basket_3 = basket_3.applymap(encode_basket)

In [57]:
itemset3 = apriori(basket_3, use_colnames=True, verbose=1, min_support=0.002)

Processing 70 combinations | Sampling itemset size 76 5


In [58]:
itemset3

Unnamed: 0,support,itemsets
0,0.008699,(0% Greek Strained Yogurt)
1,0.007478,(1 Liter)
2,0.009357,(100% Natural Spring Water)
3,0.006970,(100% Raw Coconut Water)
4,0.007102,(2% Reduced Fat Milk)
...,...,...
1391,0.002029,"(Kiwi Sandia Sparkling Water, Curate Melon Pom..."
1392,0.002255,"(Kiwi Sandia Sparkling Water, Curate Melon Pom..."
1393,0.002048,"(Kiwi Sandia Sparkling Water, Curate Melon Pom..."
1394,0.002029,"(Kiwi Sandia Sparkling Water, Sparkling Water ..."


In [59]:
rules3 = association_rules(itemset3, metric="lift", min_threshold=0.5)
rules3.sort_values(['confidence','lift'], ascending=[False,False])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
3659,"(All Natural Apricot Sparkling Water, Sparklin...",(Sparkling Water Grapefruit),0.002255,0.150568,0.002198,0.975000,6.475465,0.001859,33.977266
7593,"(Organic Fuji Apple, Sparkling Water Berry, Pu...",(Sparkling Water Grapefruit),0.002123,0.150568,0.002067,0.973451,6.465179,0.001747,31.995259
7852,"(Lime Sparkling Water, All Natural Apricot Spa...",(Sparkling Water Grapefruit),0.002649,0.150568,0.002574,0.971631,6.453091,0.002175,29.942466
7942,"(Peach Pear Flavored Sparkling Water, All Natu...",(Sparkling Water Grapefruit),0.002893,0.150568,0.002781,0.961039,6.382743,0.002345,21.802079
3560,"(Lime Sparkling Water, All Natural Apricot Spa...",(Sparkling Water Grapefruit),0.004133,0.150568,0.003946,0.954545,6.339616,0.003323,18.687496
...,...,...,...,...,...,...,...,...,...
3446,(Sparkling Water Grapefruit),"(Peach Pear Flavored Sparkling Water, Kiwi San...",0.150568,0.002255,0.002010,0.013352,5.922006,0.001671,1.011247
7269,(Sparkling Water Grapefruit),"(Lime Sparkling Water, Sparkling Lemon Water, ...",0.150568,0.002292,0.002010,0.013352,5.824924,0.001665,1.011209
13007,(Sparkling Water Grapefruit),"(Curate Melon Pomelo Sparking Water, Sparkling...",0.150568,0.002330,0.002010,0.013352,5.730974,0.001660,1.011171
8023,(Sparkling Water Grapefruit),"(Lime Sparkling Water, Peach Pear Flavored Spa...",0.150568,0.002349,0.002010,0.013352,5.685126,0.001657,1.011152


In [60]:
rules3.to_csv('basket3.csv',index=False)

### Cluster 4 - Basket Analysis

In [4]:
prod_freq4 = cluster4.groupby('product_id')['order_id'].count().reset_index()
prod_freq4 = prod_freq4.rename(columns = {'order_id':'frequency'})
prod_freq4 = prod_freq4.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq4 = prod_freq4.merge(products, on = 'product_id', how = 'left')

In [5]:
# only keeping the 100 most purchased products
prod_freq4 = prod_freq4[0:100]
prod_freq_prod4 = list(prod_freq4['product_id'])

In [6]:
# filter main cluster dataset only on the 100 most purchases products
cluster4 = cluster4[cluster4['product_id'].isin(prod_freq_prod4)]
print(cluster4.shape)
cluster4['product_id'].nunique()

(3583719, 13)


100

In [7]:
basket_4 = cluster4.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')

In [10]:
def encode_basket(x):
    if x <= 0:
        return 0
    else:
        return 1

basket_4 = basket_4.applymap(encode_basket)

In [15]:
itemset4 = apriori(basket_4, use_colnames=True, verbose=1, min_support=0.022)

Processing 63 combinations | Sampling itemset size 3 2


In [16]:
itemset4

Unnamed: 0,support,itemsets
0,0.030351,(100% Whole Wheat Bread)
1,0.047038,(Apple Honeycrisp Organic)
2,0.025713,(Asparagus)
3,0.182874,(Bag of Organic Bananas)
4,0.240262,(Banana)
...,...,...
61,0.027466,"(Banana, Organic Avocado)"
62,0.026954,"(Banana, Organic Baby Spinach)"
63,0.032585,"(Organic Strawberries, Banana)"
64,0.022392,"(Banana, Strawberries)"


In [17]:
rules4 = association_rules(itemset4, metric="lift", min_threshold=0.5)
rules4.sort_values(['confidence','lift'], ascending=[False,False])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
15,(Strawberries),(Banana),0.061449,0.240262,0.022392,0.36439,1.516638,0.007628,1.19529
9,(Organic Avocado),(Banana),0.084122,0.240262,0.027466,0.326503,1.358945,0.007255,1.128049
5,(Organic Raspberries),(Bag of Organic Bananas),0.076729,0.182874,0.023962,0.312293,1.707697,0.00993,1.188189
3,(Organic Hass Avocado),(Bag of Organic Bananas),0.10936,0.182874,0.033962,0.310552,1.698179,0.013963,1.18519
6,(Organic Strawberries),(Bag of Organic Bananas),0.142823,0.182874,0.03447,0.241346,1.319742,0.008351,1.077074
11,(Organic Baby Spinach),(Banana),0.112425,0.240262,0.026954,0.239755,0.997891,-5.7e-05,0.999333
12,(Organic Strawberries),(Banana),0.142823,0.240262,0.032585,0.228147,0.949577,-0.00173,0.984304
1,(Organic Baby Spinach),(Bag of Organic Bananas),0.112425,0.182874,0.025378,0.225728,1.23434,0.004818,1.055348
17,(Organic Hass Avocado),(Organic Strawberries),0.10936,0.142823,0.02345,0.214431,1.501368,0.007831,1.091153
7,(Bag of Organic Bananas),(Organic Strawberries),0.182874,0.142823,0.03447,0.18849,1.319742,0.008351,1.056274


In [18]:
rules4.to_csv('basket4.csv',index=False)

### Cluster 5 - Basket Analysis

In [17]:
prod_freq5 = cluster5.groupby('product_id')['order_id'].count().reset_index()
prod_freq5 = prod_freq5.rename(columns = {'order_id':'frequency'})
prod_freq5 = prod_freq5.sort_values('frequency', ascending=False).reset_index(drop=True)
prod_freq5 = prod_freq5.merge(products, on = 'product_id', how = 'left')

NameError: name 'cluster5' is not defined

In [20]:
# only keeping the 100 most purchased products
prod_freq5 = prod_freq5[0:100]
prod_freq_prod5 = list(prod_freq5['product_id'])

In [21]:
# filter main cluster dataset only on the 100 most purchases products
cluster5 = cluster5[cluster5['product_id'].isin(prod_freq_prod5)]
print(cluster5.shape)
cluster5['product_id'].nunique()

(335134, 13)


100

In [22]:
basket_5 = cluster5.groupby(['order_id', 'product_name'])['reordered'].count().unstack().reset_index().fillna(0).set_index('order_id')

In [23]:
basket_5 = basket_5.applymap(encode_basket)

In [43]:
itemset5 = apriori(basket_5, use_colnames=True, verbose=1, min_support=0.004)

Processing 900 combinations | Sampling itemset size 4 3


In [44]:
itemset5

Unnamed: 0,support,itemsets
0,0.025897,(0% Greek Strained Yogurt)
1,0.011303,(Almonds)
2,0.036798,(Apples)
3,0.120790,(Baby Cucumbers)
4,0.023824,(Bag of Jumbo Yellow Onions)
...,...,...
527,0.004489,"(Seedless Cucumbers, Rainbow Bell Peppers, Ras..."
528,0.006502,"(Rainbow Bell Peppers, Strawberries, Raspberries)"
529,0.004962,"(Seedless Cucumbers, Rainbow Bell Peppers, Str..."
530,0.006512,"(Seedless Cucumbers, Strawberries, Raspberries)"


In [16]:
rules5 = association_rules(itemset5, metric="lift", min_threshold=1)
rules5 = rules5.sort_values(['confidence','lift'], ascending=[False,False])
rules5.head(60)

NameError: name 'itemset5' is not defined

In [46]:
rules5.to_csv('basket5.csv',index=False)

## Recommender

In [14]:
all_orders = pd.read_csv('cluster_all_orders.csv')
all_orders.shape

(32434489, 13)

In [27]:
list(all_orders['user_id'])[:3]

[202279, 202279, 202279]

In [32]:
user = input('Input the user_id here:')
# product = input('Input product_id here:')

user_ids = list(all_orders['user_id'])

for users in user_ids:
    if user == users:
        print('hello')
    #cluster = all_orders[all_orders['user_id']==user]
#     cluster_label = cluster['cluster'].value
    #print('hello')
#     if cluster_label == 0:
#         rule0[rule0['antecedents']==product]

Input the user_id here:202279
