In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
order_products_train_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/order_products__train.csv")
order_products_prior_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/order_products__prior.csv")
orders_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/orders.csv")
products_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/products.csv")
aisles_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/aisles.csv")
departments_df = pd.read_csv("./Desktop/instacart-market-basket-analysis/departments.csv")

In [3]:
order_products_all = pd.concat([order_products_train_df, order_products_prior_df], axis = 0)

In [4]:
product_total_df = order_products_all.groupby("product_id",as_index = False)["order_id"].count()

In [5]:
maxorders = 100
product_total_df = product_total_df.sort_values("order_id",ascending = False)

orders_products = product_total_df.iloc[0:100,:]
customer_order_products = orders_products.merge(products_df, on ="product_id")
productId = customer_order_products.loc[:,["product_id"]]

In [6]:
# most frequently bought items
frequent_items = order_products_all[0:0]
for i in range(0,99):
    Product = productId.iloc[i]['product_id']
    stDf = order_products_all[order_products_all.product_id == Product] 
    frequent_items = frequent_items.append(stDf, ignore_index = False)
frequent_items.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered
115,226,24852,2,0
156,473,24852,2,0
196,878,24852,2,1
272,1042,24852,1,1
297,1139,24852,1,1


In [7]:
frequent_items['reordered'] = 1

In [8]:
basket = frequent_items.groupby(['order_id', 'product_id'])['reordered'].sum().unstack().reset_index().fillna(0).set_index('order_id')

In [9]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

In [10]:
basket_sets = basket.applymap(encode_units)
basket_sets.head()

product_id,196,3957,4210,4605,4799,4920,5077,5450,5785,5876,...,46667,46906,46979,47144,47209,47626,47766,48679,49235,49683
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.031514,(4605)
1,0.035537,(4920)
2,0.025855,(5077)
3,0.021353,(5450)
4,0.020226,(5785)
5,0.037381,(5876)
6,0.035751,(8277)
7,0.029005,(8518)
8,0.023870,(9076)
9,0.024529,(10749)


In [12]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(13176),(21137),0.161785,0.112891,0.026505,0.163832,1.451233,0.008241,1.060921
1,(21137),(13176),0.112891,0.161785,0.026505,0.234787,1.451233,0.008241,1.095402
2,(13176),(21903),0.161785,0.103112,0.021551,0.133208,1.291881,0.004869,1.034722
3,(21903),(13176),0.103112,0.161785,0.021551,0.209007,1.291881,0.004869,1.059699
4,(13176),(47209),0.161785,0.090483,0.02653,0.163981,1.812281,0.011891,1.087914
5,(47209),(13176),0.090483,0.161785,0.02653,0.293199,1.812281,0.011891,1.185929
6,(21137),(24852),0.112891,0.201259,0.023895,0.211665,1.051702,0.001175,1.013199
7,(24852),(21137),0.201259,0.112891,0.023895,0.118728,1.051702,0.001175,1.006623
8,(24852),(21903),0.201259,0.103112,0.021873,0.108683,1.054029,0.001121,1.00625
9,(21903),(24852),0.103112,0.201259,0.021873,0.212133,1.054029,0.001121,1.013802


In [13]:
rules[ (rules['lift'] > 1) &
       (rules['confidence'] >= 0.1) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(13176),(21137),0.161785,0.112891,0.026505,0.163832,1.451233,0.008241,1.060921
1,(21137),(13176),0.112891,0.161785,0.026505,0.234787,1.451233,0.008241,1.095402
2,(13176),(21903),0.161785,0.103112,0.021551,0.133208,1.291881,0.004869,1.034722
3,(21903),(13176),0.103112,0.161785,0.021551,0.209007,1.291881,0.004869,1.059699
4,(13176),(47209),0.161785,0.090483,0.02653,0.163981,1.812281,0.011891,1.087914
5,(47209),(13176),0.090483,0.161785,0.02653,0.293199,1.812281,0.011891,1.185929
6,(21137),(24852),0.112891,0.201259,0.023895,0.211665,1.051702,0.001175,1.013199
7,(24852),(21137),0.201259,0.112891,0.023895,0.118728,1.051702,0.001175,1.006623
8,(24852),(21903),0.201259,0.103112,0.021873,0.108683,1.054029,0.001121,1.00625
9,(21903),(24852),0.103112,0.201259,0.021873,0.212133,1.054029,0.001121,1.013802
