In [71]:
import pandas as pd
from utils import *

from mlxtend.frequent_patterns import apriori, association_rules 
import matplotlib.pyplot as plt


# Load the database.
# NOTE(review): load_db / load_table are not defined in this notebook;
# presumably they come from the `utils` star import -- confirm.
go_sales = load_db('go_sales.sqlite')

# Load the three tables used below, in the same order as before
# (order details, order header, product).
order_details, order_headers, product = (
    load_table(go_sales, table_name)
    for table_name in ('ORDER_DETAILS', 'ORDER_HEADER', 'PRODUCT')
)

In [72]:
# Join tables
# Order headers -> order lines (matched on ORDER_NUMBER) -> product rows
# (matched on PRODUCT_NUMBER). Both merges use pandas' default inner join,
# so rows without a match on either key are dropped.
joined = (
    order_headers
    .merge(order_details, on='ORDER_NUMBER')
    .merge(product, on='PRODUCT_NUMBER')
)

# Keep only the two columns the basket analysis needs.
filtered = joined.loc[:, ['ORDER_NUMBER', 'PRODUCT_NAME']]
filtered

Unnamed: 0,ORDER_NUMBER,PRODUCT_NAME
0,1153,TrailChef Canteen
1,1153,Firefly 2
2,1153,Firefly Multi-light
3,1153,EverGlow Single
4,1153,EverGlow Butane
...,...,...
43058,9479,Granite Pulley
43059,9479,Granite Chalk Bag
43060,9479,Granite Ice
43061,9479,Seeker 35


In [73]:
# One indicator column per distinct product name, row-aligned with `filtered`.
one_hot_encode = pd.get_dummies(filtered['PRODUCT_NAME'])

# Put the order number next to the indicators so rows can be grouped per order.
concatenated = pd.concat(
    [filtered['ORDER_NUMBER'], one_hot_encode],
    axis=1,
)

# Collapse to one row per order; the sum counts how many joined rows of the
# order carry each product. ORDER_NUMBER stays a regular column.
grouped = concatenated.groupby('ORDER_NUMBER', as_index=False).sum()

# Drop the key and reduce the counts to presence/absence flags, which is the
# boolean frame passed to apriori below.
only_products = grouped.drop(columns='ORDER_NUMBER').astype(bool)
only_products


Unnamed: 0,Aloe Relief,Bear Edge,Bear Survival Edge,Blue Steel Max Putter,Blue Steel Putter,BugShield Extreme,BugShield Lotion,BugShield Lotion Lite,BugShield Natural,BugShield Spray,...,TrailChef Canteen,TrailChef Cook Set,TrailChef Cup,TrailChef Deluxe Cook Set,TrailChef Double Flame,TrailChef Kettle,TrailChef Kitchen Kit,TrailChef Single Flame,TrailChef Utensils,TrailChef Water Bag
0,False,False,False,False,False,True,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
4,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5355,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5356,False,False,False,False,False,True,False,False,False,False,...,False,True,True,False,False,False,False,True,True,False
5357,True,False,False,False,False,False,False,False,False,False,...,False,False,False,True,False,False,True,False,False,False
5358,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,True,False,False,False,True,True


In [76]:
# Mine frequent itemsets from the boolean order x product frame.
# min_support=0.01 keeps itemsets whose support (fraction of rows, i.e. orders,
# containing them) is at least 1%; use_colnames=True reports items by
# column name (product name) rather than by column index.
frq_items = apriori(
    only_products,
    min_support=0.01,
    use_colnames=True,
)
frq_items

Unnamed: 0,support,itemsets
0,0.057463,(Aloe Relief)
1,0.082649,(Bear Edge)
2,0.073507,(Bear Survival Edge)
3,0.066791,(Blue Steel Max Putter)
4,0.072761,(Blue Steel Putter)
...,...,...
1902,0.016978,"(TrailChef Water Bag, TrailChef Kitchen Kit)"
1903,0.014552,"(TrailChef Single Flame, TrailChef Utensils)"
1904,0.012313,"(TrailChef Single Flame, TrailChef Water Bag)"
1905,0.015672,"(TrailChef Utensils, TrailChef Water Bag)"


In [81]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.3.
rules = association_rules(
    frq_items,
    metric="confidence",
    min_threshold=0.3,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Blue Steel Max Putter),(Blue Steel Putter),0.066791,0.072761,0.020149,0.301676,4.146111,0.015289,1.327806,0.813119
1,(Blue Steel Max Putter),(Hailstorm Titanium Irons),0.066791,0.077985,0.025746,0.385475,4.942931,0.020538,1.500370,0.854783
2,(Hailstorm Titanium Irons),(Blue Steel Max Putter),0.077985,0.066791,0.025746,0.330144,4.942931,0.020538,1.393148,0.865160
3,(Hailstorm Titanium Woods Set),(Blue Steel Max Putter),0.062873,0.066791,0.022015,0.350148,5.242445,0.017816,1.436034,0.863543
4,(Blue Steel Max Putter),(Hailstorm Titanium Woods Set),0.066791,0.062873,0.022015,0.329609,5.242445,0.017816,1.397881,0.867168
...,...,...,...,...,...,...,...,...,...,...
123,(Lady Hailstorm Titanium Irons),(Lady Hailstorm Titanium Woods Set),0.054664,0.059142,0.020336,0.372014,6.290199,0.017103,1.498214,0.889655
124,(Sun Shelter Stick),(Sun Shelter 30),0.058955,0.104104,0.018284,0.310127,2.978994,0.012146,1.298638,0.705935
125,"(Blue Steel Max Putter, Lady Hailstorm Titaniu...",(Lady Hailstorm Titanium Irons),0.022761,0.054664,0.010448,0.459016,8.397023,0.009204,1.747439,0.901428
126,"(Lady Hailstorm Titanium Woods Set, Lady Hails...",(Blue Steel Max Putter),0.020336,0.066791,0.010448,0.513761,7.692071,0.009090,1.919241,0.888055
