In [24]:
# Reference: https://pbpython.com/market-basket-analysis.html

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Load the order data from "adventureworks.csv" (path is relative to the
# notebook's working directory) into a DataFrame.
df = pd.read_csv("adventureworks.csv")
# Preview the first five rows as the cell output.
df.head()

Unnamed: 0,SalesOrderID,OrderQty,ProductID,Product,TerritoryID,Territory
0,43659,1,776,"Mountain-100 Black, 42",5,Southeast
1,43659,3,777,"Mountain-100 Black, 44",5,Southeast
2,43659,1,778,"Mountain-100 Black, 48",5,Southeast
3,43659,1,771,"Mountain-100 Silver, 38",5,Southeast
4,43659,1,772,"Mountain-100 Silver, 42",5,Southeast


In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['SalesOrderID', 'OrderQty', 'ProductID', 'Product', 'TerritoryID',
       'Territory'],
      dtype='object')

In [6]:
# Count missing SalesOrderID values. The vectorised Series.sum() replaces the
# builtin sum(), which iterated over the boolean mask one element at a time.
df['SalesOrderID'].isna().sum()

0

In [17]:
# Treat order ids as string labels rather than numbers.
df['SalesOrderID'] = df['SalesOrderID'].astype(str)

# Collapse the listed regional territories into a single 'USA' label.
us_regions = ['Southwest', 'Northwest', 'Southeast', 'Northeast', 'Central']
df['Territory'] = df['Territory'].replace(to_replace=us_regions, value='USA')

# Show how many rows fall under each territory after the merge.
df['Territory'].value_counts()

USA               60153
Canada            19064
Australia         15058
United Kingdom    10426
France             9088
Germany            7528
Name: Territory, dtype: int64

In [21]:
def build_basket(orders, territory):
    """Pivot one territory's order lines into an order-by-product quantity table.

    Parameters
    ----------
    orders : pandas.DataFrame
        Frame with 'Territory', 'SalesOrderID', 'Product' and 'OrderQty' columns.
    territory : str
        Value of the 'Territory' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per SalesOrderID (the index) and one column per Product; each
        cell holds the summed OrderQty, with 0 where the order does not
        contain the product.
    """
    return (
        orders[orders['Territory'] == territory]
        .groupby(['SalesOrderID', 'Product'])['OrderQty']
        .sum()
        .unstack()       # products become columns; absent pairs become NaN
        .reset_index()
        .fillna(0)       # NaN -> 0 for products not in the order
        .set_index('SalesOrderID')
    )


# One basket table per territory, all built by the same pipeline.
USA_basket = build_basket(df, "USA")
CA_basket = build_basket(df, "Canada")
AUS_basket = build_basket(df, "Australia")
UK_basket = build_basket(df, "United Kingdom")
FR_basket = build_basket(df, "France")
GR_basket = build_basket(df, "Germany")


In [23]:
def encode_units(x):
    """Collapse an order quantity into a 0/1 presence flag.

    Parameters
    ----------
    x : number
        Quantity of one product within one order.

    Returns
    -------
    int
        1 when the quantity is positive, otherwise 0.
    """
    # A single comparison makes the function total: previously a value strictly
    # between 0 and 1, or NaN, matched neither branch and returned None.
    # NaN compares False against 0, so it is encoded as 0 (not present).
    return 1 if x > 0 else 0
    
# Apply encode_units to every cell of each territory's basket table,
# producing the 0/1 tables used for itemset mining.
(
    USA_basket_sets,
    CA_basket_sets,
    AUS_basket_sets,
    UK_basket_sets,
    FR_basket_sets,
    GR_basket_sets,
) = (
    basket.applymap(encode_units)
    for basket in (
        USA_basket,
        CA_basket,
        AUS_basket,
        UK_basket,
        FR_basket,
        GR_basket,
    )
)



In [32]:
# Mine frequent itemsets for every territory with the same settings:
# itemsets need at least 5% support and are reported by product name.
apriori_opts = {"min_support": 0.05, "use_colnames": True}

USA_frq = apriori(USA_basket_sets, **apriori_opts)
CA_frq = apriori(CA_basket_sets, **apriori_opts)
AUS_frq = apriori(AUS_basket_sets, **apriori_opts)
UK_freq = apriori(UK_basket_sets, **apriori_opts)
FR_freq = apriori(FR_basket_sets, **apriori_opts)
GR_freq = apriori(GR_basket_sets, **apriori_opts)

In [33]:
# Derive association rules from each territory's frequent itemsets,
# keeping rules whose lift meets the threshold of 1.
(
    USA_rules,
    CA_rules,
    AUS_rules,
    UK_rules,
    FR_rules,
    GR_rules,
) = (
    association_rules(itemsets, metric="lift", min_threshold=1)
    for itemsets in (USA_frq, CA_frq, AUS_frq, UK_freq, FR_freq, GR_freq)
)

In [38]:
# Display the association rules mined from the Germany baskets.
GR_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Water Bottle - 30 oz.),(Mountain Bottle Cage),0.163172,0.071292,0.056043,0.343458,4.817595,0.04441,1.414544
1,(Mountain Bottle Cage),(Water Bottle - 30 oz.),0.071292,0.163172,0.056043,0.786096,4.817595,0.04441,3.912171
2,(Road Bottle Cage),(Water Bottle - 30 oz.),0.067099,0.163172,0.060618,0.903409,5.536547,0.049669,8.663632
3,(Water Bottle - 30 oz.),(Road Bottle Cage),0.163172,0.067099,0.060618,0.371495,5.536547,0.049669,1.484319


In [28]:
# Display the association rules mined from the Canada baskets.
CA_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Water Bottle - 30 oz.),(Mountain Bottle Cage),0.168675,0.089009,0.075731,0.44898,5.044199,0.060718,1.65328
1,(Mountain Bottle Cage),(Water Bottle - 30 oz.),0.089009,0.168675,0.075731,0.850829,5.044199,0.060718,5.572959
