# Association Rules from Cineplex Concessions

- Stephen W. Thomas
- October 2020
- For: MMA/GMMA/MMAI 869

This Notebook will find basic association rules.

In [11]:
import pandas as pd
import numpy as np

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_colwidth', None)

# Load in the data 

In [12]:
version_str = "_v3"
input_dir = "clean/"

In [13]:
items = pd.read_excel("items"+version_str+"_customYM"+".xlsx")
visit_items = pd.read_csv(input_dir+"visit_items"+version_str+".csv", encoding = "ISO-8859-1")
visits = pd.read_csv(input_dir+'visits'+version_str+'.csv')

In [14]:
visit_items = pd.merge(visits, visit_items[['Visit_ID','CINEPLEX_Item_ID']], 
                            left_on='Visit_ID', right_on='Visit_ID', how='left')
visit_items = pd.merge(visit_items, items[['CINEPLEX_Item_ID','CINEPLEX_Item_Description','US_Is_Butter','US_Is_LrgPopcorn',
            'Is_Box_Office_Item','Item_Class_Description1','Item_Class_Description2', 'Item_Class_Description3',                                    
            'YM_drink','YM_appetizer','YM_snack','YM_mainCourse','YM_dessert','YM_otherFoodItem','YM_BUTTER'
            ,'YM_Chocolate','YM_Student', 'YM_Gamer', 'YM_Pizza', 'YM_HotDog', 'YM_Nachos', 'YM_Beer', 'YM_Wine'
           , 'YM_Cocktail', 'YM_Coffee', 'YM_Tea', 'YM_Burger', 'YM_Salad', 'YM_Poutine', 'YM_Wings', 'YM_fountainDrink'
           , 'YM_Calamari', 'YM_vitaminWater', 'YM_popcorn']], 
                            left_on='CINEPLEX_Item_ID', right_on='CINEPLEX_Item_ID', how='left')

In [15]:
our_cols = ['YM_drink','YM_appetizer','YM_snack','YM_mainCourse','YM_dessert','YM_otherFoodItem','YM_BUTTER'
            ,'YM_Chocolate','YM_Student', 'YM_Gamer', 'YM_Pizza', 'YM_HotDog', 'YM_Nachos', 'YM_Beer', 'YM_Wine'
           , 'YM_Cocktail', 'YM_Coffee', 'YM_Tea', 'YM_Burger', 'YM_Salad', 'YM_Poutine', 'YM_Wings', 'YM_fountainDrink'
           , 'YM_Calamari', 'YM_vitaminWater', 'YM_popcorn']

In [16]:
pd.options.display.max_columns = None

visit_items

Unnamed: 0,Visit_ID,CINEPLEX_D_Membership_ID,Visit_Date,Visit_TimeSlice_ID,Location_ID,Auditorium_Experience_ID,Film_ID,Seating_Experience_ID,Sales_Channel_ID,Performance_Experience_ID,Performance_Type_ID,Spend,Revenue,Points_Earned,Points_Redeemed,Discount,Has_US_Is_LrgPopcorn,Has_US_Is_Butter,Has_US_Is_Alcohol,Has_US_Is_Candy,Has_US_Is_Coffee,Has_US_Is_Kiddietray,Has_US_Is_Voucher,Has_US_Is_MenuFood,CINEPLEX_Item_ID,CINEPLEX_Item_Description,US_Is_Butter,US_Is_LrgPopcorn,Is_Box_Office_Item,Item_Class_Description1,Item_Class_Description2,Item_Class_Description3,YM_drink,YM_appetizer,YM_snack,YM_mainCourse,YM_dessert,YM_otherFoodItem,YM_BUTTER,YM_Chocolate,YM_Student,YM_Gamer,YM_Pizza,YM_HotDog,YM_Nachos,YM_Beer,YM_Wine,YM_Cocktail,YM_Coffee,YM_Tea,YM_Burger,YM_Salad,YM_Poutine,YM_Wings,YM_fountainDrink,YM_Calamari,YM_vitaminWater,YM_popcorn
0,2,23777,2019-01-01,15,7253,1,28757,1,8,1,1,15.04,15.04,100,0,1.68,0,0,0,0,0,0,0,0,0,<Unknown>,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,23777,2019-01-01,15,7253,1,28757,1,8,1,1,15.04,15.04,100,0,1.68,0,0,0,0,0,0,0,0,0,<Unknown>,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,
2,3,60723,2019-01-01,15,7123,0,0,0,8,0,0,29.61,29.61,297,0,3.29,0,1,0,0,0,0,1,1,93854,VHO CORN - BUTTER - REGULAR,1,0,0,VHO-POPCORN,0,0,,,,,,,1.0,,,,,,,,,,,,,,,,,,,0.0
3,3,60723,2019-01-01,15,7123,0,0,0,8,0,0,29.61,29.61,297,0,3.29,0,1,0,0,0,0,1,1,93461,VHO CBO CPN - CNO (CINEPLEX NIGHT OUT),0,0,0,COUPONS,0,0,,,,1.0,,,,,,,,,,,,,,,,,,,,,,
4,3,60723,2019-01-01,15,7123,0,0,0,8,0,0,29.61,29.61,297,0,3.29,0,1,0,0,0,0,1,1,93854,VHO CORN - BUTTER - REGULAR,1,0,0,VHO-POPCORN,0,0,,,,,,,1.0,,,,,,,,,,,,,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170055,141021,10147219,2019-12-31,21,7415,1,31521,1,8,1,1,22.45,22.45,171,0,2.50,0,1,0,0,0,0,0,0,93846,VHO CORN - POPCORN REGULAR 130oz.,0,0,0,VHO-POPCORN,0,0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,1.0
170056,141021,10147219,2019-12-31,21,7415,1,31521,1,8,1,1,22.45,22.45,171,0,2.50,0,1,0,0,0,0,0,0,81048,VHO CORN - BUTTER - LAYERED REGULAR,1,0,0,VHO-POPCORN,0,0,,,,,,,1.0,,,,,,,,,,,,,,,,,,,0.0
170057,141030,10260700,2019-12-31,31,9109,1,31897,1,8,1,1,0.00,32.25,0,3750,0.00,0,0,0,0,0,0,0,0,0,<Unknown>,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,
170058,141030,10260700,2019-12-31,31,9109,1,31897,1,8,1,1,0.00,32.25,0,3750,0.00,0,0,0,0,0,0,0,0,0,<Unknown>,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,


In [17]:
visit_items = visit_items.fillna(0)
visit_items = visit_items.groupby('Visit_ID').agg({
#                                     'YM_drink':'max',
#                                      'YM_appetizer':'max',
#                                      'YM_snack':'max',
#                                      'YM_mainCourse':'max',
                                     'YM_dessert':'max',
#                                      'YM_otherFoodItem':'max',
                                     'YM_BUTTER':'max',
                                     'YM_Chocolate':'max',
                                     'YM_Student':'max','YM_Gamer':'max', 
                                     'YM_Pizza':'max', 'YM_HotDog':'max', 
                                     'YM_Nachos':'max', 'YM_Beer':'max', 
                                     'YM_Wine':'max', 
                                    'YM_Cocktail':'max', 
                                    'YM_Coffee':'max', 
                                    'YM_Tea':'max', 
                                                   'YM_Burger':'max', 
                                                   'YM_Salad':'max', 
                                                   'YM_Poutine':'max', 
                                                   'YM_Wings':'max',
                                                   'YM_fountainDrink':'max',
                                                   'YM_Calamari':'max',
                                                   'YM_vitaminWater':'max', 
                                                   'YM_popcorn':'max'})

In [22]:
visit_items.sample(n=10)

Unnamed: 0_level_0,YM_dessert,YM_BUTTER,YM_Chocolate,YM_Student,YM_Gamer,YM_Pizza,YM_HotDog,YM_Nachos,YM_Beer,YM_Wine,YM_Cocktail,YM_Coffee,YM_Tea,YM_Burger,YM_Salad,YM_Poutine,YM_Wings,YM_fountainDrink,YM_Calamari,YM_vitaminWater,YM_popcorn
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
23815,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11107,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
94938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
822,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31071,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24780,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
54124,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
40339,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [23]:
# Drop all non concession visits
visit_items = visit_items.loc[~(visit_items==0).all(axis=1)]

In [24]:
trans = visit_items

In [25]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.float_format', lambda x: '%.1f' % x):
    display(trans.head())

Unnamed: 0_level_0,YM_dessert,YM_BUTTER,YM_Chocolate,YM_Student,YM_Gamer,YM_Pizza,YM_HotDog,YM_Nachos,YM_Beer,YM_Wine,YM_Cocktail,YM_Coffee,YM_Tea,YM_Burger,YM_Salad,YM_Poutine,YM_Wings,YM_fountainDrink,YM_Calamari,YM_vitaminWater,YM_popcorn
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [26]:
# trans[trans['YM_BUTTER']==1].agg({
#                                     'YM_drink':'max',
#                                      'YM_appetizer':'max',
#                                      'YM_snack':'max',
#                                      'YM_mainCourse':'max',
#                                      'YM_dessert':'max',
#                                      'YM_otherFoodItem':'max',
#                                      'YM_Chocolate':'max',
#                                      'YM_Student':'max','YM_Gamer':'max', 
#                                      'YM_Pizza':'max', 'YM_HotDog':'max', 
#                                      'YM_Nachos':'max', 'YM_Beer':'max', 
#                                      'YM_Wine':'max', 
#                                     'YM_Cocktail':'max', 
#                                     'YM_Coffee':'max', 
#                                     'YM_Tea':'max', 
#                                                    'YM_Burger':'max', 
#                                                    'YM_Salad':'max', 
#                                                    'YM_Poutine':'max', 
#                                                    'YM_Wings':'max',
#                                                    'YM_fountainDrink':'max',
#                                                    'YM_Calamari':'max',
#                                                    'YM_vitaminWater':'max', 
#                                                    'YM_popcorn':'max'})

# Find Frequent Itemsets

mlxtend will use the apriori algorithm to find all the frequent itemsets. It will return them as a pandas DataFrame, so we can use regular pandas functions to slice and dice and explore.

In [27]:
%time frequent_itemsets = apriori(trans, min_support=0.001, use_colnames=True)

CPU times: user 155 ms, sys: 111 ms, total: 265 ms
Wall time: 270 ms


In [28]:
# Add a column that shows how many items are in the itemset
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   support   211 non-null    float64
 1   itemsets  211 non-null    object 
 2   length    211 non-null    int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 5.1+ KB


In [29]:
display(frequent_itemsets.head(15))

Unnamed: 0,support,itemsets,length
0,0.035542,(YM_dessert),1
1,0.349959,(YM_BUTTER),1
2,0.300424,(YM_Chocolate),1
3,0.001219,(YM_Student),1
4,0.021013,(YM_Gamer),1
5,0.021257,(YM_Pizza),1
6,0.058603,(YM_HotDog),1
7,0.070548,(YM_Nachos),1
8,0.013651,(YM_Beer),1
9,0.001024,(YM_Wine),1


In [30]:
display(frequent_itemsets.tail(10))

Unnamed: 0,support,itemsets,length
201,0.001706,"(YM_fountainDrink, YM_vitaminWater, YM_popcorn, YM_Chocolate)",4
202,0.0039,"(YM_fountainDrink, YM_HotDog, YM_Nachos, YM_popcorn)",4
203,0.001463,"(YM_Poutine, YM_fountainDrink, YM_HotDog, YM_popcorn)",4
204,0.001121,"(YM_BUTTER, YM_dessert, YM_fountainDrink, YM_popcorn, YM_Chocolate)",5
205,0.001219,"(YM_HotDog, YM_Nachos, YM_BUTTER, YM_popcorn, YM_Chocolate)",5
206,0.002925,"(YM_HotDog, YM_BUTTER, YM_fountainDrink, YM_popcorn, YM_Chocolate)",5
207,0.003023,"(YM_Nachos, YM_BUTTER, YM_fountainDrink, YM_popcorn, YM_Chocolate)",5
208,0.001414,"(YM_BUTTER, YM_Poutine, YM_fountainDrink, YM_popcorn, YM_Chocolate)",5
209,0.001219,"(YM_HotDog, YM_Nachos, YM_BUTTER, YM_fountainDrink, YM_popcorn)",5
210,0.001755,"(YM_HotDog, YM_Nachos, YM_fountainDrink, YM_popcorn, YM_Chocolate)",5


In [31]:
display(frequent_itemsets.sort_values(by=['support'], ascending=False).head(15))

Unnamed: 0,support,itemsets,length
18,0.752231,(YM_popcorn),1
16,0.396665,(YM_fountainDrink),1
1,0.349959,(YM_BUTTER),1
43,0.334162,"(YM_BUTTER, YM_popcorn)",2
93,0.30686,"(YM_fountainDrink, YM_popcorn)",2
2,0.300424,(YM_Chocolate),1
55,0.23836,"(YM_popcorn, YM_Chocolate)",2
53,0.175369,"(YM_fountainDrink, YM_Chocolate)",2
41,0.140705,"(YM_BUTTER, YM_fountainDrink)",2
150,0.137975,"(YM_fountainDrink, YM_popcorn, YM_Chocolate)",3


In [32]:
sub = frequent_itemsets[ (frequent_itemsets['length'] == 2) &
                   (frequent_itemsets['support'] >= 0.02) ]
display(sub.sort_values(by=['support'], ascending=False))

Unnamed: 0,support,itemsets,length
43,0.334162,"(YM_BUTTER, YM_popcorn)",2
93,0.30686,"(YM_fountainDrink, YM_popcorn)",2
55,0.23836,"(YM_popcorn, YM_Chocolate)",2
53,0.175369,"(YM_fountainDrink, YM_Chocolate)",2
41,0.140705,"(YM_BUTTER, YM_fountainDrink)",2
28,0.10843,"(YM_BUTTER, YM_Chocolate)",2
77,0.032373,"(YM_Nachos, YM_popcorn)",2
70,0.027644,"(YM_HotDog, YM_popcorn)",2
75,0.024133,"(YM_fountainDrink, YM_Nachos)",2
68,0.021111,"(YM_fountainDrink, YM_HotDog)",2


In [33]:
%time rules = association_rules(frequent_itemsets, min_threshold=0.1)

CPU times: user 13.1 ms, sys: 1.85 ms, total: 15 ms
Wall time: 21 ms


In [34]:
# Add the length of the antecedent (left hand side)
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))

rules.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 588 entries, 0 to 587
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         588 non-null    object 
 1   consequents         588 non-null    object 
 2   antecedent support  588 non-null    float64
 3   consequent support  588 non-null    float64
 4   support             588 non-null    float64
 5   confidence          588 non-null    float64
 6   lift                588 non-null    float64
 7   leverage            588 non-null    float64
 8   conviction          588 non-null    float64
 9   antecedent_len      588 non-null    int64  
dtypes: float64(7), int64(1), object(2)
memory usage: 46.1+ KB


In [35]:
display(rules.head(10))

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
0,(YM_dessert),(YM_BUTTER),0.035542,0.349959,0.005997,0.168724,0.482126,-0.006441,0.781981,1
1,(YM_dessert),(YM_Chocolate),0.035542,0.300424,0.006582,0.185185,0.616412,-0.004096,0.85857,1
2,(YM_dessert),(YM_fountainDrink),0.035542,0.396665,0.008678,0.24417,0.615557,-0.00542,0.798241,1
3,(YM_dessert),(YM_popcorn),0.035542,0.752231,0.012286,0.345679,0.459539,-0.01445,0.378667,1
4,(YM_BUTTER),(YM_Chocolate),0.349959,0.300424,0.10843,0.309836,1.031327,0.003294,1.013637,1
5,(YM_Chocolate),(YM_BUTTER),0.300424,0.349959,0.10843,0.360922,1.031327,0.003294,1.017155,1
6,(YM_Student),(YM_BUTTER),0.001219,0.349959,0.001024,0.84,2.400284,0.000597,4.062759,1
7,(YM_Gamer),(YM_BUTTER),0.021013,0.349959,0.00234,0.111369,0.318235,-0.005014,0.731509,1
8,(YM_Pizza),(YM_BUTTER),0.021257,0.349959,0.003315,0.155963,0.445662,-0.004124,0.770158,1
9,(YM_HotDog),(YM_BUTTER),0.058603,0.349959,0.0137,0.233777,0.668013,-0.006809,0.848371,1


## Exploring the Rules

This is where human creativity and "exploration" comes in.

In [52]:
# Find all rules that have at least 3 items on the LHS, confidence of at least .75 and lift of at least 1.2; sort by support

sub = rules.sort_values(by="support", ascending=False)
sub.shape
display(sub.head(10))

(588, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
22,(YM_popcorn),(YM_BUTTER),0.752231,0.349959,0.334162,0.444228,1.269374,0.070913,1.16962,1
21,(YM_BUTTER),(YM_popcorn),0.349959,0.752231,0.334162,0.954862,1.269374,0.070913,5.489165,1
68,(YM_popcorn),(YM_fountainDrink),0.752231,0.396665,0.30686,0.407933,1.028407,0.008476,1.019031,1
67,(YM_fountainDrink),(YM_popcorn),0.396665,0.752231,0.30686,0.773599,1.028407,0.008476,1.094383,1
35,(YM_popcorn),(YM_Chocolate),0.752231,0.300424,0.23836,0.316871,1.054745,0.012372,1.024075,1
36,(YM_Chocolate),(YM_popcorn),0.300424,0.752231,0.23836,0.793411,1.054745,0.012372,1.199337,1
32,(YM_fountainDrink),(YM_Chocolate),0.396665,0.300424,0.175369,0.442109,1.471616,0.056202,1.253965,1
33,(YM_Chocolate),(YM_fountainDrink),0.300424,0.396665,0.175369,0.583739,1.471616,0.056202,1.449415,1
18,(YM_BUTTER),(YM_fountainDrink),0.349959,0.396665,0.140705,0.402062,1.013605,0.001889,1.009025,1
19,(YM_fountainDrink),(YM_BUTTER),0.396665,0.349959,0.140705,0.35472,1.013605,0.001889,1.007379,1


In [53]:
# Find all rules that have at least 3 items on the LHS, confidence of at least .75 and lift of at least 1.2; sort by support

sub = rules[ (rules['antecedent_len'] >= 2) &
       (rules['confidence'] > 0.4) &
       (rules['lift'] > 1.2) &
           rules['support'] > 0.2].sort_values(by="lift", ascending=False)
sub.shape
display(sub.head(10))

(156, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
84,"(YM_dessert, YM_Nachos)",(YM_HotDog),0.003023,0.058603,0.001463,0.483871,8.256803,0.001285,1.823957,2
83,"(YM_dessert, YM_HotDog)",(YM_Nachos),0.003218,0.070548,0.001463,0.454545,6.443111,0.001236,1.703996,2
503,"(YM_BUTTER, YM_dessert, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.001511,0.30686,0.001121,0.741935,2.417833,0.000658,2.685919,3
554,"(YM_BUTTER, YM_Chocolate, YM_Poutine)","(YM_fountainDrink, YM_popcorn)",0.00195,0.30686,0.001414,0.725,2.362643,0.000815,2.52051,3
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2
300,"(YM_BUTTER, YM_Gamer)","(YM_popcorn, YM_Chocolate)",0.00234,0.23836,0.001268,0.541667,2.272474,0.00071,1.66176,2
539,"(YM_BUTTER, YM_fountainDrink, YM_Nachos)","(YM_popcorn, YM_Chocolate)",0.005753,0.23836,0.003023,0.525424,2.204329,0.001651,1.604884,3
415,"(YM_fountainDrink, YM_Gamer, YM_popcorn)",(YM_Chocolate),0.002048,0.300424,0.001316,0.642857,2.139832,0.000701,1.958812,3
501,"(YM_BUTTER, YM_dessert, YM_fountainDrink)","(YM_popcorn, YM_Chocolate)",0.002243,0.23836,0.001121,0.5,2.097668,0.000587,1.52328,3
541,"(YM_BUTTER, YM_Nachos, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.004778,0.30686,0.003023,0.632653,2.061701,0.001557,1.886882,3


In [54]:
# Find all rules that have at least 3 items on the LHS, confidence of at least .75 and lift of at least 1.2; sort by support

sub = rules[ (rules['antecedent_len'] >= 2) &
       (rules['confidence'] > 0.6) &
       (rules['lift'] > 1.2) &
           rules['support'] > 0.02].sort_values(by="lift", ascending=False)
sub.shape
display(sub.head(10))

(64, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
503,"(YM_BUTTER, YM_dessert, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.001511,0.30686,0.001121,0.741935,2.417833,0.000658,2.685919,3
554,"(YM_BUTTER, YM_Chocolate, YM_Poutine)","(YM_fountainDrink, YM_popcorn)",0.00195,0.30686,0.001414,0.725,2.362643,0.000815,2.52051,3
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2
415,"(YM_fountainDrink, YM_Gamer, YM_popcorn)",(YM_Chocolate),0.002048,0.300424,0.001316,0.642857,2.139832,0.000701,1.958812,3
541,"(YM_BUTTER, YM_Nachos, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.004778,0.30686,0.003023,0.632653,2.061701,0.001557,1.886882,3
475,"(YM_fountainDrink, YM_popcorn, YM_vitaminWater)",(YM_Chocolate),0.002779,0.300424,0.001706,0.614035,2.043894,0.000872,1.812537,3
527,"(YM_BUTTER, YM_HotDog, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.00468,0.30686,0.002925,0.625,2.036761,0.001489,1.848374,3
550,"(YM_BUTTER, YM_Chocolate, YM_popcorn, YM_Poutine)",(YM_fountainDrink),0.001901,0.396665,0.001414,0.74359,1.874603,0.00066,2.353006,4
499,"(YM_BUTTER, YM_dessert, YM_popcorn, YM_Chocolate)",(YM_fountainDrink),0.001511,0.396665,0.001121,0.741935,1.870432,0.000522,2.337922,4
274,"(YM_BUTTER, YM_dessert, YM_Chocolate)",(YM_fountainDrink),0.001511,0.396665,0.001121,0.741935,1.870432,0.000522,2.337922,3


In [55]:
# Find all rules that have at least 3 items on the LHS, confidence of at least .75 and lift of at least 1.2; sort by support

sub = rules[ (rules['antecedent_len'] >= 2) &
       (rules['confidence'] > 0.6) &
       (rules['lift'] > 1.2) ].sort_values(by="lift", ascending=False)
sub.shape
display(sub.head(10))

(64, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
503,"(YM_BUTTER, YM_dessert, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.001511,0.30686,0.001121,0.741935,2.417833,0.000658,2.685919,3
554,"(YM_BUTTER, YM_Chocolate, YM_Poutine)","(YM_fountainDrink, YM_popcorn)",0.00195,0.30686,0.001414,0.725,2.362643,0.000815,2.52051,3
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2
415,"(YM_fountainDrink, YM_Gamer, YM_popcorn)",(YM_Chocolate),0.002048,0.300424,0.001316,0.642857,2.139832,0.000701,1.958812,3
541,"(YM_BUTTER, YM_Nachos, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.004778,0.30686,0.003023,0.632653,2.061701,0.001557,1.886882,3
475,"(YM_fountainDrink, YM_popcorn, YM_vitaminWater)",(YM_Chocolate),0.002779,0.300424,0.001706,0.614035,2.043894,0.000872,1.812537,3
527,"(YM_BUTTER, YM_HotDog, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.00468,0.30686,0.002925,0.625,2.036761,0.001489,1.848374,3
550,"(YM_BUTTER, YM_Chocolate, YM_popcorn, YM_Poutine)",(YM_fountainDrink),0.001901,0.396665,0.001414,0.74359,1.874603,0.00066,2.353006,4
499,"(YM_BUTTER, YM_dessert, YM_popcorn, YM_Chocolate)",(YM_fountainDrink),0.001511,0.396665,0.001121,0.741935,1.870432,0.000522,2.337922,4
274,"(YM_BUTTER, YM_dessert, YM_Chocolate)",(YM_fountainDrink),0.001511,0.396665,0.001121,0.741935,1.870432,0.000522,2.337922,3


In [56]:
# mlxtend stores the itemsets as frozensets. IF we want to filter rules by what items are in them, 
# we have to check for set membership.

# This little helper will check if any elements in set1 are in set2
def is_any_in(set1, set2):
    c = set1.intersection(set2)
    return len(c) != 0

In [64]:

search_set = set({'YM_popcorn'})

sub = rules[(rules['antecedents'].apply(lambda x: is_any_in(search_set, x)))  &
             (rules['antecedent_len'] >=1)
            & (rules['confidence'] > 0.5)
            & (rules['lift'] >1)].sort_values('lift', ascending=False)

sub.shape
display(sub.head(10))

(30, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2
415,"(YM_fountainDrink, YM_Gamer, YM_popcorn)",(YM_Chocolate),0.002048,0.300424,0.001316,0.642857,2.139832,0.000701,1.958812,3
475,"(YM_fountainDrink, YM_popcorn, YM_vitaminWater)",(YM_Chocolate),0.002779,0.300424,0.001706,0.614035,2.043894,0.000872,1.812537,3
297,"(YM_BUTTER, YM_Gamer, YM_popcorn)",(YM_Chocolate),0.002243,0.300424,0.001268,0.565217,1.881398,0.000594,1.609024,3
550,"(YM_BUTTER, YM_Chocolate, YM_popcorn, YM_Poutine)",(YM_fountainDrink),0.001901,0.396665,0.001414,0.74359,1.874603,0.00066,2.353006,4
499,"(YM_BUTTER, YM_dessert, YM_popcorn, YM_Chocolate)",(YM_fountainDrink),0.001511,0.396665,0.001121,0.741935,1.870432,0.000522,2.337922,4
152,"(YM_Burger, YM_popcorn)",(YM_BUTTER),0.00195,0.349959,0.001268,0.65,1.857363,0.000585,1.857261,2
471,"(YM_Poutine, YM_popcorn, YM_Chocolate)",(YM_fountainDrink),0.004534,0.396665,0.003267,0.72043,1.816217,0.001468,2.158082,3
535,"(YM_BUTTER, YM_fountainDrink, YM_Nachos, YM_popcorn)",(YM_Chocolate),0.005656,0.300424,0.003023,0.534483,1.779094,0.001324,1.502793,4
299,"(YM_Gamer, YM_popcorn, YM_Chocolate)",(YM_BUTTER),0.002096,0.349959,0.001268,0.604651,1.727779,0.000534,1.644222,3


In [70]:
# Let's find all rules that contain any of the butter items

search_set = set({'YM_BUTTER'})

sub = rules[
    (rules['antecedents'].apply(lambda x: is_any_in(search_set, x)))
        & (rules['antecedent_len'] >=2)
         &   (rules['support'] > 0.05)
            & (rules['lift'] > 1) ].sort_values('lift', ascending=False)

sub.shape
display(sub.head(10))

(12, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
349,"(YM_BUTTER, YM_Chocolate)","(YM_fountainDrink, YM_popcorn)",0.10843,0.30686,0.062259,0.574191,1.871183,0.028987,1.627818,2
347,"(YM_BUTTER, YM_fountainDrink)","(YM_popcorn, YM_Chocolate)",0.140705,0.23836,0.062259,0.442481,1.856356,0.028721,1.366124,2
97,"(YM_BUTTER, YM_fountainDrink)",(YM_Chocolate),0.140705,0.300424,0.064161,0.455994,1.517835,0.02189,1.285972,2
343,"(YM_BUTTER, YM_fountainDrink, YM_popcorn)",(YM_Chocolate),0.137097,0.300424,0.062259,0.454125,1.511613,0.021072,1.281568,3
98,"(YM_BUTTER, YM_Chocolate)",(YM_fountainDrink),0.10843,0.396665,0.064161,0.591727,1.491753,0.02115,1.477772,2
345,"(YM_BUTTER, YM_popcorn, YM_Chocolate)",(YM_fountainDrink),0.105651,0.396665,0.062259,0.589294,1.48562,0.020351,1.469019,3
106,"(YM_BUTTER, YM_Chocolate)",(YM_popcorn),0.10843,0.752231,0.105651,0.974371,1.295308,0.024087,9.667357,2
164,"(YM_BUTTER, YM_fountainDrink)",(YM_popcorn),0.140705,0.752231,0.137097,0.974359,1.295293,0.031255,9.66301,2
344,"(YM_BUTTER, YM_fountainDrink, YM_Chocolate)",(YM_popcorn),0.064161,0.752231,0.062259,0.970365,1.289983,0.013996,8.360632,3
348,"(YM_BUTTER, YM_popcorn)","(YM_fountainDrink, YM_Chocolate)",0.334162,0.175369,0.062259,0.186315,1.062413,0.003657,1.013451,2


In [43]:
# Let's find all rules that contain any of the butter items

search_set = set({'YM_BUTTER'})

sub = rules[(rules['consequents'].apply(lambda x: is_any_in(search_set, x))) 
            & (rules['antecedent_len'] >=2)     
            & (rules['confidence'] > 0.5) 
            & (rules['lift'] > 2)].sort_values('support', ascending=False)

sub.shape
display(sub.head(10))

(1, 10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2


In [44]:
sub

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
141,"(YM_Cocktail, YM_popcorn)",(YM_BUTTER),0.003608,0.349959,0.002925,0.810811,2.316877,0.001663,3.435933,2


In [None]:
--> who is working on explainability:  partial dependecy plots & feature importances
--> devils advocate, i dont care about a discount on butter