In [1]:
# import library
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the bakery transaction log from the CSV file into a DataFrame
df_bakery = pd.read_csv(filepath_or_buffer='Bakery.csv')

# Display the loaded DataFrame
df_bakery

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend
...,...,...,...,...,...
20502,9682,Coffee,2017-09-04 14:32:58,Afternoon,Weekend
20503,9682,Tea,2017-09-04 14:32:58,Afternoon,Weekend
20504,9683,Coffee,2017-09-04 14:57:06,Afternoon,Weekend
20505,9683,Pastry,2017-09-04 14:57:06,Afternoon,Weekend


In [3]:
# Count missing values in each column
df_bakery.isna().sum(axis=0)

TransactionNo    0
Items            0
DateTime         0
Daypart          0
DayType          0
dtype: int64

In [4]:
# Reshape the data into transaction format:
# one list of purchased items per TransactionNo
data_items = (
    df_bakery
    .groupby('TransactionNo')['Items']
    .apply(list)
    .tolist()
)

# Show the first 5 transactions
data_items[:5]

[['Bread'],
 ['Scandinavian', 'Scandinavian'],
 ['Hot chocolate', 'Jam', 'Cookies'],
 ['Muffin'],
 ['Coffee', 'Pastry', 'Bread']]

In [5]:
# Create the TransactionEncoder
te = TransactionEncoder()

# One-hot encode the transactions (fit and transform in a single step)
data_encoded = te.fit_transform(data_items)

# Wrap the encoded array in a DataFrame with one column per item,
# and cast the values to integers (0/1)
data_biner = pd.DataFrame(data_encoded, columns=te.columns_).astype(int)

# Display the encoded data
data_biner

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9460,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9461,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
9462,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9463,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Mine the frequent itemsets: keep every itemset whose support is at least 0.01.
# apriori is documented to work best on a boolean one-hot frame; for an integer
# 0/1 frame mlxtend emits a DeprecationWarning about worse performance, so the
# frame is cast to bool for this call only (supports are unchanged and
# data_biner itself is left as-is).
frequent_itemsets = apriori(data_biner.astype(bool), min_support=0.01, use_colnames=True)

# Build association rules from the frequent itemsets,
# keeping only rules whose lift is at least 1.0
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1.0)

# Display the association rules
rules



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Coffee),(Alfajores),0.478394,0.036344,0.019651,0.041078,1.130235,0.002264,1.004936,0.22091
1,(Alfajores),(Coffee),0.036344,0.478394,0.019651,0.540698,1.130235,0.002264,1.135648,0.119574
2,(Pastry),(Bread),0.086107,0.327205,0.02916,0.33865,1.034977,0.000985,1.017305,0.03698
3,(Bread),(Pastry),0.327205,0.086107,0.02916,0.089119,1.034977,0.000985,1.003306,0.050231
4,(Coffee),(Brownie),0.478394,0.040042,0.019651,0.041078,1.02586,0.000495,1.00108,0.048327
5,(Brownie),(Coffee),0.040042,0.478394,0.019651,0.490765,1.02586,0.000495,1.024293,0.026259
6,(Coffee),(Cake),0.478394,0.103856,0.054728,0.114399,1.101515,0.005044,1.011905,0.176684
7,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.10284
8,(Cake),(Hot chocolate),0.103856,0.05832,0.01141,0.109868,1.883874,0.005354,1.05791,0.523553
9,(Hot chocolate),(Cake),0.05832,0.103856,0.01141,0.195652,1.883874,0.005354,1.114125,0.498236


In [7]:
# Rank the rules from highest to lowest confidence
rules_confidence = rules.sort_values('confidence', ascending=False)
rules_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
31,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582,0.332006
29,(Spanish Brunch),(Coffee),0.018172,0.478394,0.010882,0.598837,1.251766,0.002189,1.300235,0.204851
18,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871,0.170091
22,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682,0.146161
1,(Alfajores),(Coffee),0.036344,0.478394,0.019651,0.540698,1.130235,0.002264,1.135648,0.119574
17,(Juice),(Coffee),0.038563,0.478394,0.020602,0.534247,1.11675,0.002154,1.119919,0.108738
25,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384,0.109205
7,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.10284
27,(Scone),(Coffee),0.034548,0.478394,0.018067,0.522936,1.093107,0.001539,1.093366,0.088224
13,(Cookies),(Coffee),0.054411,0.478394,0.028209,0.518447,1.083723,0.002179,1.083174,0.0817


In [8]:
# Rank the rules from highest to lowest lift
rules_lift = rules.sort_values('lift', ascending=False)
rules_lift

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
41,(Cake),"(Tea, Coffee)",0.103856,0.049868,0.010037,0.096643,1.937977,0.004858,1.051779,0.54009
38,"(Tea, Coffee)",(Cake),0.049868,0.103856,0.010037,0.201271,1.937977,0.004858,1.121962,0.509401
9,(Hot chocolate),(Cake),0.05832,0.103856,0.01141,0.195652,1.883874,0.005354,1.114125,0.498236
8,(Cake),(Hot chocolate),0.103856,0.05832,0.01141,0.109868,1.883874,0.005354,1.05791,0.523553
10,(Tea),(Cake),0.142631,0.103856,0.023772,0.166667,1.604781,0.008959,1.075372,0.439556
11,(Cake),(Tea),0.103856,0.142631,0.023772,0.228891,1.604781,0.008959,1.111865,0.420538
31,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582,0.332006
30,(Coffee),(Toast),0.478394,0.033597,0.023666,0.04947,1.472431,0.007593,1.016699,0.615122
37,(Pastry),"(Coffee, Bread)",0.086107,0.090016,0.011199,0.130061,1.444872,0.003448,1.046033,0.336907
36,"(Coffee, Bread)",(Pastry),0.090016,0.086107,0.011199,0.124413,1.444872,0.003448,1.043749,0.338354


In [9]:
# Rank the rules from highest to lowest support
rules_support = rules.sort_values('support', ascending=False)
rules_support

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
6,(Coffee),(Cake),0.478394,0.103856,0.054728,0.114399,1.101515,0.005044,1.011905,0.176684
7,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.10284
23,(Coffee),(Pastry),0.478394,0.086107,0.047544,0.099382,1.154168,0.006351,1.01474,0.256084
22,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682,0.146161
25,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384,0.109205
24,(Coffee),(Sandwich),0.478394,0.071844,0.038246,0.079947,1.112792,0.003877,1.008807,0.194321
19,(Coffee),(Medialuna),0.478394,0.061807,0.035182,0.073542,1.189878,0.005614,1.012667,0.305936
18,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871,0.170091
15,(Hot chocolate),(Coffee),0.05832,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553,0.060403
14,(Coffee),(Hot chocolate),0.478394,0.05832,0.029583,0.061837,1.060311,0.001683,1.003749,0.109048
