In [116]:
import pandas as pd

# Load the raw bakery sales log from the working directory. Per the preview
# below, the file carries the columns Transaction, Item, date_time, period_day
# and weekday_weekend, and a transaction id can repeat across several rows.
data = pd.read_csv(filepath_or_buffer='bakery-sale.csv')

# Bare last expression -> rich (truncated) display of the frame.
data

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,10/30/2016 9:58,morning,weekend
1,2,Scandinavian,10/30/2016 10:05,morning,weekend
2,2,Scandinavian,10/30/2016 10:05,morning,weekend
3,3,Hot chocolate,10/30/2016 10:07,morning,weekend
4,3,Jam,10/30/2016 10:07,morning,weekend
...,...,...,...,...,...
20502,9682,Coffee,4/9/2017 14:32,afternoon,weekend
20503,9682,Tea,4/9/2017 14:32,afternoon,weekend
20504,9683,Coffee,4/9/2017 14:57,afternoon,weekend
20505,9683,Pastry,4/9/2017 14:57,afternoon,weekend


In [117]:
# Data preparation
# Group the data: collect every item sold under the same Transaction id into
# one Python list, giving one row per transaction.
items_by_transaction = data.groupby('Transaction')['Item']
df = items_by_transaction.apply(list).reset_index()

# Show the grouped frame (columns: Transaction, Item).
df

Unnamed: 0,Transaction,Item
0,1,[Bread]
1,2,"[Scandinavian, Scandinavian]"
2,3,"[Hot chocolate, Jam, Cookies]"
3,4,[Muffin]
4,5,"[Coffee, Pastry, Bread]"
...,...,...
9460,9680,[Bread]
9461,9681,"[Truffles, Tea, Spanish Brunch, Christmas common]"
9462,9682,"[Muffin, Tacos-Fajita, Coffee, Tea]"
9463,9683,"[Coffee, Pastry]"


In [118]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Minimum share of baskets an itemset must appear in to be kept by apriori.
MIN_SUPPORT = 0.1

# One list of item names per transaction, taken from the grouped frame `df`.
item_lists = df['Item'].tolist()

# Keep only baskets that contain more than one line item.
# NOTE(review): len() counts repeated items, so a basket such as
# ['Scandinavian', 'Scandinavian'] passes this filter although it holds a
# single distinct product -- confirm whether len(set(t)) > 1 was intended.
filtered_transactions = [t for t in item_lists if len(t) > 1]

# One-hot encode the baskets: one row per basket, one column per distinct item.
te = TransactionEncoder()
te_array = te.fit(filtered_transactions).transform(filtered_transactions)

# Keep the encoded matrix under its own name instead of rebinding `df`.
# Overwriting `df` here would drop its 'Item' column, so re-running this cell
# would fail with a KeyError and the grouped frame would be lost.
basket_encoded = pd.DataFrame(te_array, columns=te.columns_)

# Mine every itemset whose support reaches MIN_SUPPORT; use_colnames=True
# reports itemsets by item name rather than by column index.
frequent_itemsets = apriori(basket_encoded, min_support=MIN_SUPPORT, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.338533,(Bread)
1,0.152211,(Cake)
2,0.60336,(Coffee)
3,0.129414,(Pastry)
4,0.106102,(Sandwich)
5,0.199177,(Tea)
6,0.14604,"(Coffee, Bread)"


In [119]:
# Association rule mining on the frequent itemsets.
# mlxtend documents `num_itemsets` as the number of transactions in the data
# the itemsets were mined from, so pass the count of baskets that were fed to
# apriori -- not the number of frequent itemsets that came out of it.
num_itemsets = len(filtered_transactions)

# metric='support' with min_threshold=0 applies no cut-off: every rule that can
# be formed from the frequent itemsets is returned, whatever its quality.
rules = association_rules(frequent_itemsets, num_itemsets=num_itemsets, metric='support', min_threshold=0)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Coffee),(Bread),0.60336,0.338533,0.14604,0.242045,0.714984,1.0,-0.058217,0.8727,-0.501253,0.183502,-0.145869,0.336719
1,(Bread),(Coffee),0.338533,0.60336,0.14604,0.431392,0.714984,1.0,-0.058217,0.697564,-0.376033,0.183502,-0.43356,0.336719


In [120]:
# Deployment: persist the rules table, then read it back to check the artifact.
import pickle

# Single place for the artifact location used by both the write and the read.
rules_path = 'association_rule.pkl'

# Serialize the rules DataFrame to disk in binary mode.
with open(rules_path, 'wb') as sink:
    pickle.dump(rules, sink)

# Reload the file that was just written; `rules` is rebound to the
# deserialized copy. Only unpickle files from a trusted source -- loading a
# pickle can execute arbitrary code.
with open(rules_path, 'rb') as source:
    rules = pickle.load(source)

# Plain-text dump of the reloaded table.
print(rules)

  antecedents consequents  antecedent support  consequent support  support  \
0    (Coffee)     (Bread)            0.603360            0.338533  0.14604   
1     (Bread)    (Coffee)            0.338533            0.603360  0.14604   

   confidence      lift  representativity  leverage  conviction  \
0    0.242045  0.714984               1.0 -0.058217    0.872700   
1    0.431392  0.714984               1.0 -0.058217    0.697564   

   zhangs_metric   jaccard  certainty  kulczynski  
0      -0.501253  0.183502  -0.145869    0.336719  
1      -0.376033  0.183502  -0.433560    0.336719  
