In [8]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import transactionencoder

In [3]:
# Load the supermarket transaction log from disk and preview the first rows.
data_raw = pd.read_csv(
    "Datasets/transaksi_belanja_supermarket.csv",
)
data_raw.head(5)

Unnamed: 0,Customer_ID,No_Transaksi,Tanggal,Items
0,10253,548252,6/14/2017,"Telur, Minyak, Permen, Tepung"
1,10104,548253,12/25/2017,"Deterjen, Kecap, Susu, Sabun"
2,10271,548530,12/10/2017,"Telur, Minyak, Kecap, Snack, Permen, Tepung"
3,10257,548436,5/21/2018,"Deterjen, Kecap, Sabun"
4,10281,548442,1/2/2018,"Telur, Minyak, Kecap, Snack, Permen, Tepung"


In [17]:
# Build the list of transactions from the "Items" column (the last column).
# Each cell holds one comma-separated string, e.g. "Telur, Minyak, Permen";
# splitting on ", " turns it into a list of item names for that transaction.
# Every row of the frame is processed instead of a hard-coded count of 350,
# so the cell neither raises IndexError on a shorter file nor silently drops
# rows on a longer one.
data = [str(items).split(", ") for items in data_raw["Items"]]

In [10]:
# Fix the data formatting before converting to a DataFrame: learn the set of
# distinct items, then encode each transaction as a row of True/False flags.
te = transactionencoder.TransactionEncoder()
te.fit(data)
te_ary = te.transform(data)

In [11]:
# Wrap the encoded boolean array in a DataFrame, one column per item name
# learned by the encoder, and preview the first rows.
dataframe = pd.DataFrame(data=te_ary, columns=te.columns_)
dataframe.head(5)

Unnamed: 0,Deterjen,Kecap,Minyak,Permen,Sabun,Snack,Susu,Telur,Tepung
0,False,False,True,True,False,False,False,True,True
1,True,True,False,False,True,False,True,False,False
2,False,True,True,True,False,True,False,True,True
3,True,True,False,False,True,False,False,False,False
4,False,True,True,True,False,True,False,True,True


In [14]:
# Find the frequent itemsets: every itemset whose support is >= MIN_SUPPORT
# (i.e. it occurs in at least 10% of the transactions).
# The threshold is named so the comment and the code cannot drift apart.
MIN_SUPPORT = 0.1
frequent_itemset = apriori(dataframe, min_support=MIN_SUPPORT, use_colnames=True)
frequent_itemset

Unnamed: 0,support,itemsets
0,0.468571,(Deterjen)
1,0.520000,(Kecap)
2,0.662857,(Minyak)
3,0.502857,(Permen)
4,0.494286,(Sabun)
...,...,...
203,0.128571,"(Telur, Sabun, Permen, Tepung, Minyak)"
204,0.142857,"(Telur, Snack, Permen, Tepung, Minyak)"
205,0.131429,"(Susu, Telur, Permen, Tepung, Minyak)"
206,0.117143,"(Telur, Snack, Sabun, Tepung, Minyak)"


In [15]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence is >= 0.5, then display the resulting table.
result = association_rules(
    frequent_itemset,
    metric="confidence",
    min_threshold=0.5,
)
result

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Deterjen),(Kecap),0.468571,0.520000,0.265714,0.567073,1.090525,0.022057,1.108732
1,(Kecap),(Deterjen),0.520000,0.468571,0.265714,0.510989,1.090525,0.022057,1.086742
2,(Deterjen),(Minyak),0.468571,0.662857,0.305714,0.652439,0.984283,-0.004882,0.970025
3,(Deterjen),(Permen),0.468571,0.502857,0.251429,0.536585,1.067073,0.015804,1.072782
4,(Permen),(Deterjen),0.502857,0.468571,0.251429,0.500000,1.067073,0.015804,1.062857
...,...,...,...,...,...,...,...,...,...
670,"(Minyak, Sabun, Tepung, Telur)",(Susu),0.234286,0.462857,0.117143,0.500000,1.080247,0.008702,1.074286
671,"(Susu, Sabun, Telur)","(Minyak, Tepung)",0.165714,0.525714,0.117143,0.706897,1.344640,0.030024,1.618151
672,"(Susu, Sabun, Tepung)","(Minyak, Telur)",0.157143,0.574286,0.117143,0.745455,1.298055,0.026898,1.672449
673,"(Susu, Minyak, Sabun)","(Tepung, Telur)",0.157143,0.565714,0.117143,0.745455,1.317723,0.028245,1.706122


In [16]:
# Keep only the columns needed to read the rules: which items imply which,
# and the support / confidence / lift of each rule.
result_simplify = result.loc[
    :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
]
result_simplify


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Deterjen),(Kecap),0.265714,0.567073,1.090525
1,(Kecap),(Deterjen),0.265714,0.510989,1.090525
2,(Deterjen),(Minyak),0.305714,0.652439,0.984283
3,(Deterjen),(Permen),0.251429,0.536585,1.067073
4,(Permen),(Deterjen),0.251429,0.500000,1.067073
...,...,...,...,...,...
670,"(Minyak, Sabun, Tepung, Telur)",(Susu),0.117143,0.500000,1.080247
671,"(Susu, Sabun, Telur)","(Minyak, Tepung)",0.117143,0.706897,1.344640
672,"(Susu, Sabun, Tepung)","(Minyak, Telur)",0.117143,0.745455,1.298055
673,"(Susu, Minyak, Sabun)","(Tepung, Telur)",0.117143,0.745455,1.317723
