In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import transactionencoder

In [4]:
# Toy market-basket data: each inner list is one transaction,
# i.e. the items that were bought together.
data_raw = [
    ["milk", "onion", "nutmeg", "kidney beans", "eggs", "yogurt"],
    ["dill", "onion", "nutmeg", "kidney beans", "eggs", "yogurt"],
    ["milk", "apple", "kidney beans", "eggs"],
    ["milk", "unicorn", "corn", "kidney beans", "yogurt"],
    ["corn", "onion", "kidney beans", "ice cream", "eggs"],
]

In [5]:
# One-hot encode the transactions before building a DataFrame:
# the encoder learns the distinct item names from data_raw and returns
# one row per transaction with a boolean flag per item.
te = transactionencoder.TransactionEncoder()
te_ary = te.fit_transform(data_raw)

In [6]:
# Wrap the encoded array in a DataFrame, labelling each column with the
# item name the encoder learned (te.columns_), then preview the first rows.
dataset = pd.DataFrame(data=te_ary, columns=te.columns_)
dataset.head()

Unnamed: 0,apple,corn,dill,eggs,ice cream,kidney beans,milk,nutmeg,onion,unicorn,yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [7]:
# Mine the frequent itemsets: keep only item combinations with support >= 0.6.
# use_colnames=True reports itemsets by item name instead of column index.
frequent_itemset = apriori(
    dataset,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemset

Unnamed: 0,support,itemsets
0,0.8,(eggs)
1,1.0,(kidney beans)
2,0.6,(milk)
3,0.6,(onion)
4,0.6,(yogurt)
5,0.8,"(kidney beans, eggs)"
6,0.6,"(onion, eggs)"
7,0.6,"(kidney beans, milk)"
8,0.6,"(kidney beans, onion)"
9,0.6,"(kidney beans, yogurt)"


In [8]:
# Derive association rules (antecedents -> consequents) from the frequent
# itemsets, keeping only the rules with confidence >= 0.5, and display them.
result = association_rules(
    frequent_itemset,
    metric="confidence",
    min_threshold=0.5,
)
result

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(kidney beans),(eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
1,(eggs),(kidney beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
2,(onion),(eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(eggs),(onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6
4,(kidney beans),(milk),1.0,0.6,0.6,0.6,1.0,0.0,1.0
5,(milk),(kidney beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,(kidney beans),(onion),1.0,0.6,0.6,0.6,1.0,0.0,1.0
7,(onion),(kidney beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
8,(kidney beans),(yogurt),1.0,0.6,0.6,0.6,1.0,0.0,1.0
9,(yogurt),(kidney beans),0.6,1.0,0.6,1.0,1.0,0.0,inf


In [9]:
# Narrow the rules table down to the columns of interest:
# the rule itself plus its support, confidence and lift.
result_simplify = result[
    [
        'antecedents',
        'consequents',
        'support',
        'confidence',
        'lift',
    ]
]
result_simplify

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(kidney beans),(eggs),0.8,0.8,1.0
1,(eggs),(kidney beans),0.8,1.0,1.0
2,(onion),(eggs),0.6,1.0,1.25
3,(eggs),(onion),0.6,0.75,1.25
4,(kidney beans),(milk),0.6,0.6,1.0
5,(milk),(kidney beans),0.6,1.0,1.0
6,(kidney beans),(onion),0.6,0.6,1.0
7,(onion),(kidney beans),0.6,1.0,1.0
8,(kidney beans),(yogurt),0.6,0.6,1.0
9,(yogurt),(kidney beans),0.6,1.0,1.0
