In [0]:
# Toy market-basket dataset: each inner list is one transaction (basket of items).
# Note that the last transaction lists "Onion" twice.
data = [
    ["Milk", "Onion", "Maggi", "KidneyBeans", "Eggs", "Yogurt"],
    ["Cheese", "Onion", "Maggi", "KidneyBeans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "KidneyBeans", "Eggs"],
    ["Milk", "Corn", "KidneyBeans", "Yogurt", "Biscuit"],
    ["Corn", "Onion", "Onion", "KidneyBeans", "IceCream", "Eggs"],
]
data

[['Milk', 'Onion', 'Maggi', 'KidneyBeans', 'Eggs', 'Yogurt'],
 ['Cheese', 'Onion', 'Maggi', 'KidneyBeans', 'Eggs', 'Yogurt'],
 ['Milk', 'Apple', 'KidneyBeans', 'Eggs'],
 ['Milk', 'Corn', 'KidneyBeans', 'Yogurt', 'Biscuit'],
 ['Corn', 'Onion', 'Onion', 'KidneyBeans', 'IceCream', 'Eggs']]

In [0]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder 
# TransactionEncoder encodes transaction data given as a Python list of lists
# into a one-hot encoded NumPy boolean array.
t = TransactionEncoder()

# Step 1: fit() learns the unique item labels that occur in the dataset.
t.fit(data)
# Step 2: transform() converts the list of lists into the boolean array,
# one row per transaction and one column per learned label.
te = t.transform(data)

# The labels matching the array's columns are exposed via the columns_ attribute;
# use them as the DataFrame header.
df = pd.DataFrame(data=te, columns=t.columns_)
df

Unnamed: 0,Apple,Biscuit,Cheese,Corn,Eggs,IceCream,KidneyBeans,Maggi,Milk,Onion,Yogurt
0,False,False,False,False,True,False,True,True,True,True,True
1,False,False,True,False,True,False,True,True,False,True,True
2,True,False,False,False,True,False,True,False,True,False,False
3,False,True,False,True,False,False,True,False,True,False,True
4,False,False,False,True,True,True,True,False,False,True,False


In [0]:
# Show the one-hot array in the classic 0/1 integer representation
# (returns a converted copy; `te` itself is not reassigned here).
te.astype(int)

array([[0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1],
       [0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1],
       [1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1],
       [0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])

Let the minimum support threshold be 60%.

Find the itemsets whose support (the fraction of transactions containing the itemset) is at least 60%.

In [0]:
from mlxtend.frequent_patterns import apriori, association_rules
# Mine itemsets with support of at least 0.6. With use_colnames=False (the default)
# the itemsets are reported as column indices of `df` rather than item names.
apriori(df, min_support=0.6, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.8,(4)
1,1.0,(6)
2,0.6,(8)
3,0.6,(9)
4,0.6,(10)
5,0.8,"(4, 6)"
6,0.6,"(9, 4)"
7,0.6,"(8, 6)"
8,0.6,"(9, 6)"
9,0.6,"(10, 6)"


By default, apriori returns the column indices of the items, which may be useful in downstream operations such as association rule mining. For better readability, we can set use_colnames=True to convert these integer values into the respective item names.

In [0]:
# Same mining run, but report each itemset with the item names taken from
# the columns of `df` instead of integer column indices.
apriori(df, use_colnames=True, min_support=0.6)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(KidneyBeans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(KidneyBeans, Eggs)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(KidneyBeans, Milk)"
8,0.6,"(KidneyBeans, Onion)"
9,0.6,"(KidneyBeans, Yogurt)"


Create the frequent itemsets via apriori and add a new column that stores the length of each itemset

In [0]:
# Build the frequent-itemset table (support >= 0.6, item names as labels) and
# append a `length` column holding the number of items in each itemset.
frequent_itemsets = (
    apriori(df, use_colnames=True, min_support=0.6)
    .assign(length=lambda itemset_table: itemset_table["itemsets"].apply(len))
)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(KidneyBeans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(KidneyBeans, Eggs)",2
6,0.6,"(Eggs, Onion)",2
7,0.6,"(KidneyBeans, Milk)",2
8,0.6,"(KidneyBeans, Onion)",2
9,0.6,"(KidneyBeans, Yogurt)",2


The association_rules() function allows you to 

(1) specify your metric of interest and 

(2) the corresponding threshold. Supported metrics include confidence and lift; the resulting table also reports support, leverage and conviction for each rule.

Here, rules are derived from the frequent itemsets only if their confidence is at least 80 percent (min_threshold=0.8).

In [0]:
# Derive association rules whose confidence meets the 0.8 threshold, then
# order them from highest to lowest confidence.
rules = (
    association_rules(frequent_itemsets, metric="confidence", min_threshold=0.8)
    .sort_values(by="confidence", ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(Eggs),(KidneyBeans),0.8,1.0,0.8,1.0,1.0,0.0,inf
2,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(Milk),(KidneyBeans),0.6,1.0,0.6,1.0,1.0,0.0,inf
4,(Onion),(KidneyBeans),0.6,1.0,0.6,1.0,1.0,0.0,inf
5,(Yogurt),(KidneyBeans),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,"(KidneyBeans, Onion)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
7,"(Eggs, Onion)",(KidneyBeans),0.6,1.0,0.6,1.0,1.0,0.0,inf
8,(Onion),"(KidneyBeans, Eggs)",0.6,0.8,0.6,1.0,1.25,0.12,inf
0,(KidneyBeans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
