In [27]:
pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


##### Create the dataset

In [28]:
# Eight market-basket transactions; each inner list holds the items bought together.
# This is the raw, variable-length form that gets encoded before mining.
dataset = [
    ["Apple", "Beer", "Rice", "Chicken"],
    ["Apple", "Beer", "Rice"],
    ["Apple", "Beer"],
    ["Apple", "Pear"],
    ["Milk", "Beer", "Rice", "Chicken"],
    ["Milk", "Beer", "Rice"],
    ["Milk", "Beer"],
    ["Milk", "Pear"],
]

In [29]:
import pandas as pd
# TransactionEncoder turns a list of item lists into a boolean array with one
# column per distinct item, which is the tabular form used by the steps below.
from mlxtend.preprocessing import TransactionEncoder

In [30]:
# Learn the item vocabulary from the transactions, then encode the same
# transactions as a boolean array (one row per transaction, one column per item).
te = TransactionEncoder().fit(dataset)
encoded = te.transform(dataset)

In [31]:
# Inspect the encoded array: one row per transaction, one boolean column per item.
encoded

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [32]:
# Item names learned by the encoder; they label the columns of the encoded array.
te.columns_

['Apple', 'Beer', 'Chicken', 'Milk', 'Pear', 'Rice']

In [33]:
# Wrap the encoded array in a DataFrame, labelling each column with its item name,
# then display it.
df = pd.DataFrame(data=encoded, columns=te.columns_)
df

Unnamed: 0,Apple,Beer,Chicken,Milk,Pear,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


In [34]:
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [35]:
# Mine the frequent itemsets with FP-Growth.
# min_support=0.25 keeps itemsets present in at least 25% of the transactions
# (2 of the 8 here); use_colnames=True reports items by their column names.
freq_itemset = fpgrowth(
    df,
    min_support=0.25,
    use_colnames=True,
)

In [36]:
# Display the frequent itemsets together with their support values.
freq_itemset

Unnamed: 0,support,itemsets
0,0.75,(Beer)
1,0.5,(Rice)
2,0.5,(Apple)
3,0.25,(Chicken)
4,0.25,(Pear)
5,0.5,(Milk)
6,0.5,"(Beer, Rice)"
7,0.375,"(Apple, Beer)"
8,0.25,"(Apple, Rice)"
9,0.25,"(Apple, Beer, Rice)"


In [37]:
# Derive association rules from the frequent itemsets, using confidence as the
# selection metric with a minimum threshold of 0.5 (50%).
rules = association_rules(
    freq_itemset,
    metric='confidence',
    min_threshold=0.5,
)

In [38]:
# Size of the rules table as (number of rules, number of columns).
rules.shape

(28, 10)

In [39]:
# Display the generated rules together with all of their metric columns.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Beer),(Rice),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5,1.0
1,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf,0.5
2,(Apple),(Beer),0.5,0.75,0.375,0.75,1.0,0.0,1.0,0.0
3,(Beer),(Apple),0.75,0.5,0.375,0.5,1.0,0.0,1.0,0.0
4,(Apple),(Rice),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
5,(Rice),(Apple),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
6,"(Apple, Beer)",(Rice),0.375,0.5,0.25,0.666667,1.333333,0.0625,1.5,0.4
7,"(Apple, Rice)",(Beer),0.25,0.75,0.25,1.0,1.333333,0.0625,inf,0.333333
8,"(Beer, Rice)",(Apple),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
9,(Apple),"(Beer, Rice)",0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0


In [40]:
# Keep only the four columns used below; the other metric columns are dropped
# from `rules` from this point on.
rules = rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence']]

In [41]:
# Display the rules again, now restricted to the four selected columns.
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Beer),(Rice),0.5,0.666667
1,(Rice),(Beer),0.5,1.0
2,(Apple),(Beer),0.375,0.75
3,(Beer),(Apple),0.375,0.5
4,(Apple),(Rice),0.25,0.5
5,(Rice),(Apple),0.25,0.5
6,"(Apple, Beer)",(Rice),0.25,0.666667
7,"(Apple, Rice)",(Beer),0.25,1.0
8,"(Beer, Rice)",(Apple),0.25,0.5
9,(Apple),"(Beer, Rice)",0.25,0.5


In [42]:
# Rank the rules: highest confidence first, ties broken by highest support.
# The sorted frame is only displayed here; `rules` itself keeps its original order.
rules.sort_values(['confidence', 'support'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,support,confidence
1,(Rice),(Beer),0.5,1.0
7,"(Apple, Rice)",(Beer),0.25,1.0
11,(Chicken),(Rice),0.25,1.0
13,(Chicken),(Beer),0.25,1.0
14,"(Beer, Chicken)",(Rice),0.25,1.0
16,"(Chicken, Rice)",(Beer),0.25,1.0
17,(Chicken),"(Beer, Rice)",0.25,1.0
25,"(Milk, Rice)",(Beer),0.25,1.0
2,(Apple),(Beer),0.375,0.75
20,(Milk),(Beer),0.375,0.75


In [43]:
# Filter the rules: keep only those whose confidence is strictly greater than 0.5.
nrules = rules.loc[rules['confidence'].gt(0.5)]

In [44]:
# Size of the filtered rules table as (rows, columns).
nrules.shape

(13, 4)

In [45]:
# Display the rules that passed the confidence filter.
nrules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Beer),(Rice),0.5,0.666667
1,(Rice),(Beer),0.5,1.0
2,(Apple),(Beer),0.375,0.75
6,"(Apple, Beer)",(Rice),0.25,0.666667
7,"(Apple, Rice)",(Beer),0.25,1.0
11,(Chicken),(Rice),0.25,1.0
13,(Chicken),(Beer),0.25,1.0
14,"(Beer, Chicken)",(Rice),0.25,1.0
16,"(Chicken, Rice)",(Beer),0.25,1.0
17,(Chicken),"(Beer, Rice)",0.25,1.0


In [46]:
# Rows of the filtered rules whose antecedent is exactly {Rice}.
nrules.loc[nrules['antecedents'] == {'Rice'}]

Unnamed: 0,antecedents,consequents,support,confidence
1,(Rice),(Beer),0.5,1.0


In [47]:
# Consequents of the rules whose antecedent is exactly {Rice}.
nrules.loc[nrules['antecedents'] == {'Rice'}, 'consequents']

1    (Beer)
Name: consequents, dtype: object

In [48]:
# Consequents of the rules whose antecedent is exactly {Apple, Beer}.
nrules.loc[nrules['antecedents'] == {'Apple', 'Beer'}, 'consequents']

6    (Rice)
Name: consequents, dtype: object

##### Export the Rules

In [49]:
# Export the filtered rules to rules.csv (no index column).
# The itemset columns hold frozensets; written as-is they land in the file as
# their Python repr (e.g. "frozenset({'Beer', 'Rice'})"), which is awkward to
# parse and lists items in an order that can differ between kernel sessions.
# Write each itemset as a sorted, comma-separated string instead.
# `nrules` itself is left unchanged: assign() returns a new frame.
nrules.assign(
    antecedents=nrules['antecedents'].map(lambda items: ', '.join(sorted(items))),
    consequents=nrules['consequents'].map(lambda items: ', '.join(sorted(items))),
).to_csv('rules.csv', index=False)