In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

In [29]:
pip install mlxtend

Note: you may need to restart the kernel to use updated packages.


##### CREATE THE DATASET

In [30]:
# Toy market-basket data: the same four item combinations appear twice,
# once led by 'Apple' and once led by 'Milk', for eight transactions in total.
dataset = [
    [lead, *tail]
    for lead in ('Apple', 'Milk')
    for tail in (
        ['Beer', 'Rice', 'Chicken'],
        ['Beer', 'Rice'],
        ['Beer'],
        ['Peer'],
    )
]
    

In [31]:
# Display the raw transaction list as the cell output.
dataset

[['Apple', 'Beer', 'Rice', 'Chicken'],
 ['Apple', 'Beer', 'Rice'],
 ['Apple', 'Beer'],
 ['Apple', 'Peer'],
 ['Milk', 'Beer', 'Rice', 'Chicken'],
 ['Milk', 'Beer', 'Rice'],
 ['Milk', 'Beer'],
 ['Milk', 'Peer']]

In [32]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [33]:
# Learn the distinct items from the transactions, then encode each
# transaction as a row holding one boolean flag per item.
te = TransactionEncoder()
te.fit(dataset)
encoded = te.transform(dataset)

In [34]:
# Show the encoded boolean matrix: one row per transaction, one column per item.
encoded

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [35]:
# Item labels learned by the encoder; these become the DataFrame column names.
te.columns_

['Apple', 'Beer', 'Chicken', 'Milk', 'Peer', 'Rice']

In [36]:
# Wrap the boolean matrix in a DataFrame whose columns are the item labels.
df = pd.DataFrame(data=encoded, columns=te.columns_)

In [37]:
# Display the encoded transactions as a table.
df

Unnamed: 0,Apple,Beer,Chicken,Milk,Peer,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


In [38]:
from mlxtend.frequent_patterns import fpgrowth, association_rules


In [39]:
# 1. Find frequent itemsets: keep every itemset whose support is at least 0.25
#    (with the 8 transactions here, that means it occurs in 2 or more of them).
#    use_colnames=True labels the itemsets with the DataFrame's column names.
freq_itemset = fpgrowth(df, min_support=0.25, use_colnames=True)

In [40]:
# Inspect the frequent itemsets together with their support values.
freq_itemset

Unnamed: 0,support,itemsets
0,0.75,(Beer)
1,0.5,(Rice)
2,0.5,(Apple)
3,0.25,(Chicken)
4,0.25,(Peer)
5,0.5,(Milk)
6,0.5,"(Beer, Rice)"
7,0.375,"(Beer, Apple)"
8,0.25,"(Rice, Apple)"
9,0.25,"(Beer, Rice, Apple)"


In [41]:
# Derive association rules from the frequent itemsets, keeping the rules whose
# confidence is 0.5 or higher.
rules = association_rules(freq_itemset, metric='confidence', min_threshold=0.5)

In [42]:
# Size of the generated rules table as (rows, columns).
rules.shape

(28, 10)

In [43]:
# Narrow the rules table to the four columns used in the rest of the notebook.
rules = rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence']]

In [44]:
# Display the trimmed rules table.
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Beer),(Rice),0.5,0.666667
1,(Rice),(Beer),0.5,1.0
2,(Beer),(Apple),0.375,0.5
3,(Apple),(Beer),0.375,0.75
4,(Rice),(Apple),0.25,0.5
5,(Apple),(Rice),0.25,0.5
6,"(Beer, Rice)",(Apple),0.25,0.5
7,"(Beer, Apple)",(Rice),0.25,0.666667
8,"(Rice, Apple)",(Beer),0.25,1.0
9,(Rice),"(Beer, Apple)",0.25,0.5


##### SORT THE RULES

In [45]:
# By confidence, in descending order (ties broken by support, also descending)

In [46]:
# Rank the rules: highest confidence first, and highest support first within
# equal confidence. The sorted table is only displayed, not stored.
rules.sort_values(by=['confidence', 'support'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,support,confidence
1,(Rice),(Beer),0.5,1.0
8,"(Rice, Apple)",(Beer),0.25,1.0
12,(Chicken),(Rice),0.25,1.0
13,(Chicken),(Beer),0.25,1.0
15,"(Beer, Chicken)",(Rice),0.25,1.0
16,"(Rice, Chicken)",(Beer),0.25,1.0
18,(Chicken),"(Beer, Rice)",0.25,1.0
25,"(Milk, Rice)",(Beer),0.25,1.0
3,(Apple),(Beer),0.375,0.75
20,(Milk),(Beer),0.375,0.75


##### Filter the rules

In [47]:
# Keep only the rules whose confidence is strictly greater than 0.5
# (rules sitting exactly at 0.5 are dropped).
nrules = rules.loc[rules['confidence'].gt(0.5)]

In [48]:
# Size of the filtered rules table as (rows, columns).
nrules.shape

(13, 4)

In [49]:
# Consequents of the filtered rules whose antecedent is exactly {Apple, Beer}.
nrules.loc[nrules['antecedents'] == {'Apple', 'Beer'}, 'consequents']

7    (Rice)
Name: consequents, dtype: object

##### EXPORT THE RULES

In [51]:
# Write the filtered rules to 'rules1.csv' in the current working directory,
# leaving out the DataFrame index.
# NOTE(review): antecedents/consequents hold set-like objects, so they are
# presumably written as their string representation — confirm that the CSV
# is usable by whatever reads it downstream.
nrules.to_csv('rules1.csv', index = False)