In [7]:
import pandas as pd

In [13]:
# Toy market-basket data: each inner list holds the items of one transaction.
data = [
    ['apple', 'beer', 'rice', 'chicken'],
    ['apple', 'beer', 'rice'],
    ['apple', 'beer'],
    ['apple', 'pear'],
    ['milk', 'beer', 'rice', 'chicken'],
    ['milk', 'beer', 'rice'],
    ['milk', 'beer'],
    ['milk', 'pear'],
]

#### Transaction encoder for creating structured data

In [22]:
# %pip install mlxtend  # uncomment if mlxtend is missing (%pip installs into this kernel's environment)

In [24]:
from mlxtend.preprocessing import TransactionEncoder

In [26]:
# Encoder used to turn the list-of-lists transactions into a boolean item matrix.
te = TransactionEncoder()

In [28]:
# Learn the item vocabulary from the transactions, then encode them.
# Note: despite the `df_` prefix the result is a NumPy boolean array, not a DataFrame.
df_enc = te.fit(data).transform(data)

In [30]:
# Inspect the encoded matrix: one boolean row per transaction, one column per item.
df_enc

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [False, False, False,  True,  True, False]])

In [32]:
# Item labels in the column order of the encoded matrix.
te.columns_

['apple', 'beer', 'chicken', 'milk', 'pear', 'rice']

In [36]:
# Wrap the boolean matrix in a DataFrame whose columns are the item labels.
df = pd.DataFrame(data=df_enc, columns=te.columns_)

In [38]:
# Display the encoded transactions as a labelled boolean table.
df

Unnamed: 0,apple,beer,chicken,milk,pear,rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,False,False,False,True,True,False


### Generate frequent itemsets

In [42]:
from mlxtend.frequent_patterns import apriori

In [44]:
# Mine itemsets with support of at least 0.25, labelled by item name.
freq_itemsets = apriori(
    df,
    min_support=0.25,
    use_colnames=True,
)

In [46]:
# Display the itemsets returned by apriori together with their support.
freq_itemsets

Unnamed: 0,support,itemsets
0,0.5,(apple)
1,0.75,(beer)
2,0.25,(chicken)
3,0.5,(milk)
4,0.25,(pear)
5,0.5,(rice)
6,0.375,"(apple, beer)"
7,0.25,"(rice, apple)"
8,0.25,"(beer, chicken)"
9,0.375,"(beer, milk)"


#### Generate the association rules

In [50]:
from mlxtend.frequent_patterns import association_rules

In [52]:
# Derive association rules from the frequent itemsets, thresholding on confidence at 0.50.
rules = association_rules(
    freq_itemsets,
    metric='confidence',
    min_threshold=0.50,
)

In [54]:
# Display every generated rule with all of its metric columns.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(apple),(beer),0.5,0.75,0.375,0.75,1.0,0.0,1.0,0.0
1,(beer),(apple),0.75,0.5,0.375,0.5,1.0,0.0,1.0,0.0
2,(rice),(apple),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
3,(apple),(rice),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
4,(chicken),(beer),0.25,0.75,0.25,1.0,1.333333,0.0625,inf,0.333333
5,(beer),(milk),0.75,0.5,0.375,0.5,1.0,0.0,1.0,0.0
6,(milk),(beer),0.5,0.75,0.375,0.75,1.0,0.0,1.0,0.0
7,(rice),(beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf,0.5
8,(beer),(rice),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5,1.0
9,(rice),(chicken),0.5,0.25,0.25,0.5,2.0,0.125,1.5,1.0


In [58]:
# List the available columns before selecting a subset of them.
rules.columns

Index(['antecedents', 'consequents', 'antecedent support',
       'consequent support', 'support', 'confidence', 'lift', 'leverage',
       'conviction', 'zhangs_metric'],
      dtype='object')

In [60]:
# Narrow the rules table to the rule itself plus its support and confidence.
# This rebinds `rules`, so the other metric columns are no longer available afterwards.
rules = rules.loc[
    :, ['antecedents', 'consequents', 'support', 'confidence']
]
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(apple),(beer),0.375,0.75
1,(beer),(apple),0.375,0.5
2,(rice),(apple),0.25,0.5
3,(apple),(rice),0.25,0.5
4,(chicken),(beer),0.25,1.0
5,(beer),(milk),0.375,0.5
6,(milk),(beer),0.375,0.75
7,(rice),(beer),0.5,1.0
8,(beer),(rice),0.5,0.666667
9,(rice),(chicken),0.25,0.5


### Extract using conditions

In [63]:
# Keep only the rules whose confidence is strictly greater than 0.5.
rules.loc[rules['confidence'].gt(0.5)]

Unnamed: 0,antecedents,consequents,support,confidence
0,(apple),(beer),0.375,0.75
4,(chicken),(beer),0.25,1.0
6,(milk),(beer),0.375,0.75
7,(rice),(beer),0.5,1.0
8,(beer),(rice),0.5,0.666667
10,(chicken),(rice),0.25,1.0
13,"(apple, rice)",(beer),0.25,1.0
15,"(apple, beer)",(rice),0.25,0.666667
19,"(rice, chicken)",(beer),0.25,1.0
20,"(beer, chicken)",(rice),0.25,1.0


In [66]:
# Keep rules that are both confident (> 0.5) and frequent (support > 0.25).
rules.loc[rules['confidence'].gt(0.5) & rules['support'].gt(0.25)]

Unnamed: 0,antecedents,consequents,support,confidence
0,(apple),(beer),0.375,0.75
6,(milk),(beer),0.375,0.75
7,(rice),(beer),0.5,1.0
8,(beer),(rice),0.5,0.666667


#### Recommendation

In [69]:
# Rules whose antecedent is exactly the single item 'rice'; their consequents
# are the candidate items to recommend alongside it.
rules.loc[rules['antecedents'] == {'rice'}]

Unnamed: 0,antecedents,consequents,support,confidence
2,(rice),(apple),0.25,0.5
7,(rice),(beer),0.5,1.0
9,(rice),(chicken),0.25,0.5
11,(rice),(milk),0.25,0.5
16,(rice),"(apple, beer)",0.25,0.5
21,(rice),"(beer, chicken)",0.25,0.5
26,(rice),"(beer, milk)",0.25,0.5


#### Sort rules by confidence

In [74]:
# sort_values returns a new DataFrame rather than sorting in place, so bind the
# result to its own name and display it; `rules` itself keeps its original order.
rules_sorted = rules.sort_values(by='confidence', ascending=False)
rules_sorted

In [76]:
# Persist the rules table to rules.csv in the working directory, without the row index.
rules.to_csv(path_or_buf='rules.csv', index=False)