In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

In [3]:
# Uncomment and run once if mlxtend is not installed (do this before running the import cell):
#%pip install mlxtend

In [5]:
# Toy transaction database: each inner list is one basket, each string one purchased item.
dataset = [
    ['Apple', 'Beer', 'Rice', 'Chicken'],
    ['Apple', 'Beer', 'Rice'],
    ['Apple', 'Beer'],
    ['Apple', 'Bananas'],
    ['Milk', 'Beer', 'Rice', 'Chicken'],
    ['Milk', 'Beer', 'Rice'],
    ['Milk', 'Beer'],
    ['Apple', 'ccc'],
]


In [6]:
# Echo the raw transaction list to check it before encoding.
dataset

[['Apple', 'Beer', 'Rice', 'Chicken'],
 ['Apple', 'Beer', 'Rice'],
 ['Apple', 'Beer'],
 ['Apple', 'Bananas'],
 ['Milk', 'Beer', 'Rice', 'Chicken'],
 ['Milk', 'Beer', 'Rice'],
 ['Milk', 'Beer'],
 ['Apple', 'ccc']]

In [7]:
# One-hot encode the baskets: one row per transaction, one boolean column per item.
te = TransactionEncoder()
te.fit(dataset)                  # collects the item labels later exposed as te.columns_
te_ary = te.transform(dataset)   # boolean array, rows in the same order as `dataset`
df = pd.DataFrame(data=te_ary, columns=te.columns_)


In [8]:
# Show the encoded frame: True where the transaction (row) contains the item (column).
df

Unnamed: 0,Apple,Bananas,Beer,Chicken,Milk,Rice,ccc
0,True,False,True,True,False,True,False
1,True,False,True,False,False,True,False
2,True,False,True,False,False,False,False
3,True,True,False,False,False,False,False
4,False,False,True,True,True,True,False
5,False,False,True,False,True,True,False
6,False,False,True,False,True,False,False
7,True,False,False,False,False,False,True


In [9]:
# Run Apriori: keep every itemset whose support (share of transactions) is at least 0.4.
frequent_itemsets = apriori(
    df,
    min_support=0.4,
    use_colnames=True,  # label itemsets with item names instead of column indices
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.625,(Apple)
1,0.75,(Beer)
2,0.5,(Rice)
3,0.5,"(Beer, Rice)"


In [10]:
# Derive association rules from the frequent itemsets, keeping rules with confidence >= 0.7.
from mlxtend.frequent_patterns import association_rules

rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

In [11]:
# Show every mined rule together with its support, confidence, lift, leverage and conviction.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf


In [12]:
# How to read the metric columns of `rules`
# (kept as comments: a bare string literal in a cell is echoed back as an escaped repr)
#
# lift{A,B} = lift{B,A} = support{A,B} / (support{A} * support{B})
#
#  * lift = 1 implies no relationship between A and B
#    (i.e. A and B occur together only by chance).
#
#  * lift > 1 implies that there is a positive relationship between A and B
#    (i.e. A and B occur together more often than random).
#
#  * lift < 1 implies that there is a negative relationship between A and B
#    (i.e. A and B occur together less often than random).
#
#  * leverage(A->C) = support(A->C) - support(A) * support(C),
#    range: [-1, 1]
#
#  * conviction(A->C) = [1 - support(C)] / [1 - confidence(A->C)],
#    range: [0, inf]
#    (conviction is inf when confidence is 1, because the denominator is then 0)

In [13]:
# Preview the first rows of the rule table (head() returns at most 5 rows by default).
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf


In [14]:
# Keep rules with lift >= 1 and confidence >= 0.4, strongest confidence first.
(
    rules
    .loc[(rules['lift'] >= 1) & (rules['confidence'] >= 0.4)]
    .sort_values(by=['confidence'], ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf


In [15]:
# Rules whose itemset appears in at least 80% of the transactions.
rules.loc[rules['support'] >= 0.8]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


In [20]:
# Rules whose antecedent is exactly {'Beer'}; antecedents are stored as frozensets.
rules.loc[rules['antecedents'] == frozenset({'Beer'})]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
