In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [3]:
# Load the transactions file as a DataFrame for a first look.
# NOTE(review): read_csv's default header handling consumes the first line
# of the file as column names, so the first transaction
# (Bread, Cheese, Eggs, Juice) is not counted as a row -- hence 5 rows here
# versus 6 transactions when the file is read line by line.
df = pd.read_csv('Bread_Cheese.csv')
df.shape

(5, 4)

In [4]:
# Display the frame; the column labels shown are the items of the file's first line.
df

Unnamed: 0,Bread,Cheese,Eggs,Juice
0,Bread,Cheese,Juice,
1,Bread,Milk,Yogurt,
2,Bread,Juice,Milk,
3,Cheese,Juice,Milk,
4,Milk,Yogurt,,


In [5]:
# Read each line of the CSV as one transaction (a list of item names).
# Rows hold different numbers of items, so the file is split line by line
# instead of being loaded as a rectangular table.
# The context manager guarantees the file handle is closed, and blank lines
# are skipped so they cannot turn into a bogus [''] transaction.
with open('Bread_Cheese.csv', encoding='utf-8') as csv_file:
    trans = [line.rstrip().split(',') for line in csv_file if line.strip()]
trans

[['Bread', 'Cheese', 'Eggs', 'Juice'],
 ['Bread', 'Cheese', 'Juice'],
 ['Bread', 'Milk', 'Yogurt'],
 ['Bread', 'Juice', 'Milk'],
 ['Cheese', 'Juice', 'Milk'],
 ['Milk', 'Yogurt']]

In [10]:
# Sanity check: with header=None pandas keeps the first line as data, so all
# six transactions appear as rows (shorter rows are padded with empty cells).
pd.read_csv('Bread_Cheese.csv', header=None, encoding='UTF-8')

Unnamed: 0,0,1,2,3
0,Bread,Cheese,Eggs,Juice
1,Bread,Cheese,Juice,
2,Bread,Milk,Yogurt,
3,Bread,Juice,Milk,
4,Cheese,Juice,Milk,
5,Milk,Yogurt,,


In [19]:
# One-hot encode the transactions: fit() learns the distinct items,
# transform() yields one boolean row per transaction and one column per item.
te = TransactionEncoder()
te.fit(trans)
te_ary = te.transform(trans)
te_ary

array([[ True,  True,  True,  True, False, False],
       [ True,  True, False,  True, False, False],
       [ True, False, False, False,  True,  True],
       [ True, False, False,  True,  True, False],
       [False,  True, False,  True,  True, False],
       [False, False, False, False,  True,  True]])

In [26]:
# Cast the boolean one-hot matrix to 0/1 integers.
# NOTE(review): newer mlxtend releases reportedly warn when apriori receives
# a non-boolean frame -- confirm against the installed version before
# relying on the integer form.
int_te_ary = te_ary.astype(int)

In [28]:
# Wrap the encoded matrix in a DataFrame (columns are 0..n-1 until relabelled).
df_trans = pd.DataFrame(data=int_te_ary)

### Columns Mapping after TransactionEncoder

In [31]:
# Item labels, in the column order of the encoded matrix.
te.columns_

['Bread', 'Cheese', 'Eggs', 'Juice', 'Milk', 'Yogurt']

In [32]:
# Item label -> column index in the encoded matrix.
te.columns_mapping_

{'Bread': 0, 'Cheese': 1, 'Eggs': 2, 'Juice': 3, 'Milk': 4, 'Yogurt': 5}

In [34]:
# Relabel the positional columns with the item names learned by the encoder.
# Note: this mutates df_trans in place.
df_trans.columns = te.columns_

In [36]:
# One row per transaction, one 0/1 column per item.
df_trans

Unnamed: 0,Bread,Cheese,Eggs,Juice,Milk,Yogurt
0,1,1,1,1,0,0
1,1,1,0,1,0,0
2,1,0,0,0,1,1
3,1,0,0,1,1,0
4,0,1,0,1,1,0
5,0,0,0,0,1,1


### Step 1: Generate Frequent Itemsets

In [41]:
# Generate frequent itemsets with Apriori and time the call.
# min_support = 0.16 sits just below 1/6, so with the six transactions in
# df_trans any itemset that occurs in at least one transaction qualifies;
# it is a permissive threshold to start with.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock intended for measuring short intervals.
start = time.perf_counter()
frequent_itemsets = apriori(df_trans, min_support = 0.16, use_colnames = True)
elapsed = time.perf_counter() - start

print("Time in Seconds %s"% elapsed)

Time in Seconds 0.021896839141845703


In [42]:
# Each row is an itemset together with its support (fraction of transactions containing it).
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.666667,(Bread)
1,0.5,(Cheese)
2,0.166667,(Eggs)
3,0.666667,(Juice)
4,0.666667,(Milk)
5,0.333333,(Yogurt)
6,0.333333,"(Cheese, Bread)"
7,0.166667,"(Eggs, Bread)"
8,0.5,"(Juice, Bread)"
9,0.333333,"(Milk, Bread)"


This gives us the non-empty itemsets whose support satisfies the minimum support criterion.

### Step 2: Generate Association Rules

In [44]:
# Derive association rules from the frequent itemsets.
# The metric and threshold are the library defaults, spelled out so the
# selection criterion (confidence of at least 0.8) is visible to the reader.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.8,
)
rules.shape

(24, 9)

In [45]:
# Full rule table: antecedents, consequents and their quality metrics.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Eggs),(Bread),0.166667,0.666667,0.166667,1.0,1.5,0.055556,inf
1,(Eggs),(Cheese),0.166667,0.5,0.166667,1.0,2.0,0.083333,inf
2,(Cheese),(Juice),0.5,0.666667,0.5,1.0,1.5,0.166667,inf
3,(Eggs),(Juice),0.166667,0.666667,0.166667,1.0,1.5,0.055556,inf
4,(Yogurt),(Milk),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf
5,"(Cheese, Eggs)",(Bread),0.166667,0.666667,0.166667,1.0,1.5,0.055556,inf
6,"(Eggs, Bread)",(Cheese),0.166667,0.5,0.166667,1.0,2.0,0.083333,inf
7,(Eggs),"(Cheese, Bread)",0.166667,0.333333,0.166667,1.0,3.0,0.111111,inf
8,"(Cheese, Bread)",(Juice),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf
9,"(Juice, Eggs)",(Bread),0.166667,0.666667,0.166667,1.0,1.5,0.055556,inf


In [47]:
# Keep only the rules with a lift of at least 2 and show the first seven
# columns (antecedents through lift), leaving out the remaining metrics.
rules.loc[
    rules['lift'] >= 2,
    rules.columns[:7],
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
1,(Eggs),(Cheese),0.166667,0.5,0.166667,1.0,2.0
6,"(Eggs, Bread)",(Cheese),0.166667,0.5,0.166667,1.0,2.0
7,(Eggs),"(Cheese, Bread)",0.166667,0.333333,0.166667,1.0,3.0
11,(Eggs),"(Juice, Bread)",0.166667,0.5,0.166667,1.0,2.0
14,"(Juice, Eggs)",(Cheese),0.166667,0.5,0.166667,1.0,2.0
15,(Eggs),"(Cheese, Juice)",0.166667,0.5,0.166667,1.0,2.0
19,"(Juice, Eggs, Bread)",(Cheese),0.166667,0.5,0.166667,1.0,2.0
20,"(Cheese, Eggs)","(Juice, Bread)",0.166667,0.5,0.166667,1.0,2.0
21,"(Juice, Eggs)","(Cheese, Bread)",0.166667,0.333333,0.166667,1.0,3.0
22,"(Eggs, Bread)","(Cheese, Juice)",0.166667,0.5,0.166667,1.0,2.0
