# Association Rule Mining with Apriori Algorithm

## Install the necessary Library

In [1]:
pip install mlxtend==0.17.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mlxtend==0.17.0
  Downloading mlxtend-0.17.0-py2.py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.14.0
    Uninstalling mlxtend-0.14.0:
      Successfully uninstalled mlxtend-0.14.0
Successfully installed mlxtend-0.17.0


## Create a small dataset

In [2]:
import pandas as pd

# Toy basket data: one row per transaction, one column per item;
# a 1 means the item was part of that transaction, a 0 means it was not.
data = pd.DataFrame(
    [
        (1, 1, 1, 0, 0),
        (1, 0, 1, 1, 1),
        (0, 1, 1, 0, 1),
        (1, 1, 0, 1, 0),
        (1, 0, 0, 0, 1),
    ],
    columns=['Milk', 'Bread', 'Cheese', 'Apples', 'Bananas'],
)

data

Unnamed: 0,Milk,Bread,Cheese,Apples,Bananas
0,1,1,1,0,0
1,1,0,1,1,1
2,0,1,1,0,1
3,1,1,0,1,0
4,1,0,0,0,1


## Generate the Frequent Item Sets

Let's use the `apriori` and `association_rules` functions from the mlxtend library to generate the frequent itemsets and the association rules.


In [3]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


In [4]:
# An itemset must appear in at least 40% of the transactions to be kept.
min_support_threshold = 0.4

# use_colnames=True labels each itemset with the DataFrame's column names.
frequent_itemsets = apriori(
    data,
    use_colnames=True,
    min_support=min_support_threshold,
)


In [5]:
# Display the itemsets that met the support threshold, with their support.
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.8,(Milk)
1,0.6,(Bread)
2,0.6,(Cheese)
3,0.4,(Apples)
4,0.6,(Bananas)
5,0.4,"(Milk, Bread)"
6,0.4,"(Cheese, Milk)"
7,0.4,"(Milk, Apples)"
8,0.4,"(Bananas, Milk)"
9,0.4,"(Cheese, Bread)"


In [7]:
# Generate the association rules from the frequent itemsets, scored by lift;
# min_threshold=1 is the cut-off applied to that lift value.

rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)


In [8]:
# Display the rules table (antecedents, consequents and their metrics).
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Apples),0.8,0.4,0.4,0.5,1.25,0.08,1.2
1,(Apples),(Milk),0.4,0.8,0.4,1.0,1.25,0.08,inf
2,(Cheese),(Bread),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
3,(Bread),(Cheese),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
4,(Cheese),(Bananas),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
5,(Bananas),(Cheese),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2


# Try on another Dataset

The `apriori` function expects data in a one-hot-encoded pandas DataFrame.

However, the transaction data is given in the format below:

In [9]:
# Raw transactions: each inner list holds the items of one basket.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [10]:
# Display the raw transaction lists before encoding.
dataset

[['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
 ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
 ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
 ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
 ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

We can transform it into the right format via the **TransactionEncoder** as follows:

In [11]:

from mlxtend.preprocessing import TransactionEncoder

In [12]:
# One-hot encode the transactions: fit the encoder on the baskets first,
# then transform them into one row of True/False flags per transaction.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)

# Label the encoded array with the item names the encoder exposes.
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [13]:
# Without use_colnames, the itemsets are reported as column indices
# rather than item names (see the output below).
apriori(df, min_support=0.6)

Unnamed: 0,support,itemsets
0,0.8,(3)
1,1.0,(5)
2,0.6,(6)
3,0.6,(8)
4,0.6,(10)
5,0.8,"(3, 5)"
6,0.6,"(8, 3)"
7,0.6,"(5, 6)"
8,0.6,"(8, 5)"
9,0.6,"(10, 5)"


In [14]:
# use_colnames=True reports the itemsets with the item names instead of
# the column indices.
apriori(df, min_support=0.6, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Onion, Eggs)"
7,0.6,"(Milk, Kidney Beans)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Yogurt, Kidney Beans)"


## Selecting and Filtering Results

We are interested in itemsets of length 2 that have a support of at least 80%.


In [16]:
# Mine the frequent itemsets (minimum support 0.6, labelled by item name) and
# record how many items each itemset contains so they can be filtered by size.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
# The built-in len is applied to each itemset directly; wrapping it in a
# lambda adds nothing.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

In [17]:
# Display the itemsets together with the new 'length' column.
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Milk, Kidney Beans)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.6,"(Yogurt, Kidney Beans)",2


In [19]:
# Keep only the two-item itemsets whose support is at least 0.8.
frequent_itemsets.loc[
    (frequent_itemsets['support'] >= 0.8)
    & (frequent_itemsets['length'] == 2)
]

Unnamed: 0,support,itemsets,length
5,0.8,"(Kidney Beans, Eggs)",2


## FP-Growth

In [20]:
# Transactions used for the FP-Growth example: one inner list per basket.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [21]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Encode the transactions into a DataFrame with one column per item.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [22]:
from mlxtend.frequent_patterns import fpgrowth

In [25]:
# Mine frequent itemsets with FP-Growth, using a minimum support of 0.6 and
# item names (rather than column indices) as labels.
frequent_itemsets = fpgrowth(df, min_support=0.6, use_colnames=True)

## Generate the Association Rules

In [26]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the FP-Growth itemsets, scored by lift with a
# threshold of 1; rules whose lift is exactly 1.0 are kept (see the output).
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
1,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
2,(Yogurt),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
3,(Kidney Beans),(Yogurt),1.0,0.6,0.6,0.6,1.0,0.0,1.0
4,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
5,(Eggs),(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6
6,(Onion),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
7,(Kidney Beans),(Onion),1.0,0.6,0.6,0.6,1.0,0.0,1.0
8,"(Onion, Kidney Beans)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
9,"(Onion, Eggs)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf


## Comparison between Apriori and FP-Growth

In [27]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Rebuild the encoded DataFrame that both algorithms are timed on.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [28]:
from mlxtend.frequent_patterns import apriori

# Time apriori on the encoded transactions: 10 runs of 100 loops each.
%timeit -n 100 -r 10 apriori(df, min_support=0.6)

2.83 ms ± 191 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [29]:
from mlxtend.frequent_patterns import fpgrowth

# Time fpgrowth with the same settings (10 runs of 100 loops each) so the
# result can be compared with the apriori timing.
%timeit -n 100 -r 10 fpgrowth(df, min_support=0.6)

639 µs ± 40.5 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
