# Association rule mining

In [2]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [3]:
# Load the supermarket dataset from the working directory and preview
# its first ten rows.
data = pd.read_csv(filepath_or_buffer="./supermarket_short.csv")
data.head(n=10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [86]:
# apriori expects a one-hot encoded frame (values 0/1 or True/False). Recent
# mlxtend releases warn that non-bool frames are slower and may lose support,
# so check the 0/1 encoding explicitly and pass a boolean frame instead of
# relying on implicit handling of integer columns.
assert data.isin([0, 1]).all().all(), "expected a 0/1 encoded item matrix"
frequent_itemsets = apriori(
    data.astype(bool),
    min_support=0.2,  # minimum support an itemset needs to be kept
    use_colnames=True,  # report itemsets by column name rather than column index
)

In [87]:
# Derive association rules from the frequent itemsets, using confidence as
# the selection metric with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)

### Add antecedent length column

In [88]:
# Number of items on the left-hand side of each rule. The builtin `len` is
# passed directly; wrapping it in a lambda adds nothing.
rules["antecedent_len"] = rules["antecedents"].map(len)

### Filter rules

In [99]:
# Keep the rules whose support is strictly above 0.2 and show them ordered
# from highest to lowest support.
(
    rules
    .loc[rules["support"] > 0.2]
    .sort_values(by=["support"], ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
25,(milk-cream),(bread and cake),0.635185,0.719689,0.505079,0.795168,1.104878,0.047944,1.368496,1
26,(bread and cake),(milk-cream),0.719689,0.635185,0.505079,0.701802,1.104878,0.047944,1.223398,1
32,(bread and cake),(fruit),0.719689,0.640156,0.502485,0.698198,1.090670,0.041773,1.192320,1
33,(fruit),(bread and cake),0.640156,0.719689,0.502485,0.784943,1.090670,0.041773,1.303425,1
34,(vegetables),(bread and cake),0.639939,0.719689,0.496650,0.776089,1.078368,0.036093,1.251888,1
...,...,...,...,...,...,...,...,...,...,...
892,"(biscuits, margarine)",(party snack foods),0.322671,0.503566,0.200130,0.620228,1.231671,0.037643,1.307189,2
893,"(biscuits, party snack foods)",(margarine),0.344067,0.494489,0.200130,0.581658,1.176282,0.029992,1.208369,2
894,"(margarine, party snack foods)",(biscuits),0.266912,0.563000,0.200130,0.749798,1.331790,0.049858,1.746587,2
967,"(sauces-gravy-pkle, vegetables)",(margarine),0.336719,0.494489,0.200130,0.594352,1.201952,0.033626,1.246181,2
