# Association rule mining

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

A Python library of useful tools for day-to-day data science tasks.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [3]:
# Load the supermarket table from CSV and preview its first ten rows.
# The preview shows 0/1 indicator columns, one per product category.
data = pd.read_csv("supermarket_short.csv")
data.head(n=10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [4]:
# mlxtend's frequent-pattern functions expect a boolean one-hot frame. Plain 0/1
# integers are still accepted but emit a DeprecationWarning, so validate the
# encoding explicitly and cast to bool before mining.
if not data.isin([0, 1]).all().all():
    raise ValueError("apriori expects a one-hot encoded table containing only 0/1 values")

# Frequent itemsets: item combinations whose support is at least 0.1.
frequent_itemsets = apriori(data.astype(bool), min_support=0.1, use_colnames=True)

# Association rules built from those itemsets, keeping confidence >= 0.4.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4)

### Add antecedent length column

In [5]:
# Number of items on the left-hand side of each rule, used for filtering below.
rules["antecedent_len"] = rules["antecedents"].apply(len)

### Filter rules

In [59]:
# Keep rules with antecedent_len >= 1, lift > 1.1, confidence < 0.7 and
# support > 0.35, then list them from highest to lowest confidence.
(
    rules
    .loc[lambda r: (r["antecedent_len"] >= 1)
                   & (r["lift"] > 1.1)
                   & (r["confidence"] < 0.7)
                   & (r["support"] > 0.35)]
    .sort_values("confidence", ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
179,(biscuits),(frozen foods),0.563,0.587206,0.391182,0.694818,1.183261,0.060586,1.352616,1
59,(biscuits),(baking needs),0.563,0.604063,0.381241,0.677159,1.121008,0.041153,1.226416,1
73,(frozen foods),(baking needs),0.587206,0.604063,0.396585,0.675377,1.118057,0.041876,1.219683,1
180,(frozen foods),(biscuits),0.587206,0.563,0.391182,0.666176,1.183261,0.060586,1.309073,1
74,(baking needs),(frozen foods),0.604063,0.587206,0.396585,0.65653,1.118057,0.041876,1.201834,1
60,(baking needs),(biscuits),0.604063,0.563,0.381241,0.631127,1.121008,0.041153,1.18469,1
7,(bread and cake),(biscuits),0.719689,0.563,0.450184,0.625526,1.111058,0.044999,1.166969,1
207,(fruit),(biscuits),0.640156,0.563,0.397018,0.620189,1.10158,0.03661,1.150573,1
1966,(vegetables),"(bread and cake, fruit)",0.639939,0.502485,0.387076,0.604863,1.203743,0.065516,1.259095,1
1967,(fruit),"(vegetables, bread and cake)",0.640156,0.49665,0.387076,0.604659,1.217475,0.069143,1.273204,1


In [None]:
# So far this is the best that could be found. The most sensible rules here:
# 94 -- if a customer bought something from baking needs, margarine may come in handy as well
# 7 -- if a customer bought bread and cake, biscuits can be offered too
# NOTE(review): rule 94 is not among the rows in the saved output above -- confirm the index.

In [74]:
# Keep rules with at least two antecedent items, confidence > 0.8, support < 0.4
# and antecedent support > 0.35, ordered from highest to lowest confidence.
# NOTE(review): the saved output for this cell contains rows with confidence
# around 0.70, so it appears to predate the 0.8 threshold -- re-run to refresh it.
(
    rules
    .loc[lambda r: (r["antecedent_len"] >= 2)
                   & (r["confidence"] > 0.8)
                   & (r["support"] < 0.4)
                   & (r["antecedent support"] > 0.35)]
    .sort_values("confidence", ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
1028,"(vegetables, biscuits)",(bread and cake),0.381241,0.719689,0.321375,0.842971,1.171299,0.047000,1.785087,2
1000,"(biscuits, milk-cream)",(bread and cake),0.381889,0.719689,0.320942,0.840407,1.167737,0.046101,1.756418,2
1022,"(biscuits, fruit)",(bread and cake),0.397018,0.719689,0.333045,0.838868,1.165598,0.047316,1.739634,2
956,"(biscuits, frozen foods)",(bread and cake),0.391182,0.719689,0.326345,0.834254,1.159187,0.044816,1.691211,2
1492,"(frozen foods, fruit)",(bread and cake),0.402204,0.719689,0.334558,0.831811,1.155792,0.045096,1.666643,2
...,...,...,...,...,...,...,...,...,...,...
3703,"(juice-sat-cord-ms, milk-cream)",(vegetables),0.353793,0.639939,0.248757,0.703115,1.098722,0.022351,1.212796,2
3698,"(juice-sat-cord-ms, fruit)",(milk-cream),0.361357,0.635185,0.253512,0.701555,1.104490,0.023983,1.222387,2
6226,"(vegetables, frozen foods)",(milk-cream),0.406743,0.635185,0.285066,0.700850,1.103380,0.026709,1.219507,2
8997,"(vegetables, fruit, bread and cake)",(baking needs),0.387076,0.604063,0.271234,0.700726,1.160021,0.037416,1.322991,3
