### Run Apriori algorithm to find frequent itemsets and association rules

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Raw grocery purchase records: one row per purchased item.
DATA_PATH = 'Groceries_dataset.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Show the loaded frame; its columns are Member_number, Date and itemDescription.
df

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice


### Group items purchased by the same member on the same date (one transaction)

In [4]:
# Build a per-basket key "<Member_number>_<Date>": rows that share a member and
# a date are treated as belonging to one transaction.
df['single-transaction'] = (
    df['Member_number'].astype(str)
    .str.cat(df['Date'].astype(str), sep='_')
)

In [5]:
# Preview the first rows to confirm the new 'single-transaction' key column.
df.head()

Unnamed: 0,Member_number,Date,itemDescription,single-transaction
0,1808,21-07-2015,tropical fruit,1808_21-07-2015
1,2552,05-01-2015,whole milk,2552_05-01-2015
2,2300,19-09-2015,pip fruit,2300_19-09-2015
3,1187,12-12-2015,other vegetables,1187_12-12-2015
4,3037,01-02-2015,whole milk,3037_01-02-2015


In [6]:
# Count how often each item occurs in each transaction:
# rows are transaction keys, columns are item descriptions.
df2 = pd.crosstab(
    index=df['single-transaction'],
    columns=df['itemDescription'],
)

In [7]:
# Preview the first rows of the transaction-by-item count table.
df2.head()

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
single-transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000_15-03-2015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
1000_24-06-2014,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1000_24-07-2015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1000_25-11-2015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1000_27-05-2015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Encode item counts as boolean values (True = item present in the transaction)

In [8]:
def encode(item_freq):
    """Map a count to a presence flag.

    Parameters
    ----------
    item_freq : number
        Value to test; in this notebook, one cell of the
        transaction-by-item count table.

    Returns
    -------
    bool
        True when ``item_freq`` is greater than zero, otherwise False.
    """
    return True if item_freq > 0 else False

In [9]:
# Turn counts into presence flags (True when the item occurs at least once).
# The vectorized comparison gives the same boolean table as applying `encode`
# to every cell, without one Python call per cell and without relying on
# DataFrame.applymap, which is deprecated since pandas 2.1.
# Re-running this cell is safe: True > 0 is still True, False > 0 is still False.
df2 = df2 > 0

In [10]:
# Preview the encoded table; cells now hold True/False instead of counts.
df2.head()

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
single-transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000_15-03-2015,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
1000_24-06-2014,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1000_24-07-2015,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1000_25-11-2015,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1000_27-05-2015,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Finding frequent itemsets from transactions

In [11]:
from mlxtend.frequent_patterns import apriori, association_rules

In [12]:
# Smallest share of transactions an itemset must appear in to be kept.
MIN_SUPPORT = 0.001
# use_colnames=True reports itemsets by item name rather than column position.
freq_itemsets = apriori(df2, use_colnames=True, min_support=MIN_SUPPORT)

In [13]:
# Display the frequent itemsets together with their support values.
freq_itemsets

Unnamed: 0,support,itemsets
0,0.004010,(Instant food products)
1,0.021386,(UHT-milk)
2,0.001470,(abrasive cleaner)
3,0.001938,(artif. sweetener)
4,0.008087,(baking powder)
...,...,...
745,0.001136,"(sausage, rolls/buns, whole milk)"
746,0.001002,"(rolls/buns, soda, whole milk)"
747,0.001337,"(yogurt, rolls/buns, whole milk)"
748,0.001069,"(sausage, soda, whole milk)"


In [14]:
# Keep only rules whose confidence reaches this threshold.
MIN_CONFIDENCE = 0.15
rules = association_rules(
    freq_itemsets,
    min_threshold=MIN_CONFIDENCE,
    metric='confidence',
)

In [15]:
# Rank rules by support, breaking ties by confidence (both descending).
rules.sort_values(by=['support', 'confidence'], ascending=[False, False])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(bottled beer),(whole milk),0.045312,0.157923,0.007151,0.157817,0.99933,-5e-06,0.999874
3,(ham),(whole milk),0.017109,0.157923,0.00274,0.160156,1.014142,3.8e-05,1.002659
4,(semi-finished bread),(whole milk),0.00949,0.157923,0.001671,0.176056,1.114825,0.000172,1.022008
8,"(yogurt, sausage)",(whole milk),0.005748,0.157923,0.00147,0.255814,1.619866,0.000563,1.131541
9,"(sausage, whole milk)",(yogurt),0.008955,0.085879,0.00147,0.164179,1.91176,0.000701,1.093681
1,(detergent),(whole milk),0.008621,0.157923,0.001403,0.162791,1.030824,4.2e-05,1.005814
6,"(yogurt, rolls/buns)",(whole milk),0.007819,0.157923,0.001337,0.17094,1.082428,0.000102,1.015701
5,"(sausage, rolls/buns)",(whole milk),0.005347,0.157923,0.001136,0.2125,1.345594,0.000292,1.069304
7,"(sausage, soda)",(whole milk),0.005948,0.157923,0.001069,0.179775,1.138374,0.00013,1.026642
2,(frozen fish),(whole milk),0.006817,0.157923,0.001069,0.156863,0.993287,-7e-06,0.998743
