### Run Apriori algorithm to find frequent itemsets and association rules

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('groceries.csv')

In [3]:
df

Unnamed: 0,Items
0,"citrus fruit,semi-finished bread,margarine,rea..."
1,"tropical fruit,yogurt,coffee"
2,whole milk
3,"pip fruit,yogurt,cream cheese ,meat spreads"
4,"other vegetables,whole milk,condensed milk,lon..."
...,...
695,"pork,UHT-milk,bottled water,soda,canned beer"
696,"other vegetables,curd,yogurt,curd cheese,marga..."
697,"rolls/buns,soda,fruit/vegetable juice,canned b..."
698,"frankfurter,pip fruit,whole milk,rolls/buns,ro..."


### Split the items into array of items

In [4]:
data = list(df['Items'].str.split(','))

In [5]:
data[:5]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product']]

### Encoding items into one hot encoding (true / false values)

In [6]:
from mlxtend.preprocessing import TransactionEncoder

In [7]:
te = TransactionEncoder()
data = te.fit_transform(data)
data

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False,  True, False],
       [False, False, False, ...,  True, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False]])

In [8]:
df = pd.DataFrame(data, columns=te.columns_)

In [9]:
df

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baking powder,bathroom cleaner,beef,berries,beverages,...,tropical fruit,turkey,vinegar,waffles,whipped/sour cream,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
696,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
697,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
698,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


### Finding frequent itemsets and association rules

In [10]:
from mlxtend.frequent_patterns import apriori, association_rules

In [11]:
freq_itemsets = apriori(df, min_support=0.01, use_colnames=True)

In [12]:
freq_itemsets

Unnamed: 0,support,itemsets
0,0.020000,(UHT-milk)
1,0.011429,(baking powder)
2,0.060000,(beef)
3,0.040000,(berries)
4,0.031429,(beverages)
...,...,...
309,0.012857,"(root vegetables, tropical fruit, whole milk)"
310,0.010000,"(root vegetables, whole milk, yogurt)"
311,0.011429,"(whole milk, sausage, yogurt)"
312,0.011429,"(soda, whole milk, yogurt)"


In [13]:
rules = association_rules(freq_itemsets, metric='confidence', min_threshold=0.6)

In [14]:
rules.sort_values(['support', 'confidence'], ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
3,"(fruit/vegetable juice, yogurt)",(whole milk),0.017143,0.251429,0.014286,0.833333,3.314394,0.009976,4.491429
2,(ham),(whole milk),0.022857,0.251429,0.014286,0.625,2.485795,0.008539,1.99619
4,"(margarine, rolls/buns)",(whole milk),0.017143,0.251429,0.012857,0.75,2.982955,0.008547,2.994286
7,"(tropical fruit, yogurt)",(whole milk),0.021429,0.251429,0.012857,0.6,2.386364,0.007469,1.871429
1,(frozen dessert),(whole milk),0.014286,0.251429,0.011429,0.8,3.181818,0.007837,3.742857
0,(cereals),(whole milk),0.011429,0.251429,0.01,0.875,3.480114,0.007127,5.988571
6,"(root vegetables, sausage)",(rolls/buns),0.012857,0.218571,0.01,0.777778,3.55846,0.00719,3.516429
5,"(other vegetables, sugar)",(whole milk),0.014286,0.251429,0.01,0.7,2.784091,0.006408,2.495238
