## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## Import Dataset

In [2]:
# Read the raw grocery transactions; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='Groceries data.csv')

In [3]:
# Preview the first five rows to check the columns that were read from the CSV.
df.head()

Unnamed: 0,Member_number,Date,itemDescription,year,month,day,day_of_week
0,1808,2015-07-21,tropical fruit,2015,7,21,1
1,2552,2015-05-01,whole milk,2015,5,1,4
2,2300,2015-09-19,pip fruit,2015,9,19,5
3,1187,2015-12-12,other vegetables,2015,12,12,5
4,3037,2015-01-02,whole milk,2015,1,2,4


In [4]:
# Assemble a real datetime column from the separate year / month / day fields
# (pd.to_datetime accepts a dict-like of these components).
df['date'] = pd.to_datetime(
    {
        'year': df['year'],
        'month': df['month'],
        'day': df['day'],
    }
)
df.head()

Unnamed: 0,Member_number,Date,itemDescription,year,month,day,day_of_week,date
0,1808,2015-07-21,tropical fruit,2015,7,21,1,2015-07-21
1,2552,2015-05-01,whole milk,2015,5,1,4,2015-05-01
2,2300,2015-09-19,pip fruit,2015,9,19,5,2015-09-19
3,1187,2015-12-12,other vegetables,2015,12,12,5,2015-12-12
4,3037,2015-01-02,whole milk,2015,1,2,4,2015-01-02


In [5]:
# Every row is a single purchased item, so it contributes a quantity of one.
df['quantity'] = 1

# Key each row by (member, calendar day); the basket pivot further down groups
# on this pair, so one member's purchases on one day form one basket.
df['member_date'] = [
    (member, day)
    for member, day in zip(df['Member_number'], df['date'].dt.date)
]

In [6]:
# (rows, columns) of the frame after the derived columns have been added.
df.shape

(38765, 10)

## Market Basket Analysis

In [7]:
# Number of rows per item, most frequently purchased first.
(
    df.groupby('itemDescription')
    .size()
    .sort_values(ascending=False)
)

itemDescription
whole milk               2502
other vegetables         1898
rolls/buns               1716
soda                     1514
yogurt                   1334
                         ... 
rubbing alcohol             5
bags                        4
baby cosmetics              3
preservation products       1
kitchen utensil             1
Length: 167, dtype: int64

In [8]:
# Pivot to one row per (member, day) basket and one column per item.
# Cells hold how many times the item appears in that basket; combinations that
# never occur come out of unstack() as NaN and are filled with 0.
basket = (
    df.groupby(['member_date', 'itemDescription'])['quantity']
    .count()
    .unstack()
    .fillna(0)
)

In [9]:
def convert_values(value):
    """Collapse a per-basket item count into a presence flag.

    Parameters
    ----------
    value : number
        How many times an item occurs in a basket.

    Returns
    -------
    int
        1 when ``value`` is at least 1, otherwise 0 (this includes NaN, for
        which the comparison is false).
    """
    return 1 if value >= 1 else 0

In [10]:
# Turn item counts into presence flags (True = item is in the basket).
# A vectorised comparison replaces the element-wise `applymap` call:
#   * `DataFrame.applymap` is deprecated since pandas 2.1 (renamed to `map`),
#   * it invoked a Python function once per cell of the basket matrix,
#   * mlxtend's `apriori` expects a boolean (or 0/1) frame and warns about
#     slower processing when given non-bool dtypes.
# Re-running this cell is harmless: comparing an already-boolean frame with
# `>= 1` leaves it unchanged.
basket = basket >= 1

In [11]:
# Mine frequent itemsets from the one-hot basket matrix.
basket_items = apriori(
    basket,
    min_support=0.005,   # keep itemsets present in at least 0.5% of baskets
    use_colnames=True,   # report item names rather than column positions
    max_len=2,           # only single items and pairs
)

In [12]:
# Derive association rules, keeping those whose lift is at least 1, i.e. item
# pairs that co-occur at least as often as independence would predict.
rules = association_rules(
    basket_items,
    metric='lift',
    min_threshold=1,
)

In [13]:
# Show up to 15 rules, strongest confidence first.
(
    rules
    .sort_values(by="confidence", ascending=False)
    .head(n=15)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(frankfurter),(other vegetables),0.03776,0.122101,0.005146,0.136283,1.11615,0.000536,1.01642
3,(sausage),(soda),0.060349,0.097106,0.005948,0.09856,1.014975,8.8e-05,1.001613
5,(sausage),(yogurt),0.060349,0.085879,0.005748,0.095238,1.108986,0.000565,1.010345
4,(yogurt),(sausage),0.085879,0.060349,0.005748,0.066926,1.108986,0.000565,1.007049
2,(soda),(sausage),0.097106,0.060349,0.005948,0.061253,1.014975,8.8e-05,1.000963
1,(other vegetables),(frankfurter),0.122101,0.03776,0.005146,0.042146,1.11615,0.000536,1.004579
