# Association Rule Mining
## Dataset:  Groceries Dataset
## Source: https://www.kaggle.com/heeraldedhia/groceries-dataset

## Reading dataset

In [57]:
import pandas as pd
data = pd.read_csv('Groceries_dataset.csv')
data.describe()

Unnamed: 0,Member_number
count,38765.0
mean,3003.641868
std,1153.611031
min,1000.0
25%,2002.0
50%,3005.0
75%,4007.0
max,5000.0


## Preprocessing dataset

## Fetching number of unique members

In [58]:
unique_members = data['Member_number'].unique()
len(unique_members)

3898

## Grouping data based on members

In [59]:
grouped_data = data.groupby(data['Member_number'])
grouped_data.head(5)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38621,2192,21-11-2014,seasonal products
38645,4973,28-05-2014,newspapers
38684,2936,07-03-2014,newspapers
38704,3654,17-06-2014,newspapers


## Preparing transactions for every member

In [60]:
transactions_duplicates = [list(grouped_data.get_group(unique_member)['itemDescription']) for unique_member in unique_members]
transactions_duplicates[1]

['whole milk',
 'butter',
 'female sanitary products',
 'pot plants',
 'other vegetables',
 'tropical fruit',
 'root vegetables',
 'whole milk',
 'shopping bags',
 'chocolate',
 'chocolate',
 'coffee',
 'hygiene articles']

## Removing duplicaties from each transactions

In [63]:
transactions = []
for transaction in transactions_duplicates:
    temp_list = []
    [temp_list.append(item) for item in transaction if item not in temp_list]
    transactions.append(temp_list)
transactions[1]

['whole milk',
 'butter',
 'female sanitary products',
 'pot plants',
 'other vegetables',
 'tropical fruit',
 'root vegetables',
 'shopping bags',
 'chocolate',
 'coffee',
 'hygiene articles']

# Association Rule Mining

## Using apriori algorithm for getting association rules

In [80]:
from apyori import apriori
association_rules = list(apriori(transactions))
for rule in association_rules:
    if len(list(rule[0])) > 1:
        print(f'Rule: {list(rule[0])[0]} -> {list(rule[0])[1]}')
        print(f'Support: {str(rule[1])}')
        print(f'Confidence: {str(rule[2][0][2])}')
        print(f'Lift: {str(rule[2][0][3])}')
        print('--------------------------------------------------')

Rule: bottled water -> whole milk
Support: 0.11236531554643407
Confidence: 0.11236531554643407
Lift: 1.0
--------------------------------------------------
Rule: other vegetables -> rolls/buns
Support: 0.14674191893278604
Confidence: 0.14674191893278604
Lift: 1.0
--------------------------------------------------
Rule: other vegetables -> soda
Support: 0.1241662390969728
Confidence: 0.1241662390969728
Lift: 1.0
--------------------------------------------------
Rule: other vegetables -> whole milk
Support: 0.1913801949717804
Confidence: 0.1913801949717804
Lift: 1.0
--------------------------------------------------
Rule: other vegetables -> yogurt
Support: 0.12031811185223192
Confidence: 0.12031811185223192
Lift: 1.0
--------------------------------------------------
Rule: soda -> rolls/buns
Support: 0.11980502821959979
Confidence: 0.11980502821959979
Lift: 1.0
--------------------------------------------------
Rule: whole milk -> rolls/buns
Support: 0.17855310415597742
Confidence: 0.1