# Market Basket Analysis 1

### Install package

In [1]:
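# Uncomment the line below if mlxtend is not installed yet.
# (%pip, unlike !pip, installs into the environment of the running kernel.)
# %pip install mlxtend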

### Import libraries

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

### Initialize data

In [3]:
# Toy transaction data: each inner list is one shopping cart, and each
# string in it is an item that was bought in that cart.
dataset = [
    ['Apple', 'Beer', 'Rice', 'Chicken'],
    ['Apple', 'Beer', 'Rice'],
    ['Apple', 'Beer'],
    ['Apple', 'Bananas'],
    ['Milk', 'Beer', 'Rice', 'Chicken'],
    ['Milk', 'Beer', 'Rice'],
    ['Milk', 'Beer'],
    ['Apple', 'Bananas'],
]
dataset

[['Apple', 'Beer', 'Rice', 'Chicken'],
 ['Apple', 'Beer', 'Rice'],
 ['Apple', 'Beer'],
 ['Apple', 'Bananas'],
 ['Milk', 'Beer', 'Rice', 'Chicken'],
 ['Milk', 'Beer', 'Rice'],
 ['Milk', 'Beer'],
 ['Apple', 'Bananas']]

### Convert the dataset to a DataFrame of boolean values

In [4]:
# One-hot encode the carts: after fitting the encoder on the transactions,
# transform them into a boolean array with one row per cart and one column
# per item (True where the cart contains that item).
te = TransactionEncoder()
te.fit(dataset)
te_array = te.transform(dataset)
te_array

array([[ True, False,  True,  True, False,  True],
       [ True, False,  True, False, False,  True],
       [ True, False,  True, False, False, False],
       [ True,  True, False, False, False, False],
       [False, False,  True,  True,  True,  True],
       [False, False,  True, False,  True,  True],
       [False, False,  True, False,  True, False],
       [ True,  True, False, False, False, False]])

In [5]:
# Wrap the encoded boolean array in a DataFrame, labelling the columns with
# the item names stored on the fitted encoder (te.columns_).
df = pd.DataFrame(data=te_array, columns=te.columns_)
df

Unnamed: 0,Apple,Bananas,Beer,Chicken,Milk,Rice
0,True,False,True,True,False,True
1,True,False,True,False,False,True
2,True,False,True,False,False,False
3,True,True,False,False,False,False
4,False,False,True,True,True,True
5,False,False,True,False,True,True
6,False,False,True,False,True,False
7,True,True,False,False,False,False


### Find frequently occurring itemsets using the Apriori algorithm

In [6]:
# First pass with a strict threshold: keep only itemsets whose support is at
# least 0.5. use_colnames=True reports itemsets by item name rather than by
# column index.
frequent_itemsets_ap = apriori(
    df,
    min_support=0.5,
    use_colnames=True,
)
frequent_itemsets_ap

Unnamed: 0,support,itemsets
0,0.625,(Apple)
1,0.75,(Beer)
2,0.5,(Rice)
3,0.5,"(Rice, Beer)"


In [7]:
# Second pass with a looser threshold (support of at least 0.3) so that more
# itemsets qualify. This reassigns frequent_itemsets_ap, replacing the result
# of the previous cell.
frequent_itemsets_ap = apriori(
    df,
    min_support=0.3,
    use_colnames=True,
)
frequent_itemsets_ap

Unnamed: 0,support,itemsets
0,0.625,(Apple)
1,0.75,(Beer)
2,0.375,(Milk)
3,0.5,(Rice)
4,0.375,"(Apple, Beer)"
5,0.375,"(Milk, Beer)"
6,0.5,"(Rice, Beer)"


### Generate association rules

In [8]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence is at least 0.5.
rules_ap = association_rules(
    frequent_itemsets_ap,
    metric='confidence',
    min_threshold=0.5,
)
rules_ap

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Apple),(Beer),0.625,0.75,0.375,0.6,0.8,-0.09375,0.625,-0.4
1,(Beer),(Apple),0.75,0.625,0.375,0.5,0.8,-0.09375,0.75,-0.5
2,(Milk),(Beer),0.375,0.75,0.375,1.0,1.333333,0.09375,inf,0.4
3,(Beer),(Milk),0.75,0.375,0.375,0.5,1.333333,0.09375,1.25,1.0
4,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf,0.5
5,(Beer),(Rice),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5,1.0
