In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global random generator so that any later random draws
# are repeatable across runs.
RANDOM_SEED = 210508
np.random.seed(RANDOM_SEED)

In [3]:
# Read the bakery transaction log; the path is relative to the working directory.
# The preview below shows date, time, transaction and item columns.
transactions = pd.read_csv(filepath_or_buffer='subset_bakery.csv')
transactions.head(n=5)

Unnamed: 0,date,time,transaction,item
0,2016-10-30,09:58:11,1,Bread
1,2016-10-30,10:05:34,2,Scandinavian
2,2016-10-30,10:05:34,2,Scandinavian
3,2016-10-30,10:07:57,3,Hot chocolate
4,2016-10-30,10:07:57,3,Cookies


In [4]:
# Size of the raw table as a (rows, columns) tuple.
transactions.shape

(18733, 4)

In [5]:
# One row per transaction id with the number of non-null item entries in it;
# the first element of the resulting shape is the number of distinct transactions.
(
    transactions
    .groupby(['transaction'], as_index=False)['item']
    .count()
    .rename(columns={'item': 'count'})
    .shape
)

(9065, 2)

In [6]:
# Same per-transaction item count as above, ordered so the transactions with
# the most item entries come first; show the top five.
(
    transactions
    .groupby(['transaction'], as_index=False)['item']
    .count()
    .rename(columns={'item': 'count'})
    .sort_values(by='count', ascending=False)
    .head()
)

Unnamed: 0,transaction,count
5905,6279,11
6808,7245,9
5686,6045,9
3569,3799,9
6090,6474,9


In [7]:
# Number of distinct item labels in the data. Bracket access is used so the
# column lookup cannot be confused with an attribute of the frame.
transactions['item'].nunique()

20

In [8]:
# Collapse each transaction to the sorted array of distinct items it contains
# (np.unique both de-duplicates and sorts), indexed by transaction id.
txs = (
    transactions
    .groupby(['transaction'])['item']
    .apply(np.unique)
)

In [9]:
# Preview the first five per-transaction item arrays.
txs.head()

transaction
1                     [Bread]
2              [Scandinavian]
3    [Cookies, Hot chocolate]
4                    [Muffin]
5     [Bread, Coffee, Pastry]
Name: item, dtype: object

In [10]:
# Plain Python list with one entry (an array of item names) per transaction,
# which is the input handed to the transaction encoder below.
txs_list = txs.tolist()

In [11]:
# Peek at the first five entries of the list form.
txs_list[:5]

[array(['Bread'], dtype=object),
 array(['Scandinavian'], dtype=object),
 array(['Cookies', 'Hot chocolate'], dtype=object),
 array(['Muffin'], dtype=object),
 array(['Bread', 'Coffee', 'Pastry'], dtype=object)]

In [12]:
from mlxtend.preprocessing import TransactionEncoder

In [13]:
# One-hot encode the transactions: learn the item vocabulary and produce the
# boolean transaction-by-item matrix in a single step. The fitted encoder is
# kept because its `columns_` attribute labels the matrix columns later on.
te = TransactionEncoder()
txs_formatted = te.fit_transform(txs_list)

In [14]:
# Shape of the encoded matrix; expected to be (transactions, distinct items).
txs_formatted.shape

(9065, 20)

In [15]:
# First encoded row; slicing (rather than indexing) keeps the result 2-D.
txs_formatted[:1]

array([[False,  True, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False]])

In [16]:
# Wrap the boolean matrix in a DataFrame whose columns are the item names
# learned by the encoder, so itemsets can be reported by name.
# NOTE(review): the columns include an item literally called 'NONE' (visible in
# the preview) — presumably a missing-item placeholder; confirm, and consider
# excluding it before mining rules.
df = pd.DataFrame(data=txs_formatted, columns=te.columns_)
df.head(n=5)

Unnamed: 0,Alfajores,Bread,Brownie,Cake,Coffee,Cookies,Farm House,Hot chocolate,Juice,Medialuna,Muffin,NONE,Pastry,Sandwich,Scandinavian,Scone,Soup,Tea,Toast,Truffles
0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
2,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
4,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False


In [17]:
from mlxtend.frequent_patterns import apriori

In [32]:
# Mine frequent itemsets: keep every itemset present in at least 1% of the
# transactions, reported with item names instead of column positions, and
# list them from most to least frequent.
MIN_SUPPORT = 0.01
apriori_df = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
apriori_df.sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets
4,0.499504,(Coffee)
1,0.341644,(Bread)
17,0.148924,(Tea)
3,0.108439,(Cake)
24,0.093988,"(Coffee, Bread)"
12,0.089906,(Pastry)
11,0.083067,(NONE)
13,0.075014,(Sandwich)
9,0.064534,(Medialuna)
7,0.060894,(Hot chocolate)


In [19]:
from mlxtend.frequent_patterns import association_rules

In [33]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence reaches the threshold, then rank them by lift.
MIN_CONFIDENCE = 0.3
rules_df = association_rules(
    apriori_df,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)
rules_df.sort_values(by="lift", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
15,(Toast),(Coffee),0.03508,0.499504,0.02471,0.704403,1.410205,0.007188,1.693169
7,(Medialuna),(Coffee),0.064534,0.499504,0.036735,0.569231,1.139593,0.0045,1.161867
10,(Pastry),(Coffee),0.089906,0.499504,0.049641,0.552147,1.105392,0.004733,1.117547
0,(Alfajores),(Coffee),0.037948,0.499504,0.020518,0.540698,1.08247,0.001563,1.089688
6,(Juice),(Coffee),0.040265,0.499504,0.021511,0.534247,1.069555,0.001399,1.074595
9,(NONE),(Coffee),0.083067,0.499504,0.044236,0.532537,1.066132,0.002744,1.070664
11,(Sandwich),(Coffee),0.075014,0.499504,0.039934,0.532353,1.065764,0.002464,1.070244
3,(Cake),(Coffee),0.108439,0.499504,0.057143,0.526958,1.054964,0.002977,1.058039
12,(Scone),(Coffee),0.036073,0.499504,0.018864,0.522936,1.046911,0.000845,1.049117
4,(Cookies),(Coffee),0.056812,0.499504,0.029454,0.518447,1.037924,0.001076,1.039337
