# Imports

In [3]:
import pandas as pd

from mlxtend.preprocessing     import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

## Functions

# Loading Datasets

In [8]:
df = pd.read_csv('../data/raw/dados-padaria.csv')

In [5]:
df.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend


In [9]:
df.shape

(20507, 5)

In [10]:
df.isna().mean()

TransactionNo    0.0
Items            0.0
DateTime         0.0
Daypart          0.0
DayType          0.0
dtype: float64

## Data Preparation

In [11]:
list_transactions = []

list_unique_transaction = df['TransactionNo'].unique().tolist()

for item in list_unique_transaction:

    set_transaction_item = set(df.loc[df['TransactionNo'] == item, 'Items'])
    
    list_transaction_item = list(set_transaction_item)

    list_transactions.append(list_transaction_item)

In [12]:
list_transactions

[['Bread'],
 ['Scandinavian'],
 ['Jam', 'Cookies', 'Hot chocolate'],
 ['Muffin'],
 ['Pastry', 'Bread', 'Coffee'],
 ['Pastry', 'Medialuna', 'Muffin'],
 ['Pastry', 'Tea', 'Medialuna', 'Coffee'],
 ['Pastry', 'Bread'],
 ['Bread', 'Muffin'],
 ['Scandinavian', 'Medialuna'],
 ['Medialuna', 'Bread'],
 ['Pastry', 'Tartine', 'Tea', 'Jam', 'Coffee'],
 ['Basket', 'Bread', 'Coffee'],
 ['Pastry', 'Medialuna', 'Bread'],
 ['Mineral water', 'Scandinavian'],
 ['Medialuna', 'Bread', 'Coffee'],
 ['Hot chocolate'],
 ['Farm House'],
 ['Farm House', 'Bread'],
 ['Medialuna', 'Bread'],
 ['Medialuna', 'Bread', 'Coffee'],
 ['Jam'],
 ['Scandinavian', 'Muffin'],
 ['Bread'],
 ['Scandinavian'],
 ['Fudge'],
 ['Scandinavian'],
 ['Bread', 'Coffee'],
 ['Jam', 'Bread'],
 ['Bread'],
 ['Basket'],
 ['Scandinavian', 'Muffin'],
 ['Coffee'],
 ['Muffin', 'Coffee'],
 ['Scandinavian', 'Muffin'],
 ['Tea', 'Bread'],
 ['Bread', 'Coffee'],
 ['Tea', 'Bread'],
 ['Scandinavian'],
 ['Juice', 'Tartine', 'Muffin', 'Coffee'],
 ['Scandinavia

In [14]:
te = TransactionEncoder()

list_transactions_te = te.fit_transform(list_transactions)

In [17]:
df_transactions = pd.DataFrame(list_transactions_te, columns=te.columns_)
df_transactions

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9460,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9461,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
9462,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9463,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


# Machine Learning

## Association Rules

In [18]:
df_frequent_items = apriori(df_transactions, use_colnames=True, min_support=0.02)
df_frequent_items.sort_values(['support'], ascending=False)

Unnamed: 0,support,itemsets
4,0.478394,(Coffee)
1,0.327205,(Bread)
16,0.142631,(Tea)
3,0.103856,(Cake)
20,0.090016,"(Bread, Coffee)"
11,0.086107,(Pastry)
12,0.071844,(Sandwich)
9,0.061807,(Medialuna)
7,0.05832,(Hot chocolate)
23,0.054728,"(Cake, Coffee)"


In [21]:
df_apriori_rules = association_rules(df_frequent_items, metric='confidence', min_threshold=0.5)
df_apriori_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.10284
1,(Cookies),(Coffee),0.054411,0.478394,0.028209,0.518447,1.083723,0.002179,1.083174,0.0817
2,(Hot chocolate),(Coffee),0.05832,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553,0.060403
3,(Juice),(Coffee),0.038563,0.478394,0.020602,0.534247,1.11675,0.002154,1.119919,0.108738
4,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871,0.170091
5,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682,0.146161
6,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384,0.109205
7,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582,0.332006
