![title](img/header.png)

In [11]:
#!pip install mlxtend --user

In [113]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

df = pd.read_excel('Online Retail1.xlsx')
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,City
0,1,85123A,roti,6,2010-12-01 08:26:00.000,2.55,11,Malang
1,1,71053,selai,6,2010-12-01 08:26:00.000,3.39,11,Malang
2,1,84406B,mentega,8,2010-12-01 08:26:00.000,2.75,11,Malang
3,2,84029G,roti,6,2010-12-01 08:30:00.000,3.39,22,Malang
4,2,84029E,mentega,6,2010-12-02 08:29:59.995,3.39,22,Malang
5,3,22752,roti,2,2010-12-01 08:35:00.000,7.65,33,Malang
6,3,21730,susu,6,2010-12-02 08:35:00.000,4.25,33,Malang
7,3,22633,mentega,6,2010-12-03 08:35:00.000,1.85,33,Malang
8,4,22632,coklat,6,2010-12-01 08:40:00.000,1.85,44,Malang
9,4,84879,roti,32,2010-12-02 08:39:59.995,1.69,44,Malang


In [114]:
# Clean up spaces in description and remove any rows that don't have a valid invoice
df['Description'] = df['Description'].str.strip()
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)

In [115]:
#df['InvoiceNo'] = df['InvoiceNo'].astype('str')
#df = df[~df['InvoiceNo'].str.contains('C')]

In [116]:
basket = (df[df['City'] =="Malang"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))

In [117]:
#basket.head()
basket

Description,coklat,mentega,roti,selai,susu
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.0,8.0,6.0,6.0,0.0
2,0.0,6.0,6.0,0.0,0.0
3,0.0,6.0,2.0,0.0,6.0
4,6.0,0.0,32.0,0.0,0.0
5,8.0,0.0,0.0,0.0,9.0


In [118]:
# Show a subset of columns
basket.iloc[:,[0,1,2,3]].head()

Description,coklat,mentega,roti,selai
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.0,8.0,6.0,6.0
2,0.0,6.0,6.0,0.0
3,0.0,6.0,2.0,0.0
4,6.0,0.0,32.0,0.0
5,8.0,0.0,0.0,0.0


In [119]:
# Convert the units to 1 hot encoded values
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

In [120]:
basket_sets = basket.applymap(encode_units)

In [121]:
# No need to track mentega
#basket_sets.drop('mentega', inplace=True, axis=1)

In [122]:
basket_sets.head()

Description,coklat,mentega,roti,selai,susu
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,1,1,1,0
2,0,1,1,0,0
3,0,1,1,0,1
4,1,0,1,0,0
5,1,0,0,0,1


In [123]:
# Build up the frequent items# Build  
frequent_itemsets = apriori(basket_sets, min_support=0.07, use_colnames=True)

In [124]:
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.4,(coklat)
1,0.6,(mentega)
2,0.8,(roti)
3,0.2,(selai)
4,0.4,(susu)


In [125]:
# Create the rules# Create 
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules.iloc[:,[0,1,2,3,4,5,6]].head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,(susu),(coklat),0.4,0.4,0.2,0.5,1.25
1,(coklat),(susu),0.4,0.4,0.2,0.5,1.25
2,(mentega),(roti),0.6,0.8,0.6,1.0,1.25
3,(roti),(mentega),0.8,0.6,0.6,0.75,1.25
4,(mentega),(selai),0.6,0.2,0.2,0.333333,1.666667


In [126]:
rules[ (rules['lift'] >= 1) &
       (rules['confidence'] >= 0.8) ];
rules.iloc[:,[0,1,2,3,4,5,6]].head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,(susu),(coklat),0.4,0.4,0.2,0.5,1.25
1,(coklat),(susu),0.4,0.4,0.2,0.5,1.25
2,(mentega),(roti),0.6,0.8,0.6,1.0,1.25
3,(roti),(mentega),0.8,0.6,0.6,0.75,1.25
4,(mentega),(selai),0.6,0.2,0.2,0.333333,1.666667


In [127]:
basket['susu'].sum()

15.0

In [128]:
basket['coklat'].sum()

14.0

![title](img/thumbs-up.png)