## Import Python Libraries

In [2]:
import numpy as np
import pandas as pd

from mlxtend.frequent_patterns import apriori, association_rules

## Collecting Data

Dataset berasal dari Rekap Penjualan PT Arma Inti Raya (Banjarmasin) tahun 2017

In [3]:
# Load the 2017 sales recap; the path is relative to the notebook's working directory.
sales_csv = "Penjualan2017.csv"
data = pd.read_csv(sales_csv)

# Report the (rows, columns) of the freshly loaded frame.
print("Data shape", data.shape)

Data shape (1240, 9)


## Explore Dataset

In [4]:
# Preview the first rows of the loaded sales table.
data.head()

Unnamed: 0,No,Tanggal,Invoice,NamaPerusahaan,NamaBarang,Qty,Sat,HargaSatuan,JumlahHarga
0,1,1/3/2017,16470599.0,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 120MM X 150MM X 14MM,10,PCS,46150,461500
1,2,1/3/2017,16470599.0,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 130MM X 160MM X 13MM,10,PCS,43500,435000
2,3,1/3/2017,16470599.0,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 150MM X 180MM X 14MM,10,PCS,152400,1524000
3,4,1/3/2017,16470600.0,PT. SWADAYA ANDIKA (SLF),CAT HAMPLE TAHAN PANAS 600˚C,10,KG,250000,2500000
4,5,1/3/2017,16470601.0,PT. BERSAMA SEJAHTERA SAKTI (GAF),ATAP TRANSPARAN 0.8MM X 840MM X 6000MM,70,LBR,895000,62650000


In [5]:
# Explore the column names of the data
data.columns

Index(['No', 'Tanggal', 'Invoice', 'NamaPerusahaan', 'NamaBarang', 'Qty',
       'Sat', 'HargaSatuan', 'JumlahHarga'],
      dtype='object')

In [7]:
# Summary information about the data (dtypes, non-null counts, memory usage).
# NOTE(review): the output recorded below lists only 4 columns and 1231 rows, whereas
# the freshly loaded frame has shape (1240, 9). Together with the execution counter
# (In [7] placed before In [6]) this indicates the cell was run after the preprocessing
# cell; on a fresh "Restart & Run All" it would describe the raw 9-column frame instead.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1231 entries, 0 to 1239
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Invoice         1231 non-null   object
 1   NamaPerusahaan  1231 non-null   object
 2   NamaBarang      1231 non-null   object
 3   Qty             1231 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 48.1+ KB


In [6]:
# Check which columns contain missing values (in particular rows without an invoice number)
data.isna().any()

# According to the information I received, Invoice is empty when the purchase was paid in cash

No                False
Tanggal           False
Invoice            True
NamaPerusahaan    False
NamaBarang        False
Qty               False
Sat               False
HargaSatuan       False
JumlahHarga       False
dtype: bool

## Preprocessing Data

In [9]:
# Clean the raw sales table in a single non-mutating chain, so the cell does not
# modify the frame shown by earlier cells in place and no `inplace=True` is needed.
data = (
    data
    # Drop rows without an invoice number
    .dropna(axis=0, subset=['Invoice'])
    .assign(
        # Remove extra leading/trailing white space from the text columns
        NamaPerusahaan=lambda df: df['NamaPerusahaan'].str.strip(),
        NamaBarang=lambda df: df['NamaBarang'].str.strip(),
        # Invoice is read as a float (e.g. 16470599.0); keep only the part before
        # the decimal point so it becomes a clean string identifier
        Invoice=lambda df: df['Invoice'].astype('str').str.split('.').str[0],
    )
    # Keep only the columns used by the basket analysis
    .loc[:, ['Invoice', 'NamaPerusahaan', 'NamaBarang', 'Qty']]
)
data


Unnamed: 0,Invoice,NamaPerusahaan,NamaBarang,Qty
0,16470599,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 120MM X 150MM X 14MM,10
1,16470599,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 130MM X 160MM X 13MM,10
2,16470599,PT. BERSAMA SEJAHTERA SAKTI (GAF),OIL SEAL 150MM X 180MM X 14MM,10
3,16470600,PT. SWADAYA ANDIKA (SLF),CAT HAMPLE TAHAN PANAS 600˚C,10
4,16470601,PT. BERSAMA SEJAHTERA SAKTI (GAF),ATAP TRANSPARAN 0.8MM X 840MM X 6000MM,70
...,...,...,...,...
1235,2634670,PT. CITRA PUTRA KEBUN ASRI (CPKA),PACKING KARET 3MM (BENANG),2
1236,2634670,PT. CITRA PUTRA KEBUN ASRI (CPKA),ORING 3MM (TAHAN PANAS),10
1237,2634670,PT. CITRA PUTRA KEBUN ASRI (CPKA),OIL SEAL TC 45 X 65 X 10MM,5
1238,2634671,PT. LADANG RUMPUN SUBUR ABADI (ASF),"GLAND PACKING 3/8""",5


In [10]:
# Total quantity of each item per invoice (one entry per invoice/item pair).
qty_by_invoice_item = data.groupby(['Invoice', 'NamaBarang'])['Qty'].sum()

# Pivot the items into columns, then fill invoice/item combinations that never
# occurred with 0 and index the table by invoice number.
basket_wide = qty_by_invoice_item.unstack().reset_index()
basket_group = basket_wide.fillna(0).set_index('Invoice')

# Limit how many columns pandas renders, since the item matrix is very wide.
pd.set_option('display.max_columns', 20)
basket_group

NamaBarang,ACRILIC 10 MM,ACRILIC DOP/BENING 2M X 1M TB 5,ACRILIC NAMA MEJA UKURAN F4 VERTICAL,ACRILIC SHEET CLEAR 2000 X 1000 X 10MM,ADJUSTING CONE CB P15,ADJUSTING CONE SHAFT P15,AIR CHUCK ALDO,ALAT PEMBUKA BAN (LUMOS 3370),ALLUMINIUM SULFAT,AMPEREMETER 1000A,...,WEBBING SLING 2TON X 6M,WELDING ELECTRODE S/S 308 L SIZE 2.6MM,WELDING ELECTRODE S/S 308 L SIZE 3.2MM,WELDING ELECTRODE S/S 309 L SIZE 2.6MM,WELDING ELECTRODE S/S 309 L SIZE 3.2MM,WELDING HOLDER (STANG LAS) 600A,WIRE MESH SS 304 10 X 1 X 1000MM,WIRE SCAFFOLDING 2MM,Y STRAINER MERK : YONE C1 DN 100 PN 16,ZELIO LOGIC SR 2B121 FU
Invoice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16470599,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16470600,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16470601,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16470602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
16470603,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5490169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5490170,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5490171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5490172,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Helper that converts a purchased quantity into bought (1) or not bought (0)
def hot_encode(x):
    """Return 1 when the quantity ``x`` is strictly positive, otherwise 0.

    The result is always an int. A value for which neither ``x <= 0`` nor
    ``x > 0`` holds (e.g. NaN) maps to 0 instead of falling through both
    checks and implicitly returning ``None``.
    """
    return 1 if x > 0 else 0

In [12]:
# Binarise the basket matrix: 1 if the item appears on the invoice, 0 otherwise.
# The vectorised comparison yields the same 0/1 values as mapping hot_encode over
# every cell, but does not rely on DataFrame.applymap (deprecated since pandas 2.1)
# and avoids a Python-level call per cell.
basket_group = (basket_group > 0).astype(int)
basket_group

NamaBarang,ACRILIC 10 MM,ACRILIC DOP/BENING 2M X 1M TB 5,ACRILIC NAMA MEJA UKURAN F4 VERTICAL,ACRILIC SHEET CLEAR 2000 X 1000 X 10MM,ADJUSTING CONE CB P15,ADJUSTING CONE SHAFT P15,AIR CHUCK ALDO,ALAT PEMBUKA BAN (LUMOS 3370),ALLUMINIUM SULFAT,AMPEREMETER 1000A,...,WEBBING SLING 2TON X 6M,WELDING ELECTRODE S/S 308 L SIZE 2.6MM,WELDING ELECTRODE S/S 308 L SIZE 3.2MM,WELDING ELECTRODE S/S 309 L SIZE 2.6MM,WELDING ELECTRODE S/S 309 L SIZE 3.2MM,WELDING HOLDER (STANG LAS) 600A,WIRE MESH SS 304 10 X 1 X 1000MM,WIRE SCAFFOLDING 2MM,Y STRAINER MERK : YONE C1 DN 100 PN 16,ZELIO LOGIC SR 2B121 FU
Invoice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16470599,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16470600,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16470601,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16470602,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
16470603,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5490169,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5490170,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5490171,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5490172,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Modelling

In [13]:
# Thresholds for the mining step, named so they are easy to find and tune.
MIN_SUPPORT = 0.002
MIN_CONFIDENCE = 0.3

# Build the model with the Apriori algorithm: find the frequent itemsets
frq_items = apriori(basket_group, min_support=MIN_SUPPORT, use_colnames=True)

# Derive the association rules from the frequent itemsets
rules = association_rules(frq_items, metric="confidence", min_threshold=MIN_CONFIDENCE)

# Strongest rules first: highest confidence, ties broken by highest lift
rules = rules.sort_values(by=['confidence', 'lift'], ascending=False)



### Top 5 Apriori

In [14]:
# First five rules; `rules` is sorted by confidence then lift in descending order,
# so these are the strongest rules found.
rules.head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(SODA ASH, POWDER, KARUNGAN)",(ALLUMINIUM SULFAT),0.003284,0.003284,0.003284,1.0,304.5,0.003273,inf,1.0
1,(ALLUMINIUM SULFAT),"(SODA ASH, POWDER, KARUNGAN)",0.003284,0.003284,0.003284,1.0,304.5,0.003273,inf,1.0
10,(BELTING B 75 BANDO),(BELTING B 85 BANDO),0.003284,0.003284,0.003284,1.0,304.5,0.003273,inf,1.0
11,(BELTING B 85 BANDO),(BELTING B 75 BANDO),0.003284,0.003284,0.003284,1.0,304.5,0.003273,inf,1.0
16,"(BOLT & NUT 3/4"" X 3"")","(BOLT & NUT 5/8"" X 3"")",0.003284,0.003284,0.003284,1.0,304.5,0.003273,inf,1.0


### Bottom 5 Apriori

In [15]:
# Last five rules; `rules` is sorted by confidence then lift in descending order,
# so these are the weakest rules that still passed the confidence threshold.
rules.tail(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
19,"(BOLT & NUT 3/8"" X 1"")","(BOLT & NUT 3/4"" X 4"")",0.004926,0.004926,0.003284,0.666667,135.333333,0.00326,2.985222,0.997525
2,"(BATU GERINDA POTONG 4"" 100 X 1 X 16MM)",(AMPLAS KERTAS 800),0.006568,0.003284,0.003284,0.5,152.25,0.003263,1.993432,1.0
4,"(BATU GERINDA 4"")",(ANTI KARAT WD40),0.00821,0.003284,0.003284,0.4,121.8,0.003257,1.661193,1.0
6,"(BATU GERINDA 4"")","(BATU GERINDA 6"")",0.00821,0.003284,0.003284,0.4,121.8,0.003257,1.661193,1.0
8,"(BATU GERINDA 4"")","(BATU GERINDA POTONG 4"")",0.00821,0.003284,0.003284,0.4,121.8,0.003257,1.661193,1.0
