# ETS DATA MINING
## Association Rule
### Online Retail Dataset

In [1]:
import pandas as pd
import numpy as np
from apyori import apriori

In [2]:
data = pd.read_excel('E:\Kuliah\Semester 6\DATMIN\ETS\Online Retail.xlsx')

In [3]:
# menghilangkan data transaksi yang diawali dengan huruf C
# karena kode transaksi yang dimulai dengan huruf 'C' ini menunjukkan pembatalan
data['InvoiceNo'] = data['InvoiceNo'].astype('str')
data = data[~data['InvoiceNo'].str.contains('C')]

In [4]:
# menghilangkan data kuantitas yang bernilai -
# karena kuantitas yang bernilai - menunjukkan bahwa tidak terjadi transaksi (bisa pembatalan atau masalah lainnya)
data['Quantity'] = data['Quantity'].astype('str')
data = data[~data['Quantity'].str.contains('-')]

In [5]:
# menghilangkan data harga satuan yang bernilai 0
# karena apabila ada transaksi harga satuan tidak mungkin bernilai 0 (harga satuan 0 pada deskripsi barang yang dibeli berisi kosong atau terdapat masalah lainnya)
data['UnitPrice'] = data['UnitPrice'].astype('str')
data = data[~data['UnitPrice'].str.contains('0')]

In [6]:
# menghilangkan data harga satuan yang bernilai -
# karena apabila ada transaksi harga satuan tidak mungkin bernilai negatif
data['UnitPrice'] = data['UnitPrice'].astype('str')
data = data[~data['UnitPrice'].str.contains('-')]

In [7]:
np.sum(data.isnull())

InvoiceNo           0
StockCode           0
Description         0
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     107989
Country             0
dtype: int64

In [8]:
# membuat list deskripsi barang yang dibeli tiap kode transaksi (InvoiceNo)
x = data.groupby('InvoiceNo')
transaksi = []
for i, j  in x :
    transaksi.append(list(j['Description'].map(str)))

In [9]:
databaru = pd.DataFrame(transaksi) 

In [10]:
databaru.shape

(19168, 923)

In [11]:
records=[]
for i in range (0,19168): 
    records.append([str(databaru.values[i,j]) for j in range (0, 923)])

In [12]:
# association rule
association_rules=apriori(records,min_support=0.008, 
                          min_confidence=0.7, 
                          min_lift=6, min_length=2) 

association_result=list(association_rules)

In [13]:
association_result[0]

RelationRecord(items=frozenset({'PAINTED METAL PEARS ASSORTED', 'ASSORTED COLOUR BIRD ORNAMENT'}), support=0.013512103505843072, ordered_statistics=[OrderedStatistic(items_base=frozenset({'PAINTED METAL PEARS ASSORTED'}), items_add=frozenset({'ASSORTED COLOUR BIRD ORNAMENT'}), confidence=0.7214484679665737, lift=9.504277824043495)])

In [14]:
for item in association_result:
    #first index of the inner list
    #contains base item and add item
    pair = item[0]
    items=[x for x in pair]
    print("rule: " + items[0] + "->" + items[1])
    
    #second index of inner list
    print("support: " + str(item[1]))
    
    #third index of the list located at 10th
    #of the third index of the inner list
    
    print("confidence: " + str(item[2][0][2]))
    print("lift: " + str(item[2][0][3]))
    print("==========================================================================")

rule: PAINTED METAL PEARS ASSORTED->ASSORTED COLOUR BIRD ORNAMENT
support: 0.013512103505843072
confidence: 0.7214484679665737
lift: 9.504277824043495
rule: BAKING SET SPACEBOY DESIGN->BAKING SET 9 PIECE RETROSPOT 
support: 0.016016277128547578
confidence: 0.7309523809523809
lift: 16.216313932980597
rule: JUMBO BAG RED RETROSPOT->BATHROOM METAL SIGN
support: 0.009442821368948248
confidence: 0.7182539682539684
lift: 13.604241169458563
rule: BLUE HAPPY BIRTHDAY BUNTING->PINK HAPPY BIRTHDAY BUNTING
support: 0.013459933222036728
confidence: 0.7206703910614525
lift: 38.05457315665543
rule: WHITE HANGING HEART T-LIGHT HOLDER->CANDLEHOLDER PINK HANGING HEART
support: 0.014712020033388982
confidence: 0.7085427135678392
lift: 6.0201005025125625
rule: CHARLOTTE BAG SUKI DESIGN->CHARLOTTE BAG PINK POLKADOT
support: 0.00975584307178631
confidence: 0.7030075187969924
lift: 45.83417727993453
rule: CHARLOTTE BAG PINK POLKADOT->RED RETROSPOT CHARLOTTE BAG
support: 0.010590567612687812
confidence: 0.76

rule: PINK REGENCY TEACUP AND SAUCER->JAM MAKING SET WITH JARS
support: 0.008190734557595994
confidence: 0.8820224719101124
lift: 16.689641403329748
rule: ROSES REGENCY TEACUP AND SAUCER ->JAM MAKING SET WITH JARS
support: 0.010851419031719533
confidence: 0.8030888030888031
lift: 14.454090307611436
rule: ROSES REGENCY TEACUP AND SAUCER ->None
support: 0.008868948247078463
confidence: 0.711297071129707
lift: 12.802011511187064
rule: ROSES REGENCY TEACUP AND SAUCER ->NATURAL SLATE HEART CHALKBOARD 
support: 0.008086393989983306
confidence: 0.7380952380952381
lift: 13.284328191370445
rule: PINK REGENCY TEACUP AND SAUCER->None
support: 0.015442404006677797
confidence: 0.880952380952381
lift: 16.66939312743854
rule: PINK REGENCY TEACUP AND SAUCER->REGENCY TEA PLATE GREEN 
support: 0.009547161936560935
confidence: 0.756198347107438
lift: 18.94746394425539
rule: PINK REGENCY TEACUP AND SAUCER->None
support: 0.008399415692821368
confidence: 0.8846153846153846
lift: 22.16510809451986
rule: PINK

support: 0.010747078464106845
confidence: 0.907488986784141
lift: 39.08932336781666
rule: REGENCY TEA PLATE PINK->ROSES REGENCY TEACUP AND SAUCER 
support: 0.00933848080133556
confidence: 0.9322916666666667
lift: 40.15767790262172
rule: SET OF 3 WOODEN HEART DECORATIONS->SET OF 3 WOODEN STOCKING DECORATION
support: 0.008973288814691152
confidence: 0.8
lift: 52.515068493150686
rule: SET OF 3 WOODEN STOCKING DECORATION->SET OF 3 WOODEN SLEIGH DECORATIONS
support: 0.008190734557595994
confidence: 0.7810945273631841
lift: 51.27404075512847
rule: PINK REGENCY TEACUP AND SAUCER->ROSES REGENCY TEACUP AND SAUCER 
support: 0.008190734557595994
confidence: 0.9289940828402368
lift: 16.720148901297332
rule: POPPY'S PLAYHOUSE LIVINGROOM ->POPPY'S PLAYHOUSE BATHROOM
support: 0.008034223706176962
confidence: 0.9058823529411767
lift: 53.10077352041735
rule: REGENCY TEA PLATE PINK->ROSES REGENCY TEACUP AND SAUCER 
support: 0.00881677796327212
confidence: 0.9548022598870056
lift: 41.127302735986795
rule