In [1]:
import numpy as np
import pandas as pd
from itertools import combinations
from itertools import permutations

In [2]:
def APRIORI_MY(data, min_support=0.2, max_length = 4):
    support = {}
    L = set(data.columns)
    
    for i in range(1, max_length+1):
        c = list(combinations(L,i))
        L = set()
        
        for j in list(c):
            sup = data.loc[:,j].product(axis=1).sum()/len(data.index)
            if sup > min_support:
                support[j] = sup
                
                L = set(set(L) | set(j))
                
    result = pd.DataFrame(list(support.items()), columns = ['items', 'support'])
    return(result)

In [3]:
data = pd.read_csv('datasetTugas4.csv')
dataset = data.set_index('id')
dataset

Unnamed: 0_level_0,A,B,C,D,E
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,1,0,0,0
2,0,1,1,1,0
3,1,0,1,1,1
4,1,0,0,1,1
5,1,1,1,0,0
6,1,1,1,1,0
7,0,1,1,0,0
8,1,1,1,0,0
9,1,1,0,1,0
10,0,1,1,0,1


In [4]:
df = APRIORI_MY(dataset)
df

Unnamed: 0,items,support
0,"(B,)",0.8
1,"(A,)",0.7
2,"(C,)",0.7
3,"(E,)",0.3
4,"(D,)",0.5
5,"(B, A)",0.5
6,"(B, C)",0.6
7,"(B, D)",0.3
8,"(A, C)",0.4
9,"(A, D)",0.4


In [5]:
def ASSOCIATION_RULE(df, min_confidence=0.5):
    support = pd.Series(df.support.values, index=df['items'].values).to_dict()
    data = []
    L = df['items'].values
    
    p = list(permutations(L,2))
    
    for i in p:
        
        if set(i[0]).issubset(i[1]):
            conf = support[i[1]]/support[i[0]]
            if conf > min_confidence:
                j = i[1][not i[1].index(i[0][0])]
                lift = support[i[1]]/(support[i[0]] * support[(j,)])
                data.append([i[0], (j,), support[i[0]], support[(j,)], support[i[1]], conf, lift])
    
    result = pd.DataFrame(data,columns=['antecedents','consequents','antecedent support','consequent support',
                                        'support', 'confidence','lift'])
    
    return(result)

In [6]:
my_assoc = ASSOCIATION_RULE(df)
my_assoc

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,"(B,)","(A,)",0.8,0.7,0.5,0.625,0.892857
1,"(B,)","(C,)",0.8,0.7,0.6,0.75,1.071429
2,"(A,)","(B,)",0.7,0.8,0.5,0.714286,0.892857
3,"(A,)","(C,)",0.7,0.7,0.4,0.571429,0.816327
4,"(A,)","(D,)",0.7,0.5,0.4,0.571429,1.142857
5,"(C,)","(B,)",0.7,0.8,0.6,0.857143,1.071429
6,"(C,)","(A,)",0.7,0.7,0.4,0.571429,0.816327
7,"(D,)","(B,)",0.5,0.8,0.3,0.6,0.75
8,"(D,)","(A,)",0.5,0.7,0.4,0.8,1.142857
9,"(D,)","(C,)",0.5,0.7,0.3,0.6,0.857143


In [11]:
my_assoc['korelasi'] = np.select([(my_assoc['lift'] == 1),(my_assoc['lift'] > 1),(my_assoc['lift'] < 1)], ['Independen','Positif','Negatif'])
my_assoc

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,korelasi
0,"(B,)","(A,)",0.8,0.7,0.5,0.625,0.892857,Negatif
1,"(B,)","(C,)",0.8,0.7,0.6,0.75,1.071429,Positif
2,"(A,)","(B,)",0.7,0.8,0.5,0.714286,0.892857,Negatif
3,"(A,)","(C,)",0.7,0.7,0.4,0.571429,0.816327,Negatif
4,"(A,)","(D,)",0.7,0.5,0.4,0.571429,1.142857,Positif
5,"(C,)","(B,)",0.7,0.8,0.6,0.857143,1.071429,Positif
6,"(C,)","(A,)",0.7,0.7,0.4,0.571429,0.816327,Negatif
7,"(D,)","(B,)",0.5,0.8,0.3,0.6,0.75,Negatif
8,"(D,)","(A,)",0.5,0.7,0.4,0.8,1.142857,Positif
9,"(D,)","(C,)",0.5,0.7,0.3,0.6,0.857143,Negatif


# Analisis asosiasi menggunakan library mlxtend.frequent_pattern

In [8]:
from mlxtend.frequent_patterns import apriori, association_rules

frequent_itemset = apriori(dataset,min_support=0.21, use_colnames=True)
frequent_itemset 

Unnamed: 0,support,itemsets
0,0.7,(A)
1,0.8,(B)
2,0.7,(C)
3,0.5,(D)
4,0.3,(E)
5,0.5,"(B, A)"
6,0.4,"(A, C)"
7,0.4,"(D, A)"
8,0.6,"(B, C)"
9,0.3,"(B, D)"


In [9]:
rules = association_rules(frequent_itemset, min_threshold=0.5)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(B),(A),0.8,0.7,0.5,0.625,0.892857,-0.06,0.8
1,(A),(B),0.7,0.8,0.5,0.714286,0.892857,-0.06,0.7
2,(A),(C),0.7,0.7,0.4,0.571429,0.816327,-0.09,0.7
3,(C),(A),0.7,0.7,0.4,0.571429,0.816327,-0.09,0.7
4,(D),(A),0.5,0.7,0.4,0.8,1.142857,0.05,1.5
5,(A),(D),0.7,0.5,0.4,0.571429,1.142857,0.05,1.166667
6,(B),(C),0.8,0.7,0.6,0.75,1.071429,0.04,1.2
7,(C),(B),0.7,0.8,0.6,0.857143,1.071429,0.04,1.4
8,(D),(B),0.5,0.8,0.3,0.6,0.75,-0.1,0.5
9,(D),(C),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75


In [12]:
rules['korelasi'] = np.select([(rules['lift'] == 1),(rules['lift'] > 1),(rules['lift'] < 1)], ['Independen','Positif','Negatif'])
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,korelasi
0,(B),(A),0.8,0.7,0.5,0.625,0.892857,-0.06,0.8,Negatif
1,(A),(B),0.7,0.8,0.5,0.714286,0.892857,-0.06,0.7,Negatif
2,(A),(C),0.7,0.7,0.4,0.571429,0.816327,-0.09,0.7,Negatif
3,(C),(A),0.7,0.7,0.4,0.571429,0.816327,-0.09,0.7,Negatif
4,(D),(A),0.5,0.7,0.4,0.8,1.142857,0.05,1.5,Positif
5,(A),(D),0.7,0.5,0.4,0.571429,1.142857,0.05,1.166667,Positif
6,(B),(C),0.8,0.7,0.6,0.75,1.071429,0.04,1.2,Positif
7,(C),(B),0.7,0.8,0.6,0.857143,1.071429,0.04,1.4,Positif
8,(D),(B),0.5,0.8,0.3,0.6,0.75,-0.1,0.5,Negatif
9,(D),(C),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75,Negatif
