# Reguły asocjacyjne

### 1.1 Implementacja algorytmu apriori

In [380]:
def support(items, transactions ):
    i = 0
    for t in transactions:
        if items.issubset(t):
            i += 1        
    return i/len(transactions)


In [383]:
from itertools import chain
from itertools import combinations

def subsets(arr):
    return chain(*[combinations(arr, i + 1) for i, a in enumerate(arr)])


In [384]:
def my_apriori(transactions, min_support, min_confidence):
    
    
    items = set()
    for t in transactions:
        for item in t:
            if not item in items:
                items.add(item)
    
    
    keys = items
    vals = np.zeros(len(items), dtype=int)
    freq = dict(zip(keys, vals))

    for transaction in transactions:
        for item in transaction:
                    freq[item] += 1
    
    
    support_dict = {k: v/len(transactions) for k, v in freq.items()}
    L = {k: v for k, v in support_dict.items() if v >= min_support}

    l = []
    for k, v in support_dict.items():
        if v >= min_support:
            l.append( set([k]) )
    L = {1 : l}

    
    k=2
    while( L[k-1] ):
        L[k] = []
        for items_set in L[k-1]:
            for item in L[1]:
                if not item.issubset(items_set):
                    if support(items_set.union(item),transactions) > min_support:
                        if not items_set.union(item) in L[k]:
                            L[k].append( items_set.union(item) )
        k = k+1
    
    
    rules = []
    for key, value in L.items():
        for item in value:
            _subsets = map(set, [x for x in subsets(item)])
            for element in _subsets:
                remain = item.difference(element)
                if len(remain) > 0:
                    confidence = np.round(support(item, transactions)/support(element, transactions),3)
                    if confidence >= min_confidence:
                        supp = np.round( support(item, transactions), 3)
                        lift = np.round(confidence / support(remain, transactions), 3)
                        leverage = np.round(supp - support(element, transactions) * support(remain, transactions),3)
                        rules.append([element, remain, confidence, supp, lift, leverage])
    return rules
        

### 1.2 Test data

In [387]:
transactions_test = [{1, 2, 3},
{1, 2, 4},
{3, 2, 5}]

### 1.3 Python Apriori

In [388]:
from efficient_apriori import apriori
itemsets, rules = apriori(transactions_test, 0.5, 1)
for rule in rules:
    print(rule)

{1} -> {2} (conf: 1.000, supp: 0.667, lift: 1.000, conv: 0.000)
{3} -> {2} (conf: 1.000, supp: 0.667, lift: 1.000, conv: 0.000)


### 1.4 My Apriori

In [391]:
rules = my_apriori(transactions_test, 0.5, 1)
for rule in rules:
    print( str(rule[0]) + " -> "+ str(rule[1]) + " conf: " + str(rule[2]) + " supp: " + str(rule[3]) + " lift: " + str(rule[4]) + " leverage: " + str(rule[5]) )


{1} -> {2} conf: 1.0 supp: 0.667 lift: 1.0 leverage: 0.0
{3} -> {2} conf: 1.0 supp: 0.667 lift: 1.0 leverage: 0.0


In [452]:
from sklearn.datasets import load_iris
iris = load_iris()
iris_x = iris.data
iris_y = iris.target
iris_transactions = []
for i in range(150):
    if i <50:
        s = set(np.round(iris_x[i]*100,0).astype(int) )
        s = s.union({0})
        iris_transactions.append(s)
    if i >= 50 and i <100:
        s = set(np.round(iris_x[i]*100,0).astype(int) )
        s = s.union({1})
        iris_transactions.append(s)
    if i >=100 and i <150:
        s = set(np.round(iris_x[i]*100,0).astype(int) )
        s = s.union({2})
        iris_transactions.append(s)
iris_transactions


[{0, 20, 140, 350, 510},
 {0, 20, 140, 300, 490},
 {0, 20, 130, 320, 470},
 {0, 20, 150, 310, 460},
 {0, 20, 140, 360, 500},
 {0, 40, 170, 390, 540},
 {0, 30, 140, 340, 460},
 {0, 20, 150, 340, 500},
 {0, 20, 140, 290, 440},
 {0, 10, 150, 310, 490},
 {0, 20, 150, 370, 540},
 {0, 20, 160, 340, 480},
 {0, 10, 140, 300, 480},
 {0, 10, 110, 300, 430},
 {0, 20, 120, 400, 580},
 {0, 40, 150, 440, 570},
 {0, 40, 130, 390, 540},
 {0, 30, 140, 350, 510},
 {0, 30, 170, 380, 570},
 {0, 30, 150, 380, 510},
 {0, 20, 170, 340, 540},
 {0, 40, 150, 370, 510},
 {0, 20, 100, 360, 460},
 {0, 50, 170, 330, 510},
 {0, 20, 190, 340, 480},
 {0, 20, 160, 300, 500},
 {0, 40, 160, 340, 500},
 {0, 20, 150, 350, 520},
 {0, 20, 140, 340, 520},
 {0, 20, 160, 320, 470},
 {0, 20, 160, 310, 480},
 {0, 40, 150, 340, 540},
 {0, 10, 150, 410, 520},
 {0, 20, 140, 420, 550},
 {0, 20, 150, 310, 490},
 {0, 20, 120, 320, 500},
 {0, 20, 130, 350, 550},
 {0, 10, 140, 360, 490},
 {0, 20, 130, 300, 440},
 {0, 20, 150, 340, 510},


In [457]:
rules = my_apriori(iris_transactions, 0.05, .5)
for rule in rules:
    print( str(rule[0]) + " -> "+ str(rule[1]) + " conf: " + str(rule[2]) + " supp: " + str(rule[3]) + " lift: " + str(rule[4]) + " leverage: " + str(rule[5]) )

{140} -> {0} conf: 0.619 supp: 0.087 lift: 1.857 leverage: 0.04
{0} -> {20} conf: 0.58 supp: 0.193 lift: 3.0 leverage: 0.129
{20} -> {0} conf: 1.0 supp: 0.193 lift: 3.0 leverage: 0.129
{150} -> {0} conf: 0.52 supp: 0.087 lift: 1.56 leverage: 0.031
{340} -> {0} conf: 0.75 supp: 0.06 lift: 2.25 leverage: 0.033
{500} -> {0} conf: 0.571 supp: 0.053 lift: 1.713 leverage: 0.022
{130} -> {1} conf: 0.65 supp: 0.087 lift: 1.95 leverage: 0.043
{280} -> {2} conf: 0.571 supp: 0.053 lift: 1.713 leverage: 0.022
{180} -> {2} conf: 0.917 supp: 0.073 lift: 2.751 leverage: 0.046
{230} -> {2} conf: 0.667 supp: 0.053 lift: 2.001 leverage: 0.026
{0, 140} -> {20} conf: 0.615 supp: 0.053 lift: 3.181 leverage: 0.036
{140, 20} -> {0} conf: 1.0 supp: 0.053 lift: 3.0 leverage: 0.035


# 2 Retail dataset

### 2.1 Load data

In [393]:
lines = open("retail/retail.dat").read().replace(" \n","\n").split('\n')
transactions_retail = []
for line in lines:
    t = set(list(np.fromstring(line,sep=' ', dtype=int)))
    transactions_retail.append(t)
a = transactions_retail.pop(0)

### 2.2 Python apriori

In [394]:
itemsets, rules = apriori(transactions_retail, min_support=0.05, min_confidence=.1)
for rule in rules:
    print(rule)

{39} -> {32} (conf: 0.167, supp: 0.096, lift: 0.970, conv: 0.994)
{32} -> {39} (conf: 0.557, supp: 0.096, lift: 0.970, conv: 0.961)
{48} -> {32} (conf: 0.191, supp: 0.091, lift: 1.108, conv: 1.023)
{32} -> {48} (conf: 0.530, supp: 0.091, lift: 1.108, conv: 1.110)
{39} -> {38} (conf: 0.204, supp: 0.117, lift: 1.154, conv: 1.034)
{38} -> {39} (conf: 0.663, supp: 0.117, lift: 1.154, conv: 1.263)
{48} -> {38} (conf: 0.189, supp: 0.090, lift: 1.066, conv: 1.014)
{38} -> {48} (conf: 0.509, supp: 0.090, lift: 1.066, conv: 1.064)
{41} -> {39} (conf: 0.764, supp: 0.129, lift: 1.329, conv: 1.800)
{39} -> {41} (conf: 0.225, supp: 0.129, lift: 1.329, conv: 1.072)
{48} -> {39} (conf: 0.692, supp: 0.331, lift: 1.203, conv: 1.379)
{39} -> {48} (conf: 0.575, supp: 0.331, lift: 1.203, conv: 1.229)
{48} -> {41} (conf: 0.214, supp: 0.102, lift: 1.263, conv: 1.057)
{41} -> {48} (conf: 0.603, supp: 0.102, lift: 1.263, conv: 1.316)
{39, 48} -> {32} (conf: 0.185, supp: 0.061, lift: 1.077, conv: 1.016)
{32, 4

### 2.3 My apriori

In [395]:
rules = my_apriori(transactions_retail, 0.05, .1)
for rule in rules:
    print( str(rule[0]) + " -> "+ str(rule[1]) + " conf: " + str(rule[2]) + " supp: " + str(rule[3]) + " lift: " + str(rule[4]) + " leverage: " + str(rule[5]) )


{32} -> {39} conf: 0.557 supp: 0.096 lift: 0.969 leverage: -0.003
{39} -> {32} conf: 0.167 supp: 0.096 lift: 0.971 leverage: -0.003
{32} -> {48} conf: 0.53 supp: 0.091 lift: 1.109 leverage: 0.009
{48} -> {32} conf: 0.191 supp: 0.091 lift: 1.11 leverage: 0.009
{38} -> {39} conf: 0.663 supp: 0.117 lift: 1.153 leverage: 0.015
{39} -> {38} conf: 0.204 supp: 0.117 lift: 1.153 leverage: 0.015
{48} -> {38} conf: 0.189 supp: 0.09 lift: 1.068 leverage: 0.005
{38} -> {48} conf: 0.509 supp: 0.09 lift: 1.065 leverage: 0.005
{41} -> {39} conf: 0.764 supp: 0.129 lift: 1.329 leverage: 0.032
{39} -> {41} conf: 0.225 supp: 0.129 lift: 1.327 leverage: 0.032
{48} -> {39} conf: 0.692 supp: 0.331 lift: 1.204 leverage: 0.056
{39} -> {48} conf: 0.575 supp: 0.331 lift: 1.203 leverage: 0.056
{48} -> {41} conf: 0.214 supp: 0.102 lift: 1.262 leverage: 0.021
{41} -> {48} conf: 0.603 supp: 0.102 lift: 1.262 leverage: 0.021
{32} -> {48, 39} conf: 0.356 supp: 0.061 lift: 1.077 leverage: 0.004
{48} -> {32, 39} conf: 

# 3 Kosarak dataset

### 3.1 Load data

In [368]:

lines = open("kosarak/kosarak.dat").read().replace(" \n","\n").split('\n')
transactions_kosarak = []
for line in lines:
    t = set(list(np.fromstring(line,sep=' ', dtype=int)))
    transactions_kosarak.append(t)

### 3.2 My apriori

In [392]:
rules = my_apriori(transactions_kosarak, 0.05, .7)
for rule in rules:
    print( str(rule[0]) + " -> "+ str(rule[1]) + " conf: " + str(rule[2]) + " supp: " + str(rule[3]) + " lift: " + str(rule[4]) + " leverage: " + str(rule[5]) )


{7} -> {6} conf: 0.847 supp: 0.074 lift: 1.394 leverage: 0.021
{11} -> {6} conf: 0.89 supp: 0.327 lift: 1.465 leverage: 0.104
{27} -> {6} conf: 0.824 supp: 0.06 lift: 1.356 leverage: 0.016
{148} -> {6} conf: 0.926 supp: 0.065 lift: 1.524 leverage: 0.022
{218} -> {6} conf: 0.877 supp: 0.078 lift: 1.444 leverage: 0.024
{148} -> {11} conf: 0.797 supp: 0.056 lift: 2.167 leverage: 0.03
{148} -> {218} conf: 0.841 supp: 0.059 lift: 9.397 leverage: 0.053
{1, 11} -> {6} conf: 0.937 supp: 0.087 lift: 1.543 leverage: 0.031
{3, 11} -> {6} conf: 0.891 supp: 0.145 lift: 1.467 leverage: 0.046
{11, 7} -> {6} conf: 0.978 supp: 0.056 lift: 1.61 leverage: 0.021
{6, 7} -> {11} conf: 0.759 supp: 0.056 lift: 2.064 leverage: 0.029
{148} -> {11, 6} conf: 0.79 supp: 0.056 lift: 2.414 leverage: 0.033
{11, 148} -> {6} conf: 0.991 supp: 0.056 lift: 1.631 leverage: 0.022
{148, 6} -> {11} conf: 0.853 supp: 0.056 lift: 2.32 leverage: 0.032
{218, 11} -> {6} conf: 0.983 supp: 0.061 lift: 1.618 leverage: 0.023
{218, 6}

### 3.3 Python apriori

In [378]:
itemsets, rules = apriori(transactions_kosarak, min_support=0.05, min_confidence=.7)
for rule in rules:
    print(rule)

{7} -> {6} (conf: 0.847, supp: 0.074, lift: 1.394, conv: 2.567)
{11} -> {6} (conf: 0.890, supp: 0.327, lift: 1.465, conv: 3.568)
{27} -> {6} (conf: 0.824, supp: 0.060, lift: 1.356, conv: 2.227)
{148} -> {6} (conf: 0.926, supp: 0.065, lift: 1.524, conv: 5.307)
{218} -> {6} (conf: 0.877, supp: 0.078, lift: 1.443, conv: 3.184)
{148} -> {11} (conf: 0.797, supp: 0.056, lift: 2.168, conv: 3.121)
{148} -> {218} (conf: 0.841, supp: 0.059, lift: 9.400, conv: 5.736)
{1, 11} -> {6} (conf: 0.937, supp: 0.087, lift: 1.542, conv: 6.229)
{3, 11} -> {6} (conf: 0.891, supp: 0.145, lift: 1.467, conv: 3.597)
{7, 11} -> {6} (conf: 0.978, supp: 0.056, lift: 1.610, conv: 18.083)
{6, 7} -> {11} (conf: 0.759, supp: 0.056, lift: 2.063, conv: 2.618)
{11, 148} -> {6} (conf: 0.991, supp: 0.056, lift: 1.631, conv: 41.377)
{6, 148} -> {11} (conf: 0.853, supp: 0.056, lift: 2.319, conv: 4.300)
{148} -> {6, 11} (conf: 0.790, supp: 0.056, lift: 2.413, conv: 3.202)
{11, 218} -> {6} (conf: 0.983, supp: 0.061, lift: 1.619