In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

In [2]:
class Apriori:
    def __init__(self, min_supp, min_conf, products, transactions):
        self.min_supp = min_supp
        self.min_conf = min_conf
        self.products = products
        self.transactions = transactions
        self.trans_len = len(transactions)

    def occurence(self, p):
        occ = 0
        for t in self.transactions:
            occ += int(p in t)
        return occ/self.trans_len

    def supp(self, s):
        occ = 0
        for t in self.transactions:
            occ += int(s.issubset(t))
        return occ/self.trans_len
    
    def confidance(self, rule):
        return self.supp(rule[0] | rule[1])/self.supp(rule[0])
    
    def makeFrequentSets(self):
        F = list()
        F.append(frozenset({frozenset({p}) for p in self.products  if self.occurence(p) >= self.min_supp}))
        i = 1
#         print(F[0])
        while len(F[i-1]) > 0:
            C = { frozenset(L1 | L2) for L1 in F[i-1] for L2 in F[i-1] if len(L1 | L2) == i+1 and self.supp(L1 | L2) >= self.min_supp}
#             print(i,C)
            F.append(C)
            i += 1
        self.frequent_sets = list(itertools.chain.from_iterable(F))
        return self.frequent_sets
    
    def makeAssesRules(self):
        R = list()
        R.append({ (s,frozenset()) for s in self.frequent_sets})
        i = 1
        while len(R[i-1]) > 0:
            C = {(X[0] - {pre},X[1] | {pre}) for X in R[i-1] for pre in X[0] if self.confidance((X[0] - {pre},X[1] | {pre})) >= self.min_conf }
#             print(C)
            R.append(C)
            i += 1
        self.asses_rules = list(itertools.chain.from_iterable(R))
        return self.asses_rules
        
    def lift(self, rule):
        return self.confidance(rule)/self.supp(rule[1])
    
    def liftFilter(self, val):
        return [r for r in self.asses_rules if self.lift(r) >= val]
    
    def leverage(self, rule):
        return self.supp(rule[0] | rule[1]) - self.supp(rule[0]) * self.supp(rule[1])
    
    def leverageFilter(self, val):
        return [r for r in self.asses_rules if self.leverage(r) >= val]

In [3]:
allTransactions = [set([int(id) for id in line.split()]) for line in open('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/Lista06Dane1/retail.dat')]

# data = pd.read_csv('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/apriori/1000/1000-out1.csv', header=None)

In [10]:
np.max([np.max(list(t)) for t in allTransactions])

16469

In [11]:
products = list(range(16469))

In [12]:
a = Apriori(0.01,0.03,products, allTransactions)

In [13]:
a.makeFrequentSets()

[frozenset({49}),
 frozenset({589}),
 frozenset({78}),
 frozenset({533}),
 frozenset({10515}),
 frozenset({824}),
 frozenset({16010}),
 frozenset({237}),
 frozenset({338}),
 frozenset({740}),
 frozenset({179}),
 frozenset({438}),
 frozenset({475}),
 frozenset({39}),
 frozenset({677}),
 frozenset({32}),
 frozenset({242}),
 frozenset({16217}),
 frozenset({592}),
 frozenset({255}),
 frozenset({1004}),
 frozenset({123}),
 frozenset({2238}),
 frozenset({14098}),
 frozenset({147}),
 frozenset({9}),
 frozenset({522}),
 frozenset({185}),
 frozenset({310}),
 frozenset({479}),
 frozenset({413}),
 frozenset({15832}),
 frozenset({37}),
 frozenset({170}),
 frozenset({1393}),
 frozenset({60}),
 frozenset({264}),
 frozenset({2958}),
 frozenset({45}),
 frozenset({38}),
 frozenset({19}),
 frozenset({48}),
 frozenset({65}),
 frozenset({41}),
 frozenset({286}),
 frozenset({13041}),
 frozenset({956}),
 frozenset({89}),
 frozenset({270}),
 frozenset({225}),
 frozenset({1327}),
 frozenset({249}),
 frozenset

In [14]:
a.makeAssesRules()

[(frozenset({824}), frozenset()),
 (frozenset({39, 2238}), frozenset()),
 (frozenset({48, 255}), frozenset()),
 (frozenset({48, 270}), frozenset()),
 (frozenset({48}), frozenset()),
 (frozenset({48, 438}), frozenset()),
 (frozenset({39, 48, 170}), frozenset()),
 (frozenset({48, 237}), frozenset()),
 (frozenset({38, 39}), frozenset()),
 (frozenset({39, 475}), frozenset()),
 (frozenset({38, 39, 48}), frozenset()),
 (frozenset({2958}), frozenset()),
 (frozenset({39, 225}), frozenset()),
 (frozenset({677}), frozenset()),
 (frozenset({592}), frozenset()),
 (frozenset({48, 101}), frozenset()),
 (frozenset({36, 38, 48}), frozenset()),
 (frozenset({2238}), frozenset()),
 (frozenset({32, 48}), frozenset()),
 (frozenset({38, 170}), frozenset()),
 (frozenset({38, 39, 170}), frozenset()),
 (frozenset({39, 79}), frozenset()),
 (frozenset({38, 39, 41, 48}), frozenset()),
 (frozenset({45}), frozenset()),
 (frozenset({36, 38}), frozenset()),
 (frozenset({1393}), frozenset()),
 (frozenset({41}), frozen

In [29]:
a.liftFilter(5)

[(frozenset({38, 48}), frozenset({110})),
 (frozenset({110}), frozenset({38})),
 (frozenset({36, 48}), frozenset({38})),
 (frozenset({38, 39}), frozenset({36})),
 (frozenset({38, 39}), frozenset({110})),
 (frozenset({36}), frozenset({38})),
 (frozenset({38, 48}), frozenset({36})),
 (frozenset({39, 48, 110}), frozenset({38})),
 (frozenset({39, 170}), frozenset({38})),
 (frozenset({37}), frozenset({38})),
 (frozenset({38, 39}), frozenset({170})),
 (frozenset({38, 48}), frozenset({170})),
 (frozenset({39, 48, 170}), frozenset({38})),
 (frozenset({48, 170}), frozenset({38})),
 (frozenset({286}), frozenset({38})),
 (frozenset({38}), frozenset({37})),
 (frozenset({36, 39}), frozenset({38})),
 (frozenset({38, 39, 48}), frozenset({110})),
 (frozenset({38}), frozenset({36})),
 (frozenset({38, 39, 48}), frozenset({36})),
 (frozenset({38}), frozenset({110})),
 (frozenset({38, 39, 48}), frozenset({170})),
 (frozenset({36, 39, 48}), frozenset({38})),
 (frozenset({48, 110}), frozenset({38})),
 (froz

In [49]:
a.leverageFilter(0.02)

[(frozenset({41, 48}), frozenset({39})),
 (frozenset({110}), frozenset({38})),
 (frozenset({39}), frozenset({41})),
 (frozenset({48}), frozenset({39})),
 (frozenset({36}), frozenset({38})),
 (frozenset({41}), frozenset({48})),
 (frozenset({48}), frozenset({41})),
 (frozenset({38}), frozenset({36})),
 (frozenset({38}), frozenset({110})),
 (frozenset({39}), frozenset({48})),
 (frozenset({39, 41}), frozenset({48})),
 (frozenset({38}), frozenset({170})),
 (frozenset({170}), frozenset({38})),
 (frozenset({39, 48}), frozenset({41})),
 (frozenset({41}), frozenset({39})),
 (frozenset({48}), frozenset({39, 41})),
 (frozenset({41}), frozenset({39, 48})),
 (frozenset({39}), frozenset({41, 48}))]

In [50]:
a.occurence(41)

0.16951747918604387

Ciekawe:  
38 $\to$ 37  
38 $\to$ 286  
170 $\to$ 38  
110 $\to$ 38

In [52]:
a.lift(({38}, {286}))

5.3327061992400235