In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

In [2]:
class Apriori:
    def __init__(self, min_supp, min_conf, products, transactions):
        self.min_supp = min_supp
        self.min_conf = min_conf
        self.products = products
        self.transactions = transactions
        self.trans_len = len(transactions)

    def occurence(self, p):
        occ = 0
        for t in self.transactions:
            occ += int(p in t)
        return occ/self.trans_len

    def supp(self, s):
        occ = 0
        for t in self.transactions:
            occ += int(s.issubset(t))
        return occ/self.trans_len
    
    def confidance(self, rule):
        return self.supp(rule[0] | rule[1])/self.supp(rule[0])
    
    def makeFrequentSets(self):
        F = list()
        F.append(frozenset({frozenset({p}) for p in self.products  if self.occurence(p) >= self.min_supp}))
        i = 1
#         print(F[0])
        while len(F[i-1]) > 0:
            C = { frozenset(L1 | L2) for L1 in F[i-1] for L2 in F[i-1] if len(L1 | L2) == i+1 and self.supp(L1 | L2) >= self.min_supp}
#             print(i,C)
            F.append(C)
            i += 1
        self.frequent_sets = list(itertools.chain.from_iterable(F))
        return self.frequent_sets
    
    def makeAssesRules(self):
        R = list()
        R.append({ (s,frozenset()) for s in self.frequent_sets})
        i = 1
        while len(R[i-1]) > 0:
            C = {(X[0] - {pre},X[1] | {pre}) for X in R[i-1] for pre in X[0] if self.confidance((X[0] - {pre},X[1] | {pre})) >= self.min_conf }
#             print(C)
            R.append(C)
            i += 1
        self.asses_rules = list(itertools.chain.from_iterable(R))
        return self.asses_rules
        
    def lift(self, rule):
        return self.confidance(rule)/self.supp(rule[1])
    
    def liftFilter(self, val):
        return [r for r in self.asses_rules if self.lift(r) >= val]
    
    def leverage(self, rule):
        return self.supp(rule[0] | rule[1]) - self.supp(rule[0]) * self.supp(rule[1])
    
    def leverageFilter(self, val):
        return [r for r in self.asses_rules if self.leverage(r) >= val]

In [3]:
allTransactions = [set([int(id) for id in line.split()]) for line in open('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/kosarak.dat')]

# data = pd.read_csv('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/apriori/1000/1000-out1.csv', header=None)

In [4]:
np.max([np.max(list(t)) for t in allTransactions])

41270

In [5]:
products = list(range(41270))

In [10]:
a = Apriori(0.01,0.03,products, allTransactions)

In [11]:
a.makeFrequentSets()

[frozenset({229}),
 frozenset({49}),
 frozenset({294}),
 frozenset({77}),
 frozenset({87}),
 frozenset({278}),
 frozenset({56}),
 frozenset({254}),
 frozenset({135}),
 frozenset({138}),
 frozenset({3}),
 frozenset({28}),
 frozenset({314}),
 frozenset({438}),
 frozenset({987}),
 frozenset({32}),
 frozenset({269}),
 frozenset({11}),
 frozenset({136}),
 frozenset({14}),
 frozenset({4}),
 frozenset({90}),
 frozenset({737}),
 frozenset({504}),
 frozenset({73}),
 frozenset({446}),
 frozenset({64}),
 frozenset({273}),
 frozenset({6}),
 frozenset({303}),
 frozenset({40}),
 frozenset({1}),
 frozenset({667}),
 frozenset({155}),
 frozenset({490}),
 frozenset({205}),
 frozenset({25}),
 frozenset({215}),
 frozenset({7}),
 frozenset({512}),
 frozenset({2}),
 frozenset({91}),
 frozenset({423}),
 frozenset({148}),
 frozenset({316}),
 frozenset({218}),
 frozenset({83}),
 frozenset({364}),
 frozenset({69}),
 frozenset({55}),
 frozenset({378}),
 frozenset({897}),
 frozenset({27}),
 frozenset({361}),
 fro

In [12]:
a.makeAssesRules()

[(frozenset({1, 6, 11, 148, 218}), frozenset()),
 (frozenset({6, 7, 148, 218}), frozenset()),
 (frozenset({3, 6, 7}), frozenset()),
 (frozenset({27}), frozenset()),
 (frozenset({3, 4, 6, 11}), frozenset()),
 (frozenset({1, 3, 6, 148}), frozenset()),
 (frozenset({1, 6, 7, 11}), frozenset()),
 (frozenset({3, 64}), frozenset()),
 (frozenset({3, 6, 64}), frozenset()),
 (frozenset({11, 316}), frozenset()),
 (frozenset({77, 148}), frozenset()),
 (frozenset({1, 3, 6, 148, 218}), frozenset()),
 (frozenset({3, 11, 148, 218}), frozenset()),
 (frozenset({1, 138}), frozenset()),
 (frozenset({6, 136}), frozenset()),
 (frozenset({1, 6, 7, 27}), frozenset()),
 (frozenset({11, 504}), frozenset()),
 (frozenset({6, 11, 40}), frozenset()),
 (frozenset({6, 87}), frozenset()),
 (frozenset({1, 3, 6, 218}), frozenset()),
 (frozenset({87}), frozenset()),
 (frozenset({278}), frozenset()),
 (frozenset({7, 218}), frozenset()),
 (frozenset({83}), frozenset()),
 (frozenset({6, 148, 218}), frozenset()),
 (frozenset

In [15]:
a.liftFilter(5)

[(frozenset({11, 148}), frozenset({987})),
 (frozenset({218}), frozenset({90})),
 (frozenset({6, 11, 27, 87}), frozenset({7})),
 (frozenset({278}), frozenset({7})),
 (frozenset({7, 11, 218}), frozenset({148})),
 (frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({6, 148}), frozenset({987})),
 (frozenset({3, 11, 148}), frozenset({218})),
 (frozenset({1, 3, 218}), frozenset({148})),
 (frozenset({6, 11, 83}), frozenset({7})),
 (frozenset({7, 11}), frozenset({205})),
 (frozenset({1, 6, 11}), frozenset({90})),
 (frozenset({6, 87}), frozenset({7})),
 (frozenset({987}), frozenset({148})),
 (frozenset({6, 7, 148}), frozenset({218})),
 (frozenset({11, 87}), frozenset({27})),
 (frozenset({1, 3, 148}), frozenset({218})),
 (frozenset({11, 27, 87}), frozenset({7})),
 (frozenset({6, 11, 987}), frozenset({148})),
 (frozenset({27}), frozenset({83})),
 (frozenset({6, 7}), frozenset({897})),
 (frozenset({1, 11, 148}), frozenset({218})),
 (frozenset({6, 11, 205}), frozenset({7})),
 (frozenset({3, 

In [17]:
a.liftFilter(10)

[(frozenset({11, 148}), frozenset({987})),
 (frozenset({6, 11, 27, 87}), frozenset({7})),
 (frozenset({7, 11, 218}), frozenset({148})),
 (frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({6, 148}), frozenset({987})),
 (frozenset({1, 3, 218}), frozenset({148})),
 (frozenset({6, 87}), frozenset({7})),
 (frozenset({987}), frozenset({148})),
 (frozenset({6, 7, 148}), frozenset({218})),
 (frozenset({11, 87}), frozenset({27})),
 (frozenset({11, 27, 87}), frozenset({7})),
 (frozenset({6, 11, 987}), frozenset({148})),
 (frozenset({6, 7}), frozenset({897})),
 (frozenset({1, 11, 148}), frozenset({218})),
 (frozenset({3, 6, 11, 218}), frozenset({148})),
 (frozenset({11, 87}), frozenset({7})),
 (frozenset({6, 7, 27}), frozenset({87})),
 (frozenset({11, 987}), frozenset({148})),
 (frozenset({1, 11, 218}), frozenset({148})),
 (frozenset({7, 11}), frozenset({87})),
 (frozenset({11, 218}), frozenset({148})),
 (frozenset({6, 897}), frozenset({7})),
 (frozenset({7}), frozenset({87})),
 (frozense

In [18]:
a.liftFilter(12)

[(frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({6, 11, 987}), frozenset({148})),
 (frozenset({6, 7, 27}), frozenset({87})),
 (frozenset({11, 987}), frozenset({148})),
 (frozenset({1, 11, 218}), frozenset({148})),
 (frozenset({7, 11}), frozenset({87})),
 (frozenset({7, 27}), frozenset({87})),
 (frozenset({6, 7, 27}), frozenset({205})),
 (frozenset({11, 27, 218}), frozenset({148})),
 (frozenset({6, 7, 11}), frozenset({87})),
 (frozenset({1, 6, 11, 218}), frozenset({148})),
 (frozenset({7, 11, 27}), frozenset({87})),
 (frozenset({7, 11}), frozenset({897})),
 (frozenset({6, 11, 27, 218}), frozenset({148})),
 (frozenset({11, 27}), frozenset({87})),
 (frozenset({7, 27}), frozenset({205})),
 (frozenset({6, 11, 27}), frozenset({87})),
 (frozenset({6, 27, 218}), frozenset({11, 148})),
 (frozenset({205}), frozenset({7, 27})),
 (frozenset({6, 27}), frozenset({11, 87})),
 (frozenset({11, 27, 87}), frozenset({6, 7})),
 (frozenset({11, 148}), frozenset({1, 218})),
 (frozenset({1, 218}), 

In [19]:
a.liftFilter(15)

[(frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({6, 7, 27}), frozenset({87})),
 (frozenset({7, 27}), frozenset({87})),
 (frozenset({7, 11, 27}), frozenset({87})),
 (frozenset({11, 87}), frozenset({7, 27})),
 (frozenset({6, 7, 27}), frozenset({11, 87})),
 (frozenset({87}), frozenset({7, 27})),
 (frozenset({6, 87}), frozenset({7, 27})),
 (frozenset({7, 27}), frozenset({11, 87})),
 (frozenset({7, 27}), frozenset({6, 87})),
 (frozenset({6, 11, 87}), frozenset({7, 27})),
 (frozenset({7, 11, 27}), frozenset({6, 87})),
 (frozenset({6, 87}), frozenset({7, 11, 27})),
 (frozenset({87}), frozenset({6, 7, 27})),
 (frozenset({7, 27}), frozenset({6, 11, 87})),
 (frozenset({11, 87}), frozenset({6, 7, 27})),
 (frozenset({87}), frozenset({7, 11, 27})),
 (frozenset({87}), frozenset({6, 7, 11, 27}))]

In [20]:
a.liftFilter(17)

[(frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({6, 7, 27}), frozenset({87})),
 (frozenset({7, 27}), frozenset({87})),
 (frozenset({7, 11, 27}), frozenset({87})),
 (frozenset({11, 87}), frozenset({7, 27})),
 (frozenset({6, 7, 27}), frozenset({11, 87})),
 (frozenset({87}), frozenset({7, 27})),
 (frozenset({6, 87}), frozenset({7, 27})),
 (frozenset({7, 27}), frozenset({11, 87})),
 (frozenset({7, 27}), frozenset({6, 87})),
 (frozenset({6, 11, 87}), frozenset({7, 27})),
 (frozenset({7, 11, 27}), frozenset({6, 87})),
 (frozenset({6, 87}), frozenset({7, 11, 27})),
 (frozenset({87}), frozenset({6, 7, 27})),
 (frozenset({7, 27}), frozenset({6, 11, 87})),
 (frozenset({11, 87}), frozenset({6, 7, 27})),
 (frozenset({87}), frozenset({7, 11, 27})),
 (frozenset({87}), frozenset({6, 7, 11, 27}))]

In [21]:
a.leverageFilter(0.02)

[(frozenset({11}), frozenset({148})),
 (frozenset({1, 11, 148}), frozenset({218})),
 (frozenset({3, 218}), frozenset({148})),
 (frozenset({6, 148}), frozenset({11})),
 (frozenset({6, 11}), frozenset({148})),
 (frozenset({1, 11, 218}), frozenset({148})),
 (frozenset({3, 11}), frozenset({6})),
 (frozenset({11, 218}), frozenset({148})),
 (frozenset({6, 11, 218}), frozenset({148})),
 (frozenset({6, 11}), frozenset({7})),
 (frozenset({3, 6}), frozenset({11})),
 (frozenset({6, 7, 11}), frozenset({27})),
 (frozenset({6, 11}), frozenset({1})),
 (frozenset({6, 148}), frozenset({218})),
 (frozenset({6, 27}), frozenset({7})),
 (frozenset({6, 218}), frozenset({11})),
 (frozenset({6, 218}), frozenset({148})),
 (frozenset({218}), frozenset({148})),
 (frozenset({1, 11}), frozenset({6})),
 (frozenset({1, 6, 11, 148}), frozenset({218})),
 (frozenset({11, 148}), frozenset({6})),
 (frozenset({11}), frozenset({218})),
 (frozenset({148}), frozenset({218})),
 (frozenset({1, 6}), frozenset({11})),
 (frozense

In [22]:
a.leverageFilter(0.01)

[(frozenset({11, 148}), frozenset({987})),
 (frozenset({148}), frozenset({1})),
 (frozenset({11}), frozenset({83})),
 (frozenset({83}), frozenset({11})),
 (frozenset({7, 11, 218}), frozenset({148})),
 (frozenset({1, 6, 7}), frozenset({11})),
 (frozenset({1, 3, 11}), frozenset({6})),
 (frozenset({6, 11, 218}), frozenset({1})),
 (frozenset({6, 148}), frozenset({987})),
 (frozenset({3, 11, 148}), frozenset({218})),
 (frozenset({1, 148}), frozenset({11})),
 (frozenset({148, 218}), frozenset({1})),
 (frozenset({6, 11}), frozenset({83})),
 (frozenset({3, 6}), frozenset({218})),
 (frozenset({3, 6, 7}), frozenset({11})),
 (frozenset({1, 3, 218}), frozenset({148})),
 (frozenset({6, 11, 83}), frozenset({7})),
 (frozenset({3, 6, 218}), frozenset({11})),
 (frozenset({218}), frozenset({1})),
 (frozenset({7, 11}), frozenset({205})),
 (frozenset({6, 138}), frozenset({11})),
 (frozenset({6, 87}), frozenset({7})),
 (frozenset({987}), frozenset({148})),
 (frozenset({1, 11}), frozenset({7})),
 (frozenset

In [24]:
a.leverageFilter(0.005)

[(frozenset({11, 148}), frozenset({987})),
 (frozenset({438}), frozenset({6})),
 (frozenset({218}), frozenset({90})),
 (frozenset({6, 87}), frozenset({11})),
 (frozenset({148}), frozenset({1})),
 (frozenset({6, 7, 205}), frozenset({11})),
 (frozenset({6, 11, 27, 87}), frozenset({7})),
 (frozenset({218}), frozenset({64})),
 (frozenset({3, 11}), frozenset({77})),
 (frozenset({278}), frozenset({7})),
 (frozenset({11}), frozenset({314})),
 (frozenset({1, 6, 90}), frozenset({11})),
 (frozenset({6, 148}), frozenset({7})),
 (frozenset({11}), frozenset({83})),
 (frozenset({83}), frozenset({11})),
 (frozenset({7, 11, 218}), frozenset({148})),
 (frozenset({6, 504}), frozenset({11})),
 (frozenset({1, 6, 7}), frozenset({11})),
 (frozenset({1, 3, 148}), frozenset({11})),
 (frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({11, 218}), frozenset({64})),
 (frozenset({1, 3, 11}), frozenset({6})),
 (frozenset({6, 11, 218}), frozenset({1})),
 (frozenset({7, 11, 148}), frozenset({6})),
 (frozenset(

In [25]:
a.leverageFilter(0.003)

[(frozenset({11, 148}), frozenset({987})),
 (frozenset({438}), frozenset({6})),
 (frozenset({218}), frozenset({90})),
 (frozenset({6, 87}), frozenset({11})),
 (frozenset({148}), frozenset({1})),
 (frozenset({6, 7, 205}), frozenset({11})),
 (frozenset({6, 11, 27, 87}), frozenset({7})),
 (frozenset({218}), frozenset({64})),
 (frozenset({3, 11}), frozenset({77})),
 (frozenset({278}), frozenset({7})),
 (frozenset({11}), frozenset({314})),
 (frozenset({1, 6, 90}), frozenset({11})),
 (frozenset({49}), frozenset({6})),
 (frozenset({6, 148}), frozenset({7})),
 (frozenset({11}), frozenset({83})),
 (frozenset({83}), frozenset({11})),
 (frozenset({7, 11, 218}), frozenset({148})),
 (frozenset({6, 504}), frozenset({11})),
 (frozenset({27, 148, 218}), frozenset({6})),
 (frozenset({7, 11, 27, 87}), frozenset({6})),
 (frozenset({1, 6, 7}), frozenset({11})),
 (frozenset({1, 3, 148}), frozenset({11})),
 (frozenset({6, 7, 11, 27}), frozenset({87})),
 (frozenset({11, 218}), frozenset({64})),
 (frozenset({

In [27]:
a.leverageFilter(0.1)

[(frozenset({6}), frozenset({11})), (frozenset({11}), frozenset({6}))]

In [28]:
a.leverageFilter(0.05)

[(frozenset({6, 148}), frozenset({218})),
 (frozenset({6, 218}), frozenset({148})),
 (frozenset({218}), frozenset({148})),
 (frozenset({148}), frozenset({218})),
 (frozenset({6}), frozenset({11})),
 (frozenset({11}), frozenset({6})),
 (frozenset({148}), frozenset({6, 218})),
 (frozenset({218}), frozenset({6, 148}))]

In [41]:
a.occurence(87)

0.018670669352183127

Ciekawe:  
{ 7, 27 } $\to$ 87  
148 $\to$ 218  