In [71]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

In [401]:
help(set)

Help on class set in module builtins:

class set(object)
 |  set() -> new empty set object
 |  set(iterable) -> new set object
 |  
 |  Build an unordered collection of unique elements.
 |  
 |  Methods defined here:
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iand__(self, value, /)
 |      Return self&=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __ior__(self, value, /)
 |      Return self|=value.
 |  
 |  __isub__(self, value, /)
 |      Return self-=value.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __ixor__(self, value, /)
 |      Re

In [1]:
class Apriori:
    def __init__(self, min_supp, min_conf, products, transactions):
        self.min_supp = min_supp
        self.min_conf = min_conf
        self.products = products
        self.transactions = transactions
        self.trans_len = len(transactions)

    def occurence(self, p):
        occ = 0
        for t in self.transactions:
            occ += int(p in t)
        return occ/self.trans_len

    def supp(self, s):
        occ = 0
        for t in self.transactions:
            occ += int(s.issubset(t))
        return occ/self.trans_len
    
    def confidance(self, rule):
        return self.supp(rule[0] | rule[1])/self.supp(rule[0])
    
    def makeFrequentSets(self):
        F = list()
        F.append(frozenset({frozenset({p}) for p in self.products  if self.occurence(p) >= self.min_supp}))
        i = 1
#         print(F[0])
        while len(F[i-1]) > 0:
            C = { frozenset(L1 | L2) for L1 in F[i-1] for L2 in F[i-1] if len(L1 | L2) == i+1 and self.supp(L1 | L2) >= self.min_supp}
#             print(i,C)
            F.append(C)
            i += 1
        self.frequent_sets = list(itertools.chain.from_iterable(F))
        return self.frequent_sets
    
    def makeAssesRules(self, computeOther=True):
        R = list()
        R.append({ (s,frozenset()) for s in self.frequent_sets})
        i = 1
        while len(R[i-1]) > 0:
            C = {(X[0] - {pre},X[1] | {pre}) for X in R[i-1] for pre in X[0] if self.confidance((X[0] - {pre},X[1] | {pre})) >= self.min_conf }
#             print(C)
            R.append(C)
            i += 1
        self.asses_rules = list(itertools.chain.from_iterable(R))
        
        if computeOther:
            self.rulesLift = {}
            self.rulesLeverage = {}
            for r in self.asses_rules:
                self.rulesLift[r] = self.lift(r)
                self.rulesLeverage[r] = self.leverage(r)
        return self.asses_rules
        
    def lift(self, rule):
        return self.confidance(rule)/self.supp(rule[1])
    
    def liftFilter(self, val):
        return [r for r in self.asses_rules if self.lift(r) >= val]
    
    def leverage(self, rule):
        return self.supp(rule[0] | rule[1]) - self.supp(rule[0]) * self.supp(rule[1])
    
    def leverageFilter(self, val):
        return [r for r in self.asses_rules if self.leverage(r) >= val]

In [433]:
a = Apriori(0.5,0.7,[1,2,3], [{1,2}, {2,3}, {1,2}])
a.makeFrequentSets()
a.frequent_sets

[frozenset({2}), frozenset({1}), frozenset({1, 2})]

In [434]:
a.makeAssesRules()
a.asses_rules

[(frozenset({1}), frozenset()),
 (frozenset({2}), frozenset()),
 (frozenset({1, 2}), frozenset()),
 (frozenset({1}), frozenset({2})),
 (frozenset(), frozenset({2}))]

In [435]:
products = np.arange(10)
transactions = np.array([set(products[np.random.randint(0,10,np.random.randint(0,10))]) for i in range(100)])

In [436]:
a = Apriori(0.1, 0.2, products, transactions)

In [437]:
a.makeFrequentSets()

[frozenset({8}),
 frozenset({4}),
 frozenset({9}),
 frozenset({7}),
 frozenset({5}),
 frozenset({3}),
 frozenset({0}),
 frozenset({6}),
 frozenset({1}),
 frozenset({2}),
 frozenset({5, 7}),
 frozenset({3, 7}),
 frozenset({1, 9}),
 frozenset({3, 6}),
 frozenset({5, 6}),
 frozenset({1, 5}),
 frozenset({2, 6}),
 frozenset({2, 8}),
 frozenset({6, 9}),
 frozenset({1, 2}),
 frozenset({8, 9}),
 frozenset({0, 9}),
 frozenset({0, 6}),
 frozenset({0, 5}),
 frozenset({3, 5}),
 frozenset({3, 9}),
 frozenset({3, 4}),
 frozenset({5, 9}),
 frozenset({2, 3}),
 frozenset({0, 2}),
 frozenset({2, 5}),
 frozenset({1, 6}),
 frozenset({0, 1}),
 frozenset({4, 9}),
 frozenset({0, 4}),
 frozenset({5, 8}),
 frozenset({1, 7}),
 frozenset({3, 8}),
 frozenset({0, 8}),
 frozenset({1, 3}),
 frozenset({0, 3}),
 frozenset({2, 7}),
 frozenset({2, 4}),
 frozenset({2, 9}),
 frozenset({1, 8}),
 frozenset({0, 7}),
 frozenset({4, 6}),
 frozenset({6, 8}),
 frozenset({7, 9}),
 frozenset({1, 3, 5}),
 frozenset({1, 2, 8}),
 fro

In [441]:
a.makeAssesRules()
a.leverageFilter(0.05)

[(frozenset({2, 3}), frozenset({8})),
 (frozenset({3}), frozenset({2})),
 (frozenset({6}), frozenset({9})),
 (frozenset({1}), frozenset({5})),
 (frozenset({8}), frozenset({9})),
 (frozenset({3, 8}), frozenset({2})),
 (frozenset({9}), frozenset({8})),
 (frozenset({2}), frozenset({8})),
 (frozenset({3, 5}), frozenset({1})),
 (frozenset({9}), frozenset({6})),
 (frozenset({8}), frozenset({3})),
 (frozenset({3}), frozenset({8})),
 (frozenset({2}), frozenset({3})),
 (frozenset({8}), frozenset({2})),
 (frozenset({3, 6}), frozenset({8})),
 (frozenset({5}), frozenset({1})),
 (frozenset({8}), frozenset({3, 6})),
 (frozenset({2}), frozenset({3, 8})),
 (frozenset({1}), frozenset({3, 5})),
 (frozenset({8}), frozenset({2, 3}))]

In [442]:
allTransactions = [set([int(id.replace(',', '')) for id in line.split()][1:]) for line in open('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/apriori/1000/1000-out1.csv')]

# data = pd.read_csv('/home/dominik/Dokumenty/Studia/Data-mining/Lista6-assiessing_rules/apriori/1000/1000-out1.csv', header=None)

In [443]:
allTransactions

[{7, 15, 44, 49},
 {1, 19},
 {1, 19},
 {3, 4, 15, 18, 35, 44},
 {2, 4, 7, 9, 23},
 {14, 21, 44},
 {4, 12, 31, 36, 44, 48},
 {15, 27, 28},
 {2, 28},
 {3, 18, 35},
 {23, 24, 40, 41, 43},
 {20, 43, 48},
 {49},
 {1, 19, 26},
 {5, 22, 39},
 {16, 32, 45},
 {4, 6, 9, 10, 16, 22},
 {1, 19, 23},
 {7, 11, 37, 45},
 {3, 18, 32, 35},
 {1, 8, 19, 47},
 {34, 39, 44},
 {13, 19},
 {4, 9, 38},
 {7, 22, 48},
 {7, 11, 14},
 {23, 24, 40, 41, 43},
 {9, 14},
 {0, 2, 42},
 {13, 35},
 {23},
 {8, 21, 25, 38},
 {4, 46},
 {23, 24, 40, 41, 43},
 {4, 17, 29, 47},
 {12, 31, 36},
 {14, 22, 26, 37, 44},
 {0, 16, 30, 32, 45, 47},
 {1, 11, 19, 25, 27, 29, 46},
 {15, 16, 18, 21, 26},
 {4, 10, 14},
 {3, 36},
 {23, 27, 28},
 {15, 21, 40},
 {10, 19, 25, 32},
 {11, 22, 44},
 {8},
 {0, 2, 46},
 {33, 42},
 {28, 39},
 {7, 17, 28},
 {1, 19},
 {32, 34},
 {0, 2, 46},
 {15, 30, 45},
 {39, 49},
 {46},
 {4, 9, 19},
 {0, 2, 16, 19, 46},
 {17, 21, 40},
 {2, 4, 6, 9, 39},
 {23, 24, 40, 41, 43},
 {13, 27, 28},
 {40},
 {12, 14, 44},
 {27

In [444]:
products = list(range(100))

In [449]:
a = Apriori(0.01,0.07,products, allTransactions)

In [450]:
a.makeFrequentSets()

[frozenset({49}),
 frozenset({38}),
 frozenset({21}),
 frozenset({19}),
 frozenset({33}),
 frozenset({22}),
 frozenset({44}),
 frozenset({5}),
 frozenset({3}),
 frozenset({28}),
 frozenset({48}),
 frozenset({29}),
 frozenset({41}),
 frozenset({30}),
 frozenset({39}),
 frozenset({20}),
 frozenset({42}),
 frozenset({32}),
 frozenset({13}),
 frozenset({11}),
 frozenset({14}),
 frozenset({23}),
 frozenset({4}),
 frozenset({34}),
 frozenset({17}),
 frozenset({6}),
 frozenset({47}),
 frozenset({18}),
 frozenset({40}),
 frozenset({1}),
 frozenset({31}),
 frozenset({12}),
 frozenset({24}),
 frozenset({25}),
 frozenset({26}),
 frozenset({16}),
 frozenset({9}),
 frozenset({7}),
 frozenset({10}),
 frozenset({0}),
 frozenset({2}),
 frozenset({37}),
 frozenset({35}),
 frozenset({15}),
 frozenset({8}),
 frozenset({45}),
 frozenset({43}),
 frozenset({46}),
 frozenset({36}),
 frozenset({27}),
 frozenset({31, 48}),
 frozenset({2, 46}),
 frozenset({32, 45}),
 frozenset({18, 35}),
 frozenset({7, 15}),
 f

In [451]:
a.makeAssesRules()

[(frozenset({5}), frozenset()),
 (frozenset({48}), frozenset()),
 (frozenset({29}), frozenset()),
 (frozenset({3, 18, 35}), frozenset()),
 (frozenset({30}), frozenset()),
 (frozenset({27}), frozenset()),
 (frozenset({2, 46}), frozenset()),
 (frozenset({27, 28}), frozenset()),
 (frozenset({33}), frozenset()),
 (frozenset({12}), frozenset()),
 (frozenset({23, 43}), frozenset()),
 (frozenset({3, 35}), frozenset()),
 (frozenset({17, 47}), frozenset()),
 (frozenset({24, 41}), frozenset()),
 (frozenset({8}), frozenset()),
 (frozenset({16, 45}), frozenset()),
 (frozenset({45}), frozenset()),
 (frozenset({23, 40}), frozenset()),
 (frozenset({12, 31, 36}), frozenset()),
 (frozenset({41}), frozenset()),
 (frozenset({42}), frozenset()),
 (frozenset({7, 49}), frozenset()),
 (frozenset({7, 37, 45}), frozenset()),
 (frozenset({0, 46}), frozenset()),
 (frozenset({47}), frozenset()),
 (frozenset({16}), frozenset()),
 (frozenset({15, 49}), frozenset()),
 (frozenset({40, 43}), frozenset()),
 (frozenset(

In [462]:
a.liftFilter(32)

[(frozenset({23, 40, 41}), frozenset({24, 43})),
 (frozenset({23, 24, 43}), frozenset({40, 41})),
 (frozenset({24, 43}), frozenset({40, 41})),
 (frozenset({23, 41, 43}), frozenset({24, 40})),
 (frozenset({40, 41}), frozenset({24, 43})),
 (frozenset({23, 40, 43}), frozenset({24, 41})),
 (frozenset({24, 43}), frozenset({23, 41})),
 (frozenset({24, 40, 41}), frozenset({23, 43})),
 (frozenset({24, 40, 43}), frozenset({23, 41})),
 (frozenset({40, 41}), frozenset({23, 43})),
 (frozenset({23, 43}), frozenset({40, 41})),
 (frozenset({24, 41, 43}), frozenset({23, 40})),
 (frozenset({23, 41}), frozenset({24, 43})),
 (frozenset({23, 40}), frozenset({24, 41, 43})),
 (frozenset({40, 41}), frozenset({23, 24, 43})),
 (frozenset({23, 43}), frozenset({24, 40, 41})),
 (frozenset({23, 41}), frozenset({24, 40, 43})),
 (frozenset({24, 40}), frozenset({23, 41, 43})),
 (frozenset({24, 43}), frozenset({23, 40, 41})),
 (frozenset({24, 41}), frozenset({23, 40, 43}))]