### import scipy
import numpy as np

In [1]:
TRANSACTIONS = [[1, 2, 3, 4, 5], [1, 3, 5], [2, 3, 5], [1, 5], [1, 3, 4], [2, 3, 5], [2, 3, 5],
                    [3, 4, 5], [4, 5], [2], [2, 3], [2, 3, 4], [3, 4, 5]]

In [2]:
def load_dataset():
    """Load the sample data."""
    return TRANSACTIONS

In [3]:
def createC1(dataset):
    """Create the first l-itemset C1 of size one."""
    c1 = []
    for t in dataset:
        for i in t:
            if not [i] in c1:
                c1.append([i])
    c1.sort()
    return map(frozenset, c1)

In [4]:
def scanD(dataset, candidates, min_sup):
    """Return candidates l-itemsets satisfying minimum support."""
    csc = {}
    for t in dataset:
        for c in candidates:
            if c.issubset(t):
                csc.setdefault(c, 0)
                csc[c] += 1
    
    items_len = float(len(dataset))
    freq = []
    support_data = {}
    for key in csc:
        can_support = csc[key] / items_len
        if can_support >= min_sup:
            freq.insert(0, key)
        support_data[key] = can_support
    return freq, support_data

In [5]:
dataset = load_dataset()

In [None]:
can = createC1(dataset)

In [None]:
can

In [None]:
items, support = scanD(dataset, list(can), 0.5)

In [None]:
items

In [6]:
def joinSet(itemSet, length):
    """Join a set with itself."""
    return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])
    

In [None]:
l2 = joinSet(can, 2)

In [7]:
def apriori(dataset, min_sup=0.2):
    """Generation of frequent itemsets"""
    C1 = list(createC1(dataset))
    D = list(map(set, dataset))
    L1, support_data = scanD(D, C1, min_sup)
    L = [L1]
    k = 2
    while(len(L[k-2]) > 0):
        candidates = joinSet(L[k-2], k)
        Lk, suppK = scanD(D, candidates, min_sup)
        support_data.update(suppK)
        L.append(Lk)
        k += 1
    
    return L, support_data
        

In [8]:
L, support = apriori(dataset)

In [None]:
support

In [9]:
def confidence(itemset, H, support_data, rules, min_conf=0.7):
    """Evaluate the rule generated"""
    prunedSet = []
    for h in H:
        conf = support_data[itemset] / support_data[itemset - h]
        if conf >= min_conf:
            print(str(itemset-h) + "------>" + str(h) + " confidence: " + str(conf))
            rules.append((itemset-h, h, conf))
            prunedSet.append(h)
    return prunedSet

In [15]:
def rules_(itemset, H, support_data, rules, min_conf=0.7):
    """Generate a set of candidate rules."""
    m = len(H[0])
    if(len(itemset) > (m+1)):
        hmp = joinSet(H, m+1)
        hmp = confidence(itemset, hmp, support_data, rules, min_conf)
        if len(hmp) > 1:
            rules(itemset, hmp, support_data, rules, min_conf)

In [17]:
def generateRules(L, support_data, min_conf=0.7):
    """Generate the association rules."""
    rules = []
    for i in range(1, len(L)):
        for itemset in L[i]:
            H1 = [frozenset([item]) for item in itemset]
            print("FrequentSet " + str(itemset) + "H1 " + str(H1) + str(i))
            if(i > 1):
                rules_(itemset, H1, support_data, rules, min_conf)
            else:
                confidence(itemset, H1, support_data, rules, min_conf)
    return rules

In [18]:
generateRules(L, support)

FrequentSet frozenset({4, 5})H1 [frozenset({4}), frozenset({5})]1
FrequentSet frozenset({3, 5})H1 [frozenset({3}), frozenset({5})]1
frozenset({5})------>frozenset({3}) confidence: 0.7777777777777778
frozenset({3})------>frozenset({5}) confidence: 0.7
FrequentSet frozenset({1, 5})H1 [frozenset({1}), frozenset({5})]1
frozenset({1})------>frozenset({5}) confidence: 0.75
FrequentSet frozenset({1, 3})H1 [frozenset({1}), frozenset({3})]1
frozenset({1})------>frozenset({3}) confidence: 0.75
FrequentSet frozenset({2, 5})H1 [frozenset({2}), frozenset({5})]1
FrequentSet frozenset({2, 3})H1 [frozenset({2}), frozenset({3})]1
frozenset({2})------>frozenset({3}) confidence: 0.8571428571428572
FrequentSet frozenset({3, 4})H1 [frozenset({3}), frozenset({4})]1
frozenset({4})------>frozenset({3}) confidence: 0.8333333333333334
FrequentSet frozenset({2, 3, 5})H1 [frozenset({2}), frozenset({3}), frozenset({5})]2
FrequentSet frozenset({3, 4, 5})H1 [frozenset({3}), frozenset({4}), frozenset({5})]2


[(frozenset({5}), frozenset({3}), 0.7777777777777778),
 (frozenset({3}), frozenset({5}), 0.7),
 (frozenset({1}), frozenset({5}), 0.75),
 (frozenset({1}), frozenset({3}), 0.75),
 (frozenset({2}), frozenset({3}), 0.8571428571428572),
 (frozenset({4}), frozenset({3}), 0.8333333333333334)]

In [None]:
L