# FP-Growth algorithm

Frequent Pattern Trees

* **item-prefix tree** with nodes consisting of
    * **intem-name**
    * **count**
    * **node-link**
* **frequent item header table** with entries consisting of
    * **item-name**
    * **head of node link**

Goal $\Rightarrow$ provide compact representation of transaction databases

In [24]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

# Database D
D = [
    "facdgimq",
    "abcflmo",
    "bfhjow",
    "bcksq",
    "afcelqmn"
]

t = 3

class FP_Node():
    
    def __init__(self, name, count, link, T):
        self.Tree = T
        self.name = name
        self.count = count
        self.node_link = link
        self.children = {}
    
    def add(self, node):
        assert(node.name not in self.children)
        self.children[node.name] = node
        
    def find(self, item):
        if self.name == item:
            return self
        else:
            node = None
            for key in self.children:
                if node is None:
                    node = self.children[key].find(item)
                else:
                    break
            return node
        
    def debug(self):
        txt = "[(" + self.name + "):" + str(self.count) + " -- "
        for key, node in self.children.items():
            txt += node.debug()
        txt += "  ]"
        return txt

class FP_Tree():
    def __init__(self):
        self.table = FP_HeaderTable()
        self.root = FP_Node("null", 0, None, self)
    
    def gen_node(self, name, count, link):
        node = FP_Node(name, count, link, self)
        if not self.table.has(name):
            self.table.insert(name, node)
        return node
    
    def debug(self):
        print(self.root.debug())
    
    def find(self, item):
        return self.root.find(item)
        

class FP_HeaderTable():
    def __init__(self):
        self.table = {}
    
    def has(self, item):
        return item in self.table
    
    def insert(self, item, node):
        assert(item not in self.table)
        self.table[item] = node
        
    def get_last_element(self, item):
        assert(item in self.table)
        node = self.table[item]
        while node.node_link is not None:
            node = node.node_link
        return node
        
def insert_tree(X, T):
    assert(len(X) > 0)
    p = X[0]
    
    N = T.find(p)
    if N is None:
        N = T.Tree.gen_node(p, 1, None)
        last_node = T.Tree.table.get_last_element(p)
        if N is not last_node:  # otherwise: endless loop
            last_node.node_link = N
        
        T.add(N)
        
    else:
        N.count += 1
    
    if len(X) > 1:
        P = X[1:]
        insert_tree(P, N)
    
def fp_tree_construction(D, t):
    """
    Construct the FP-growth tree
    """
    N = len(D)
    
    # 1.) compute the set I' of frequent items and their support
    items = {}
    for transaction in D:
        for char in transaction:
            items[char] = items[char] + 1 if char in items else 1
    
    I_prime = []
    
    for item, count in items.items():
        if count >= t:
            I_prime.append((item, count/float(N)))
    
    # 2.) sort I' in support descending order
    I_prime = sorted(I_prime, key=lambda tup: 1-tup[1])
    
    # 3.) create the root of the FP-tree T with label null
    T = FP_Tree()
    
    print(I_prime)
    
    for X in D:
        # select the frequent items in X and sort them according
        # to the order in I_prime
        frequent_items = []
        for F,_ in I_prime:
            if F in X:
                frequent_items.append(F)
        
        insert_tree(frequent_items, T.root)
        
    
    T.debug()
            
fp_tree_construction(D, t)

[('c', 0.8), ('f', 0.8), ('q', 0.6), ('b', 0.6), ('m', 0.6), ('a', 0.6)]
[(null):0 -- [(c):4 -- [(f):4 -- [(q):3 -- [(m):2 -- [(a):2 --   ]  ][(b):1 --   ]  ][(b):2 -- [(m):1 -- [(a):1 --   ]  ]  ]  ]  ]  ]
