In [83]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [84]:
# Toy market-basket dataset: each inner list is one transaction of item labels.
transactions = [
    basket.split()
    for basket in (
        "I1 I2 I5",
        "I2 I4",
        "I2 I3",
        "I1 I2 I4",
        "I1 I3",
        "I2 I3",
        "I1 I3",
        "I1 I2 I3 I5",
        "I1 I2 I3",
    )
]

In [85]:
# Per-item tally; keys that have not been seen yet read as 0.
item_counts = defaultdict(int)
# Absolute support threshold: items whose tally is below this are discarded.
min_support = 2

In [86]:
# Support of an item = number of transactions that contain it.
# The tally is rebuilt from scratch here so that re-running this cell never
# double counts, and each transaction contributes at most once per item even
# if it lists the same item more than once.
item_counts = defaultdict(int)
for transaction in transactions:
    # dict.fromkeys de-duplicates while keeping the first-seen order.
    for item in dict.fromkeys(transaction):
        item_counts[item] += 1

In [87]:
# Items whose tally reaches the min_support threshold.
frequent_items = {
    name for name, tally in item_counts.items() if tally >= min_support
}

In [88]:
# Rewrite every transaction as its frequent items in one fixed global order.
# Transactions that end up empty after filtering are dropped.
sorted_transactions = []
for transaction in transactions:
    # Keep frequent items only; dict.fromkeys removes repeated items (first
    # occurrence wins) so a basket cannot yield a path that revisits an item.
    filtered_transaction = [
        item for item in dict.fromkeys(transaction) if item in frequent_items
    ]
    # Order by descending support and break ties on the item itself. Without
    # the tie-break, equally frequent items keep each transaction's own order,
    # so identical item sets could be inserted along different tree branches.
    sorted_transaction = sorted(
        filtered_transaction, key=lambda x: (-item_counts[x], x)
    )
    if sorted_transaction:
        sorted_transactions.append(sorted_transaction)

In [89]:
# The tree is nested dicts: each level maps an item to a node of the form
# {'count': int, 'children': {next level}}. fp_tree['root'] is the top level.
fp_tree = {'root': {}}
# For every item, the list of all tree nodes that carry that item.
header_table = defaultdict(list)

for ordered_items in sorted_transactions:
    level = fp_tree['root']
    for label in ordered_items:
        entry = level.get(label)
        if entry is None:
            # First time this prefix reaches `label`: create the node and
            # register it in the header table.
            entry = {'count': 0, 'children': {}}
            level[label] = entry
            header_table[label].append(entry)
        entry['count'] += 1
        # Descend so the next item is inserted beneath this node.
        level = entry['children']

In [91]:
def traverse(node, path):
    """Print every root-to-leaf path found below one level of the tree.

    Args:
        node: dict mapping an item label to an entry with the keys
            'count' and 'children' ('children' is another such dict).
        path: list of "label:count" strings for the ancestors visited so far.

    Each leaf (an entry with no children) produces one printed line with the
    path segments joined by " -> ". Nothing is returned.
    """
    for label, entry in node.items():
        trail = path + [":".join((label, str(entry['count'])))]
        descendants = entry['children']
        if len(descendants) != 0:
            # Interior entry: keep descending with the extended path.
            traverse(descendants, trail)
        else:
            print(" -> ".join(trail))

In [92]:
# Print every root-to-leaf path of the tree, one line per leaf.
# traverse() already iterates over all entries of the level it is given, so
# handing it the whole root level visits the top-level items in the same order.
traverse(fp_tree['root'], [])

I2:7 -> I1:4 -> I5:1
I2:7 -> I1:4 -> I4:1
I2:7 -> I1:4 -> I3:2 -> I5:1
I2:7 -> I4:1
I2:7 -> I3:2
I1:2 -> I3:2
