### Apriori with Pandas & NumPy

In [1]:
import pandas as pd
import numpy as np
from itertools import combinations


In [2]:
# Toy market-basket data: each inner list is one transaction (the items
# bought together). Transactions have different lengths on purpose.
transactions = [
    ['milk', 'bread', 'nuts', 'apple'],
    ['milk', 'bread', 'nuts'],
    ['milk', 'bread'],
    ['milk', 'bread', 'apple'],
    ['bread', 'milk', 'apple'],
]

# One row per transaction; rows shorter than the longest one are padded with
# missing values, which the next cell strips out again via dropna().
df = pd.DataFrame(transactions)
df

Unnamed: 0,0,1,2,3
0,milk,bread,nuts,apple
1,milk,bread,nuts,
2,milk,bread,,
3,milk,bread,apple,
4,bread,milk,apple,


In [3]:
# Rebuild each transaction as a set of its items: take every row by position,
# drop the padding (missing) cells, and collect what is left.
transaction_list = [
    set(df.iloc[position].dropna())
    for position in range(len(df))
]
transaction_list

[{'apple', 'bread', 'milk', 'nuts'},
 {'bread', 'milk', 'nuts'},
 {'bread', 'milk'},
 {'apple', 'bread', 'milk'},
 {'apple', 'bread', 'milk'}]

In [4]:
def get_support(itemset, transactions):
    """Return the absolute support of ``itemset``.

    Parameters
    ----------
    itemset : set or frozenset
        Items that must all be present; only its ``issubset`` method is used.
    transactions : iterable of iterables
        The baskets to scan.

    Returns
    -------
    int
        Number of transactions that contain every item of ``itemset``.
    """
    return sum(1 for basket in transactions if itemset.issubset(basket))


In [5]:
def generate_candidates(prev_frequent, k):
    """Build the size-``k`` candidate itemsets from the previous frequent level.

    Candidates are the ``k``-combinations of every item that appears in
    ``prev_frequent``. For ``k >= 2`` the Apriori pruning rule is applied: a
    candidate is kept only if each of its ``(k - 1)``-item subsets is itself
    in ``prev_frequent``, because an itemset with an infrequent subset can
    never be frequent.

    Parameters
    ----------
    prev_frequent : iterable of set or frozenset
        Frequent itemsets found at the previous level (normally of size
        ``k - 1``).
    k : int
        Size of the candidates to generate.

    Returns
    -------
    list of set
        Candidate itemsets, in a stable order whenever the items are sortable.
    """
    # Frozensets give O(1) membership tests for the subset check below.
    frequent = {frozenset(itemset) for itemset in prev_frequent}
    items = set().union(*frequent)

    # Sort so the candidate order does not depend on set iteration order;
    # fall back to that order if the items cannot be compared to each other.
    try:
        ordered_items = sorted(items)
    except TypeError:
        ordered_items = list(items)

    # For k < 2 there is no meaningful previous level to prune against.
    prune = k >= 2

    candidates = []
    for combo in combinations(ordered_items, k):
        if prune and any(
            frozenset(subset) not in frequent
            for subset in combinations(combo, k - 1)
        ):
            continue
        candidates.append(set(combo))
    return candidates


In [6]:
def apriori(transactions, min_support=2):
    """Find all frequent itemsets level by level.

    Starting from single items, each level counts the support of its
    candidates, keeps those that reach ``min_support``, and uses the
    survivors to generate the candidates for the next size.

    Parameters
    ----------
    transactions : iterable of iterables
        The baskets to mine; items must be hashable and sortable.
    min_support : int, default 2
        Minimum number of transactions (an absolute count, not a fraction)
        that must contain an itemset for it to be considered frequent.

    Returns
    -------
    list of (set, int)
        Each frequent itemset paired with its support count, ordered by
        increasing itemset size.
    """
    # Materialise once: the data is scanned on every level, so a one-shot
    # iterator would be exhausted by the first pass and give zero supports.
    baskets = [frozenset(basket) for basket in transactions]

    frequent_itemsets = []
    size = 1

    # Level 1 candidates: every distinct item as a singleton set.
    candidates = [{item} for item in sorted(set().union(*baskets))]

    while candidates:
        level_support = {}
        for candidate in candidates:
            support = get_support(candidate, baskets)
            if support >= min_support:
                level_support[frozenset(candidate)] = support

        # Nothing frequent at this size means nothing larger can be either.
        if not level_support:
            break

        frequent_itemsets.extend(
            (set(items), count) for items, count in level_support.items()
        )

        # Build the next level from the itemsets that survived this one.
        size += 1
        candidates = generate_candidates(list(level_support), size)

    return frequent_itemsets


In [7]:
# Mine the itemsets that occur in at least 3 of the transactions
# (min_support is an absolute transaction count).
frequent_items = apriori(transaction_list, min_support=3)

# Report every frequent itemset together with its support count.
for itemset, support in frequent_items:
    print(f"Itemset: {itemset}, Support: {support}")


Itemset: {'apple'}, Support: 3
Itemset: {'bread'}, Support: 5
Itemset: {'milk'}, Support: 5
Itemset: {'bread', 'milk'}, Support: 5
Itemset: {'bread', 'apple'}, Support: 3
Itemset: {'milk', 'apple'}, Support: 3
Itemset: {'bread', 'milk', 'apple'}, Support: 3
