<a href="https://colab.research.google.com/github/rimanoble04/DS-tutorial/blob/main/tutorial-9/apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from itertools import combinations

def get_frequent_items(dataset, support_threshold):
    """Find the frequent 1-itemsets of a transaction dataset.

    Args:
        dataset: Iterable of transactions; each transaction is an iterable
            of hashable items.
        support_threshold: Minimum number of transactions an item must
            appear in to be kept (absolute count, not a fraction).

    Returns:
        Dict mapping ``frozenset({item})`` to the number of transactions
        containing that item, restricted to items whose count is at least
        ``support_threshold``.
    """
    item_freq = {}
    for record in dataset:
        # Support is counted per transaction, so an item repeated inside one
        # record (e.g. a list-based transaction) must only count once.
        # dict.fromkeys dedupes while keeping first-seen order.
        for item in dict.fromkeys(record):
            itemset = frozenset([item])
            item_freq[itemset] = item_freq.get(itemset, 0) + 1
    return {
        itemset: count
        for itemset, count in item_freq.items()
        if count >= support_threshold
    }


def extend_itemsets(prev_freq_items, level):
    """Build candidate itemsets of size ``level`` by joining pairs.

    Every unordered pair of itemsets from ``prev_freq_items`` is unioned;
    a union is kept as a candidate only when it has exactly ``level`` items.

    Args:
        prev_freq_items: Iterable of frozensets (the previous level's
            frequent itemsets).
        level: Required size of each generated candidate.

    Returns:
        Set of candidate frozensets of size ``level``.
    """
    pairwise_unions = (
        left.union(right) for left, right in combinations(prev_freq_items, 2)
    )
    return {merged for merged in pairwise_unions if len(merged) == level}


def prune_candidates(data, candidates, support_threshold):
    """Count candidate support over the transactions and drop rare ones.

    Args:
        data: Iterable of transactions (each an iterable of items).
        candidates: Collection of frozensets to test against every
            transaction.
        support_threshold: Minimum number of containing transactions for a
            candidate to be kept.

    Returns:
        Dict mapping each surviving candidate to the number of transactions
        that contain it.
    """
    support = {}
    for transaction in data:
        contained = [c for c in candidates if c.issubset(transaction)]
        for candidate in contained:
            if candidate in support:
                support[candidate] += 1
            else:
                support[candidate] = 1

    frequent = {}
    for candidate, hits in support.items():
        if hits >= support_threshold:
            frequent[candidate] = hits
    return frequent


def apriori_alt(data, min_support):
    """Mine all frequent itemsets level by level.

    Starts from the frequent single items, then repeatedly joins the
    previous level's frequent itemsets into larger candidates and keeps the
    ones that still meet ``min_support``, until a level yields nothing.

    Args:
        data: Iterable of transactions (each an iterable of hashable items).
            One-shot iterables such as generators are accepted.
        min_support: Minimum number of transactions an itemset must appear
            in (absolute count).

    Returns:
        Dict mapping each frequent itemset (frozenset) to its support count,
        covering every level that produced results.
    """
    # The transactions are re-scanned once per level; a one-shot iterable
    # would be exhausted after the first pass and silently stop the search
    # at 1-itemsets, so materialize it up front.
    data = list(data)

    current_freq = get_frequent_items(data, min_support)
    all_freq = dict(current_freq)
    level = 2

    while current_freq:
        candidates = extend_itemsets(current_freq, level)
        current_freq = prune_candidates(data, candidates, min_support)
        all_freq.update(current_freq)
        level += 1

    return all_freq


def extract_rules(freq_itemsets, min_conf):
    """Derive association rules from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried as an antecedent; the remaining items form the consequent.
    Confidence is ``support(itemset) / support(antecedent)``.

    Args:
        freq_itemsets: Dict mapping frozenset itemsets to support counts.
        min_conf: Minimum confidence for a rule to be reported.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples, where the
        first two elements are frozensets and the third is a float.
    """
    rules = []
    for itemset, itemset_support in freq_itemsets.items():
        if len(itemset) < 2:
            continue
        # Sizes 1 .. len-1 guarantee both sides of the rule are non-empty.
        for size in range(1, len(itemset)):
            for combo in combinations(itemset, size):
                antecedent = frozenset(combo)
                antecedent_support = freq_itemsets.get(antecedent)
                # Without a known, positive antecedent support the
                # confidence is undefined; guessing a denominator would
                # report bogus confidences above 1.0, so skip the rule.
                if not antecedent_support:
                    continue
                conf = itemset_support / antecedent_support
                if conf >= min_conf:
                    rules.append((antecedent, itemset - antecedent, conf))
    return rules



In [2]:

# Sample dataset: five transactions, each a set of item names.
dataset = [
    {'milk', 'bread', 'butter'},
    {'bread', 'beer'},
    {'milk', 'bread', 'beer', 'butter'},
    {'beer', 'butter'},
    {'bread', 'butter'}
]

# Parameters
# support_threshold: minimum number of transactions an itemset must appear in
# (an absolute count, compared with >= against the raw counts).
# confidence_threshold: minimum support(itemset) / support(antecedent) ratio
# for a rule to be reported.
support_threshold = 2
confidence_threshold = 0.6

# Run Apriori: mine the frequent itemsets, then derive rules from them.
frequent_sets = apriori_alt(dataset, support_threshold)
rules = extract_rules(frequent_sets, confidence_threshold)

# Output: each frequent itemset with its support count ...
print(" Frequent Itemsets:")
for itemset, count in frequent_sets.items():
    print(f"{set(itemset)}: {count}")

# ... followed by each rule as "antecedent => consequent" with its confidence
# formatted to two decimals.
print("\n Association Rules:")
for ant, cons, conf in rules:
    print(f"{set(ant)} => {set(cons)} (confidence: {conf:.2f})")


 Frequent Itemsets:
{'bread'}: 4
{'milk'}: 2
{'butter'}: 4
{'beer'}: 3
{'bread', 'milk'}: 2
{'butter', 'milk'}: 2
{'bread', 'butter'}: 3
{'bread', 'beer'}: 2
{'butter', 'beer'}: 2
{'bread', 'milk', 'butter'}: 2

 Association Rules:
{'milk'} => {'bread'} (confidence: 1.00)
{'milk'} => {'butter'} (confidence: 1.00)
{'bread'} => {'butter'} (confidence: 0.75)
{'butter'} => {'bread'} (confidence: 0.75)
{'beer'} => {'bread'} (confidence: 0.67)
{'beer'} => {'butter'} (confidence: 0.67)
{'milk'} => {'bread', 'butter'} (confidence: 1.00)
{'bread', 'milk'} => {'butter'} (confidence: 1.00)
{'bread', 'butter'} => {'milk'} (confidence: 0.67)
{'butter', 'milk'} => {'bread'} (confidence: 1.00)
