In [None]:
import itertools
from collections import defaultdict


In [None]:
# Load dataset: each line contains space-separated item IDs
def load_transactions(path):
    """Read a transaction file into a list of frozensets.

    Every line is stripped and split on whitespace; the resulting tokens
    form one transaction. Lines that produce no tokens (blank or
    whitespace-only) are skipped.

    Args:
        path: Location of the text file to read.

    Returns:
        list: One ``frozenset`` of item tokens (strings) per non-blank
        line, in file order.
    """
    with open(path, "r") as handle:
        # Lazily tokenize each line, then keep only the non-empty rows.
        token_rows = (raw_line.strip().split() for raw_line in handle)
        return [frozenset(tokens) for tokens in token_rows if tokens]

# Read the transaction database once; later cells reuse this `transactions` list.
# The path is relative, so it is resolved against the kernel's working directory.
transactions = load_transactions("dataset.dat")
print(f"Loaded {len(transactions)} transactions.")
transactions[:5]  # last expression of the cell: show the first five transactions


In [None]:
def count_support(candidates, transactions):
    """Count how many transactions contain each candidate itemset.

    Args:
        candidates: Iterable of set-like itemsets exposing ``issubset``.
        transactions: Iterable of transactions (any iterable of items).

    Returns:
        defaultdict(int): Maps each candidate that occurs in at least one
        transaction to the number of transactions containing it.
        Candidates that never occur are not inserted.
    """
    tally = defaultdict(int)
    for basket in transactions:
        # Select the candidates fully contained in this transaction ...
        contained = (itemset for itemset in candidates if itemset.issubset(basket))
        # ... and bump the counter of each of them.
        for itemset in contained:
            tally[itemset] += 1
    return tally


In [None]:
def generate_L1(transactions, min_support):
    """Find the frequent 1-itemsets.

    Args:
        transactions: Iterable of transactions, each an iterable of items.
        min_support: Minimum occurrence count (absolute, not a fraction)
            an item needs in order to be kept.

    Returns:
        dict: Maps ``frozenset([item])`` to its occurrence count for every
        item whose count is at least ``min_support``.
    """
    singleton_counts = {}
    for basket in transactions:
        for element in basket:
            key = frozenset((element,))
            singleton_counts[key] = singleton_counts.get(key, 0) + 1

    # Keep only the singletons that reach the support threshold.
    return {
        key: occurrences
        for key, occurrences in singleton_counts.items()
        if occurrences >= min_support
    }

# Example threshold; tune as needed.
# This is an absolute transaction count (it is compared with `count >= min_support`),
# not a fraction of the dataset.
min_support = 1000

# Frequent single items; this notebook-level `L1` is only inspected here.
L1 = generate_L1(transactions, min_support)
print("Frequent 1-itemsets:", len(L1))
L1


In [None]:
def generate_candidates(prev_frequent_itemsets, k):
    """Generate the candidate k-itemsets Ck from the frequent (k-1)-itemsets.

    Two steps of the classic Apriori candidate generation are performed:

    * Self-join: two previous itemsets are merged when their first ``k - 2``
      items (in sorted order) are identical and the union has exactly
      ``k`` items.
    * Prune: a merged itemset is kept only if every one of its
      ``(k - 1)``-subsets is itself in ``prev_frequent_itemsets``.

    Each itemset is sorted once and bucketed by its ``(k - 2)``-prefix, so
    only itemsets sharing a prefix are paired (instead of re-sorting both
    operands for every pair of previous itemsets).

    Args:
        prev_frequent_itemsets: The frequent ``(k - 1)``-itemsets. Either a
            dict keyed by frozenset (values are ignored) or any iterable of
            frozensets. Items inside one itemset must be mutually orderable.
        k: Size of the candidates to generate.

    Returns:
        set: The candidate itemsets, each a ``frozenset`` of size ``k``.
    """
    prev_itemsets = list(prev_frequent_itemsets)
    frequent = set(prev_itemsets)  # O(1) membership for the prune step

    # Bucket itemsets by their sorted (k-2)-prefix; sorting happens once per
    # itemset. For k == 2 the prefix is empty, so everything shares a bucket.
    buckets = defaultdict(list)
    for itemset in prev_itemsets:
        prefix = tuple(sorted(itemset)[:k - 2])
        buckets[prefix].append(itemset)

    candidates = set()
    for bucket in buckets.values():
        # Self-join: only itemsets with an identical prefix are combined.
        for left, right in itertools.combinations(bucket, 2):
            joined = frozenset(left).union(right)
            if len(joined) != k:
                continue

            # Apriori prune: all (k-1)-subsets must be frequent.
            if all(
                frozenset(subset) in frequent
                for subset in itertools.combinations(joined, k - 1)
            ):
                candidates.add(joined)

    return candidates


In [None]:
def apriori(transactions, min_support):
    """Mine frequent itemsets level by level.

    Starts from the frequent 1-itemsets and, for k = 2, 3, ..., generates
    candidates from the previous level, counts their support and keeps
    those reaching ``min_support``. The search stops as soon as a level
    yields no candidates or no frequent itemsets. One progress line is
    printed for every level attempted.

    Args:
        transactions: The transactions to mine; passed unchanged to
            ``generate_L1`` and ``count_support``.
        min_support: Minimum support count an itemset must reach.

    Returns:
        list: One dict per level (itemset -> support count). The first
        entry is the result of ``generate_L1``; later entries are only
        appended when non-empty.
    """
    # Level 1 seeds the search and is always recorded.
    current_level = generate_L1(transactions, min_support)
    levels = [current_level]

    for k in itertools.count(2):
        print(f"Generating candidates for k = {k}")

        candidate_pool = generate_candidates(current_level, k)
        if not candidate_pool:
            break

        # Keep only the candidates that meet the support threshold.
        counted = count_support(candidate_pool, transactions)
        current_level = {
            itemset: occurrences
            for itemset, occurrences in counted.items()
            if occurrences >= min_support
        }
        if not current_level:
            break

        levels.append(current_level)

    return levels

# Run the level-wise search on the loaded data; `apriori` prints one progress line per level attempted.
frequent_itemsets = apriori(transactions, min_support)


In [None]:
# Number of frequent itemsets summed over all levels.
total = sum(len(level) for level in frequent_itemsets)

print(f"Total frequent itemsets: {total}\n")

# Levels are numbered from 1, matching the itemset size stored at that position.
for i, Lk in enumerate(frequent_itemsets, start=1):
    print(f"Level {i} — {len(Lk)} itemsets")
    for itemset, support in Lk.items():
        # Convert to a plain set so the output is not wrapped in `frozenset(...)`.
        print(f"  {set(itemset)}  → support {support}")
    print()


In [None]:
# Generate association rules from frequent itemsets
def generate_association_rules(frequent_itemsets, min_confidence):
    """Derive association rules X -> Y from the mined frequent itemsets.

    For every frequent itemset with at least two items, each non-empty
    proper subset X is tried as antecedent with Y = itemset - X as
    consequent. The confidence is ``support(itemset) / support(X)``;
    antecedents without a recorded support are skipped.

    Args:
        frequent_itemsets: Iterable of per-level mappings
            (frozenset -> support count).
        min_confidence: Minimum confidence a rule must reach to be kept.

    Returns:
        list: One dict per rule with the keys ``"X"``, ``"Y"``,
        ``"support"`` (support count of the whole itemset) and
        ``"confidence"``.
    """
    # Merge all levels into a single itemset -> support lookup table.
    support_of = {
        itemset: count
        for level in frequent_itemsets
        for itemset, count in dict(level).items()
    }

    found = []
    for itemset, itemset_support in support_of.items():
        if len(itemset) < 2:
            # A single item cannot be split into antecedent and consequent.
            continue

        members = list(itemset)
        # Every non-empty proper subset, smallest sizes first.
        proper_subsets = itertools.chain.from_iterable(
            itertools.combinations(members, size)
            for size in range(1, len(members))
        )

        for picked in proper_subsets:
            antecedent = frozenset(picked)
            consequent = itemset - antecedent

            if antecedent not in support_of:
                continue

            confidence = itemset_support / support_of[antecedent]
            if confidence >= min_confidence:
                found.append({
                    "X": antecedent,
                    "Y": consequent,
                    "support": itemset_support,
                    "confidence": confidence
                })
    return found


In [None]:
# Minimum confidence (a ratio, compared with `confidence >= min_confidence`) a rule must reach.
min_confidence = 0.6

rules = generate_association_rules(frequent_itemsets, min_confidence)

print(f"Generated {len(rules)} association rules.")

# Strongest rules first: highest confidence, ties broken by higher support count.
rules_sorted = sorted(
    rules,
    key=lambda r: (r['confidence'], r['support']),
    reverse=True
)

# Show only the top 20 rules to keep the cell output short.
for rule in rules_sorted[:20]:
    print(
        f"{set(rule['X'])} -> {set(rule['Y'])} "
        f"(conf={rule['confidence']:.3f}, support={rule['support']})"
    )

