In [2]:
import pandas as pd
from itertools import combinations, chain

In [3]:
# Generate candidate itemsets of size k
def generate_candidates(frequent_itemsets, k):
    """Build every size-``k`` combination of the items seen in ``frequent_itemsets``.

    Args:
        frequent_itemsets: Iterable of itemsets (each an iterable of hashable
            items), e.g. the keys of a support dictionary.
        k: Number of items each candidate must contain.

    Returns:
        A list of ``k``-tuples. The order of items inside each tuple, and of
        the tuples themselves, follows set iteration order and is therefore
        not guaranteed to be stable.
    """
    # Flatten all itemsets and collapse duplicates into the pool of items.
    flattened = chain.from_iterable(frequent_itemsets)
    distinct_items = set(flattened)
    return [combo for combo in combinations(distinct_items, k)]

# Filter itemsets based on support threshold
def filter_candidates(transactions, candidates, min_support):
    """Keep the candidates whose support reaches ``min_support``.

    The support of a candidate is the fraction of transactions that contain
    every one of its items.

    Args:
        transactions: Sized collection (``len`` is taken) of transactions;
            each transaction is an iterable of hashable items.
        candidates: Iterable of itemsets (hashable, e.g. tuples of items).
            It is materialised once, so a one-shot iterator is safe.
        min_support: Inclusive lower bound on support, as a fraction of the
            number of transactions.

    Returns:
        dict mapping each surviving candidate to its support. Candidates that
        occur in no transaction are never included. An empty ``transactions``
        yields an empty dict.
    """
    total_transactions = len(transactions)
    if total_transactions == 0:
        return {}

    # Materialise the candidates together with their item sets exactly once:
    # this avoids rebuilding set(candidate) for every transaction, and keeps a
    # generator of candidates from being exhausted after the first transaction.
    candidate_sets = [(candidate, set(candidate)) for candidate in candidates]

    itemset_count = {}
    for transaction in transactions:
        for candidate, items in candidate_sets:
            if items.issubset(transaction):
                itemset_count[candidate] = itemset_count.get(candidate, 0) + 1

    frequent_itemsets = {}
    for itemset, count in itemset_count.items():
        support = count / total_transactions
        if support >= min_support:
            frequent_itemsets[itemset] = support
    return frequent_itemsets

# Generate association rules
def generate_rules(frequent_itemsets, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset is
    tried as an antecedent; the remaining items form the consequent.
    Confidence is ``support(itemset) / support(antecedent)``.

    Args:
        frequent_itemsets: dict mapping itemsets (tuples of hashable items)
            to their support.
        min_confidence: Inclusive lower bound on rule confidence.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples. Both
        antecedent and consequent keep the item order of the itemset they
        were derived from. Rules whose antecedent support is unknown or zero
        are skipped, because their confidence cannot be computed.
    """
    # Itemset keys are order-sensitive tuples, but an itemset is conceptually
    # a set: index supports by frozenset so that an antecedent stored under a
    # different item order is still found instead of silently defaulting.
    support_by_items = {
        frozenset(itemset): support
        for itemset, support in frequent_itemsets.items()
    }

    rules = []
    for itemset, itemset_support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                antecedent_items = set(antecedent)
                # Preserve the itemset's own ordering for a stable consequent.
                consequent = tuple(
                    item for item in itemset if item not in antecedent_items
                )
                if not consequent:
                    continue

                # Prefer the exact key; fall back to the order-insensitive index.
                antecedent_support = frequent_itemsets.get(antecedent)
                if antecedent_support is None:
                    antecedent_support = support_by_items.get(frozenset(antecedent))
                if not antecedent_support:
                    # Unknown (or zero) support: confidence is undefined.
                    continue

                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence))
    return rules


In [4]:
def apriori(transactions, min_support, min_confidence):
    """Mine frequent itemsets level by level, then derive association rules.

    Args:
        transactions: Iterable of transactions; each is converted to a set.
        min_support: Threshold forwarded to ``filter_candidates``.
        min_confidence: Threshold forwarded to ``generate_rules``.

    Returns:
        ``(frequent_itemsets, rules)``: the dict of itemset -> support
        accumulated over all levels, and the list returned by
        ``generate_rules`` for that dict.
    """
    baskets = [set(transaction) for transaction in transactions]
    found = {}
    size = 1

    while True:
        if found:
            # Later levels: combine the items of everything found so far.
            candidates = generate_candidates(found.keys(), size)
        else:
            # First level: start from every item present in the data.
            universe = set(chain.from_iterable(baskets))
            candidates = list(combinations(universe, size))

        survivors = filter_candidates(baskets, candidates, min_support)
        if not survivors:
            # No itemset of this size is frequent, so stop growing.
            break

        found.update(survivors)
        size += 1

    return found, generate_rules(found, min_confidence)


In [5]:
# Toy market-basket data: each inner list is one transaction
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Beer', 'Bread', 'Butter', 'Eggs'],
    ['Milk', 'Bread', 'Butter', 'Eggs'],
    ['Beer', 'Bread', 'Eggs'],
    ['Milk', 'Bread', 'Butter', 'Beer'],
]

# Thresholds passed to apriori: support first, then confidence
min_support, min_confidence = 0.5, 0.7

# Mine the itemsets and the rules from the example data
frequent_itemsets, rules = apriori(dataset, min_support, min_confidence)

# Report every itemset with its support, rounded to two decimals
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    support = frequent_itemsets[itemset]
    print(f"{itemset}: {support:.2f}")

# Report every rule with its confidence, rounded to two decimals
print("\nAssociation Rules:")
for antecedent, consequent, confidence in rules:
    print(f"{antecedent} -> {consequent}: {confidence:.2f}")


Frequent Itemsets:
('Butter',): 0.80
('Milk',): 0.60
('Bread',): 1.00
('Beer',): 0.60
('Eggs',): 0.60
('Butter', 'Milk'): 0.60
('Butter', 'Bread'): 0.80
('Milk', 'Bread'): 0.60
('Beer', 'Bread'): 0.60
('Bread', 'Eggs'): 0.60
('Butter', 'Milk', 'Bread'): 0.60

Association Rules:
('Butter',) -> ('Milk',): 0.75
('Milk',) -> ('Butter',): 1.00
('Butter',) -> ('Bread',): 1.00
('Bread',) -> ('Butter',): 0.80
('Milk',) -> ('Bread',): 1.00
('Beer',) -> ('Bread',): 1.00
('Eggs',) -> ('Bread',): 1.00
('Butter',) -> ('Milk', 'Bread'): 0.75
('Milk',) -> ('Butter', 'Bread'): 1.00
('Butter', 'Milk') -> ('Bread',): 1.00
('Butter', 'Bread') -> ('Milk',): 0.75
('Milk', 'Bread') -> ('Butter',): 1.00
