In [1]:
from itertools import combinations

def get_itemsets(transactions, itemset_size):
    """Collect the distinct itemsets that can be drawn from the transactions.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable items.
        itemset_size: Number of elements taken from a transaction per combination.

    Returns:
        A set of frozensets, one for every combination of ``itemset_size``
        elements taken from a single transaction (duplicates collapse).
    """
    return {
        frozenset(members)
        for basket in transactions
        for members in combinations(basket, itemset_size)
    }

def get_frequent_itemsets(transactions, itemsets, min_support):
    """Count the support of every candidate itemset and keep the frequent ones.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable items.
        itemsets: Iterable of candidate itemsets (frozensets). Duplicates are
            collapsed, and a one-shot iterable such as a generator is accepted.
        min_support: Absolute threshold; a candidate is frequent when the number
            of transactions containing it is ``>= min_support``.

    Returns:
        A ``(frequent_itemsets, itemset_counts)`` tuple. ``itemset_counts`` maps
        every candidate to its support count; ``frequent_itemsets`` is the subset
        of that mapping meeting ``min_support``.
    """
    itemset_counts = {itemset: 0 for itemset in itemsets}
    for transaction in transactions:
        # Materialise the transaction once so each subset test below does not
        # have to rebuild a set from a list.
        basket = frozenset(transaction)
        # Iterate the de-duplicated dict keys rather than the raw `itemsets`
        # argument: a repeated candidate must not be counted twice for the
        # same transaction, and a generator would already be exhausted here.
        for itemset in itemset_counts:
            if itemset.issubset(basket):
                itemset_counts[itemset] += 1

    frequent_itemsets = {
        itemset: count
        for itemset, count in itemset_counts.items()
        if count >= min_support
    }
    return frequent_itemsets, itemset_counts

def generate_new_combinations(frequent_itemsets, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Join step: every pair of frequent itemsets whose union has exactly ``k``
    elements proposes that union as a candidate.
    Prune step: a candidate is kept only if each of its (k-1)-element subsets
    is itself in ``frequent_itemsets``; by downward closure, a set with an
    infrequent subset cannot be frequent, so counting it would be wasted work.

    Args:
        frequent_itemsets: Iterable of frozensets (e.g. ``dict.keys()``) holding
            all frequent itemsets of size ``k - 1``.
        k: Size of the candidates to generate.

    Returns:
        A set of frozensets, each of size ``k``.
    """
    itemsets = set()
    frequent_items = list(frequent_itemsets)
    known_frequent = set(frequent_items)

    for i in range(len(frequent_items)):
        for j in range(i + 1, len(frequent_items)):
            union_set = frequent_items[i] | frequent_items[j]
            # Skip unions of the wrong size and ones already accepted.
            if len(union_set) != k or union_set in itemsets:
                continue
            # Removing one element at a time enumerates every (k-1)-subset.
            if all(union_set - {item} in known_frequent for item in union_set):
                itemsets.add(union_set)

    return itemsets

def print_table(itemset_counts, min_support):
    """Print a support table for the given itemsets.

    Emits a header row, a 40-character rule, one row per itemset showing its
    count followed by a check mark (count >= ``min_support``) or a cross, and
    finally a blank line.

    Args:
        itemset_counts: Mapping of itemset -> support count.
        min_support: Threshold deciding which mark each row receives.
    """
    print(f"{'Itemset':<30} {'Support':<10}")
    print("-" * 40)
    for itemset, count in itemset_counts.items():
        if count >= min_support:
            mark = '✓'
        else:
            mark = '✗'
        print(f"{str(set(itemset)):<30} {count:<10} {mark}")
    print()

def apriori(transactions, min_support):
    """Mine frequent itemsets level by level, printing a table per level.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable items.
        min_support: Support threshold forwarded to ``get_frequent_itemsets``
            and ``print_table``.

    Returns:
        Dict mapping every frequent itemset (frozenset) found at any level to
        its support count.

    The level-1 table is always printed; the table for a later level is
    printed only when that level produced at least one frequent itemset.
    """
    baskets = [frozenset(basket) for basket in transactions]

    # Level 1: every single item seen in the data is a candidate.
    current_level, level_counts = get_frequent_itemsets(
        baskets, get_itemsets(baskets, 1), min_support
    )
    print(f"Step 1: Frequent 1-itemsets")
    print_table(level_counts, min_support)

    discovered = dict(current_level)

    # Grow candidates one item at a time until a level yields nothing frequent.
    size = 2
    while current_level:
        candidates = generate_new_combinations(current_level.keys(), size)
        current_level, level_counts = get_frequent_itemsets(
            baskets, candidates, min_support
        )

        if current_level:
            print(f"Step {size}: Frequent {size}-itemsets")
            print_table(level_counts, min_support)
            discovered.update(current_level)

        size += 1

    return discovered

# Example: mine five small grocery baskets with an absolute support threshold.
transactions = [
    ['milk', 'bread', 'butter'],
    ['bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'butter'],
    ['bread', 'butter'],
]

min_support = 2  # an itemset must occur in at least 2 transactions to be kept
frequent_itemsets = apriori(transactions, min_support)  # also prints one table per level
print("Final Frequent Itemsets:")
print(frequent_itemsets)

Step 1: Frequent 1-itemsets
Itemset                        Support   
----------------------------------------
{'milk'}                       3          ✓
{'bread'}                      4          ✓
{'butter'}                     4          ✓

Step 2: Frequent 2-itemsets
Itemset                        Support   
----------------------------------------
{'milk', 'butter'}             2          ✓
{'bread', 'milk'}              2          ✓
{'bread', 'butter'}            3          ✓

Final Frequent Itemsets:
{frozenset({'milk'}): 3, frozenset({'bread'}): 4, frozenset({'butter'}): 4, frozenset({'milk', 'butter'}): 2, frozenset({'bread', 'milk'}): 2, frozenset({'bread', 'butter'}): 3}


In [3]:
def get_association_rules(frequent_itemsets, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset is
    tried as the consequent and the remaining items form the antecedent. The
    rule's confidence is ``support(itemset) / support(antecedent)``.

    Args:
        frequent_itemsets: Mapping of frozenset -> support count. It must also
            contain the support of every antecedent that can be formed.
        min_confidence: Rules with confidence below this value are discarded.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples, where the first
        two elements are frozensets and the third is a float.

    Raises:
        KeyError: If an antecedent's support is missing from ``frequent_itemsets``.
    """
    rules = []
    for itemset, support in frequent_itemsets.items():
        # range(1, len(itemset)) is empty for itemsets with fewer than two
        # items, and guarantees the antecedent below is never empty.
        for consequent_size in range(1, len(itemset)):
            for consequent_items in combinations(itemset, consequent_size):
                consequent = frozenset(consequent_items)
                antecedent = itemset - consequent
                antecedent_support = frequent_itemsets[antecedent]
                if antecedent_support == 0:
                    # Confidence is undefined when the antecedent never occurs
                    # (possible with min_support = 0); skip instead of raising
                    # ZeroDivisionError.
                    continue
                confidence = support / antecedent_support
                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence))
    return rules

def print_association_rules(rules):
    """Print association rules as a two-column table (rule, confidence).

    Args:
        rules: Iterable of ``(antecedent, consequent, confidence)`` tuples as
            returned by ``get_association_rules``.

    The complete ``antecedent -> consequent`` text is padded to the 30-character
    width of the ``Rule`` header so the confidence column lines up regardless
    of how long the antecedent is. Rules longer than 30 characters overflow the
    column but stay separated from the confidence by one space.
    """
    print(f"{'Rule':<30} {'Confidence':<10}")
    print("-" * 40)
    for antecedent, consequent, confidence in rules:
        # Pad the whole rule, not just the consequent, to keep columns aligned.
        rule_text = f"{str(set(antecedent))} -> {str(set(consequent))}"
        print(f"{rule_text:<30} {confidence:<10.2f}")
    print()

# Example usage: derive rules from the frequent itemsets mined above, keeping
# only those whose confidence is at least min_confidence.
min_confidence = 0.6
association_rules = get_association_rules(frequent_itemsets, min_confidence)
print("Association Rules:")
print_association_rules(association_rules)


Association Rules:
Rule                           Confidence
----------------------------------------
{'milk'} -> {'butter'}           0.67      
{'milk'} -> {'bread'}            0.67      
{'butter'} -> {'bread'}            0.75      
{'bread'} -> {'butter'}           0.75      



In [5]:
# Example 2: mine 100 synthetic baskets drawn at random from a small catalogue.
import random

items = ['milk', 'bread', 'butter', 'beer', 'diapers', 'eggs', 'cola', 'chips', 'chocolate']

# Use a dedicated, seeded generator so that "Restart Kernel -> Run All"
# reproduces the same baskets (and therefore the same tables and rules)
# without touching the global random state.
rng = random.Random(42)

# Each basket holds between 2 and 5 distinct items.
transactions = [rng.sample(items, rng.randint(2, 5)) for _ in range(100)]

min_support = 10  # Minimum support threshold
frequent_itemsets = apriori(transactions, min_support)

print("Final Frequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"{set(itemset)}: {support}")

Step 1: Frequent 1-itemsets
Itemset                        Support   
----------------------------------------
{'beer'}                       42         ✓
{'milk'}                       36         ✓
{'bread'}                      50         ✓
{'eggs'}                       40         ✓
{'cola'}                       33         ✓
{'butter'}                     33         ✓
{'chips'}                      40         ✓
{'diapers'}                    42         ✓
{'chocolate'}                  39         ✓

Step 2: Frequent 2-itemsets
Itemset                        Support   
----------------------------------------
{'chips', 'milk'}              7          ✗
{'eggs', 'bread'}              17         ✓
{'chocolate', 'milk'}          15         ✓
{'bread', 'butter'}            16         ✓
{'beer', 'cola'}               13         ✓
{'bread', 'milk'}              17         ✓
{'eggs', 'chips'}              20         ✓
{'diapers', 'butter'}          10         ✓
{'eggs', 'chocolate'}        

In [6]:
# Rules for Example 2: `frequent_itemsets` now holds the Example 2 result, while
# `min_confidence` is still the 0.6 assigned in the earlier "Example usage" cell.
association_rules = get_association_rules(frequent_itemsets, min_confidence)
print("Association Rules:")
print_association_rules(association_rules)

Association Rules:
Rule                           Confidence
----------------------------------------
{'chips', 'butter'} -> {'bread'}            0.69      
{'bread', 'butter'} -> {'chips'}            0.69      

