In [14]:
import numpy as np
import pandas as pd
import random
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules

In [15]:
items = [f"i{i}" for i in range(1, 41)]

def generate_transactions(num_transactions=20):
    transactions = []
    for _ in range(num_transactions):
        num_items = random.randint(1, 7)  
        transaction = set(random.sample(items, num_items))  
        transactions.append(transaction)
    return transactions

In [16]:
datasets = [generate_transactions() for _ in range(4)]
for i, dataset in enumerate(datasets, 1):
    print(f"\nDataset {i}:")
    for idx, transaction in enumerate(dataset, 1):
        print(f"T{idx}: {transaction}")


Dataset 1:
T1: {'i8', 'i39', 'i35', 'i16', 'i7'}
T2: {'i22', 'i10', 'i31', 'i17', 'i9', 'i26'}
T3: {'i6', 'i17'}
T4: {'i9'}
T5: {'i31', 'i12', 'i18', 'i32'}
T6: {'i5', 'i25', 'i22', 'i7'}
T7: {'i25', 'i39', 'i28', 'i12', 'i9'}
T8: {'i36', 'i24'}
T9: {'i25', 'i40', 'i8', 'i31', 'i32', 'i6', 'i37'}
T10: {'i36', 'i22', 'i20', 'i38', 'i23', 'i30', 'i11'}
T11: {'i39', 'i19', 'i40', 'i15'}
T12: {'i3'}
T13: {'i22', 'i35'}
T14: {'i31', 'i33', 'i26'}
T15: {'i4', 'i15'}
T16: {'i8', 'i19', 'i1', 'i26'}
T17: {'i30'}
T18: {'i3', 'i23', 'i30'}
T19: {'i10', 'i21', 'i13', 'i5', 'i16', 'i35', 'i7'}
T20: {'i25', 'i33', 'i34', 'i24', 'i23', 'i11', 'i18'}

Dataset 2:
T1: {'i25', 'i22', 'i11', 'i33', 'i28'}
T2: {'i30'}
T3: {'i5', 'i12', 'i1', 'i30'}
T4: {'i33'}
T5: {'i2', 'i1', 'i32', 'i6', 'i26'}
T6: {'i9', 'i16', 'i33', 'i8'}
T7: {'i22', 'i34', 'i21', 'i31', 'i16', 'i2', 'i19'}
T8: {'i19'}
T9: {'i21', 'i24', 'i5', 'i39', 'i37'}
T10: {'i36', 'i34', 'i8', 'i17', 'i14'}
T11: {'i39', 'i17'}
T12: {'i34', 'i3

In [17]:
def get_support(itemset, transactions):
    return sum(1 for t in transactions if itemset.issubset(t)) / len(transactions)

In [18]:
def custom_apriori(transactions, min_support=0.2, min_confidence=0.4):
    items = {item for t in transactions for item in t}
    frequent_itemsets = []
    candidates = [frozenset([item]) for item in items] 

    # frequent itemsets
    k = 1
    while candidates:
        frequent = [itemset for itemset in candidates if get_support(itemset, transactions) >= min_support]
        if not frequent:
            break
        frequent_itemsets.append(frequent)
        k += 1
        candidates = [a | b for i, a in enumerate(frequent) for b in frequent[i + 1:] if len(a | b) == k]

    # association rules
    rules = []
    for freq_set in frequent_itemsets[1:]: 
        for itemset in freq_set:
            for i in range(1, len(itemset)):
                for subset in map(frozenset, combinations(itemset, i)):
                    confidence = get_support(itemset, transactions) / get_support(subset, transactions)
                    if confidence >= min_confidence:
                        rules.append((subset, itemset - subset, confidence))

    return frequent_itemsets, rules

In [19]:
def encode_transactions(transactions, item_list):
    return pd.DataFrame([{item: (item in t) for item in item_list} for t in transactions])

In [20]:
datasets = [
    [ {'i1', 'i2', 'i3'},
      {'i1', 'i2', 'i4'},
      {'i1', 'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i5'},
      {'i1', 'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4', 'i5'},
      {'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i4', 'i5'},
      {'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4'} ],
    
    [ {'i1', 'i2', 'i3'},
      {'i1', 'i2', 'i4'},
      {'i1', 'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i5'},
      {'i1', 'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4', 'i5'},
      {'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i4', 'i5'},
      {'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4'} ],

    [ {'i1', 'i2', 'i3'},
      {'i1', 'i2', 'i4'},
      {'i1', 'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i5'},
      {'i1', 'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4', 'i5'},
      {'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i4', 'i5'},
      {'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4'} ],

    [ {'i1', 'i2', 'i3'},
      {'i1', 'i2', 'i4'},
      {'i1', 'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i5'},
      {'i1', 'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4', 'i5'},
      {'i2', 'i3', 'i4'},
      {'i1', 'i3', 'i4', 'i5'},
      {'i2', 'i3', 'i5'},
      {'i1', 'i2', 'i4'} ]
]

support_confidence_pairs = [(0.2, 0.4), (0.5, 0.7), (0.8, 0.9)]

In [21]:
results = []
for i, transactions in enumerate(datasets, 1):
    df = encode_transactions(transactions, items)
    
    for min_support, min_conf in support_confidence_pairs:
        freq_itemsets_custom, rules_custom = custom_apriori(transactions, min_support, min_conf)
        
        # MLxtend Apriori
        frequent_itemsets_mlxtend = apriori(df, min_support=min_support, use_colnames=True)
        rules_mlxtend = association_rules(frequent_itemsets_mlxtend, metric="confidence", min_threshold=min_conf)
        
        results.append({
            "Dataset": i,
            "Min Support": min_support,
            "Min Confidence": min_conf,
            "Custom Frequent Itemsets": sum(len(f) for f in freq_itemsets_custom),
            "Custom Rules": len(rules_custom),
            "MLxtend Frequent Itemsets": len(frequent_itemsets_mlxtend),
            "MLxtend Rules": len(rules_mlxtend)
        })
results = pd.DataFrame(results)

In [22]:
results

Unnamed: 0,Dataset,Min Support,Min Confidence,Custom Frequent Itemsets,Custom Rules,MLxtend Frequent Itemsets,MLxtend Rules
0,1,0.2,0.4,39,114,23,50
1,1,0.5,0.7,10,6,10,6
2,1,0.8,0.9,2,0,2,0
3,2,0.2,0.4,39,114,23,50
4,2,0.5,0.7,10,6,10,6
5,2,0.8,0.9,2,0,2,0
6,3,0.2,0.4,39,114,23,50
7,3,0.5,0.7,10,6,10,6
8,3,0.8,0.9,2,0,2,0
9,4,0.2,0.4,39,114,23,50
