In [41]:
import pandas as pd
import numpy as np

def generate_candidates(frequent_itemsets, k):
    """
    Generate candidate itemsets of length k.

    Every unordered pair of the given itemsets is joined with a set union,
    and each distinct union that contains exactly ``k`` items is kept once.

    Args:
        frequent_itemsets: Iterable of sets (or frozensets) to combine,
            typically the frequent itemsets of length ``k - 1``.
        k: Required number of items in each generated candidate.

    Returns:
        A list of ``set`` objects, one per distinct candidate, ordered by
        the first pair that produced each candidate.
    """
    itemsets = list(frequent_itemsets)

    # Dict keys de-duplicate while remembering first-seen order, so the
    # result is deterministic for a given input order.
    # frozenset keys only require hashable items; a sorted-tuple key would
    # raise TypeError for items that cannot be ordered (e.g. 1 and 'a').
    seen = {}
    for i, left in enumerate(itemsets):
        for right in itemsets[i + 1:]:
            candidate = frozenset(left.union(right))
            if len(candidate) == k:
                seen.setdefault(candidate, None)

    # Hand back plain mutable sets, as callers expect.
    return [set(candidate) for candidate in seen]

def get_frequent_itemsets(transactions, candidates, min_support, num_transactions):
    """
    Keep only the candidates whose support reaches ``min_support``.

    The support of a candidate is the number of transactions that contain
    every one of its items, divided by ``num_transactions``.

    Args:
        transactions: Collection of item collections, scanned once per
            candidate.
        candidates: List of sets to evaluate.
        min_support: Inclusive lower bound on the support ratio.
        num_transactions: Denominator used to turn counts into ratios.

    Returns:
        The candidates meeting the threshold, in their original order.
    """
    # One float tally per candidate: how many transactions contain it.
    hit_counts = np.array(
        [
            sum(1 for basket in transactions if candidate.issubset(basket))
            for candidate in candidates
        ],
        dtype=float,
    )

    # Element-wise numpy division turns the tallies into support ratios.
    ratios = hit_counts / num_transactions

    return [
        candidate
        for candidate, ratio in zip(candidates, ratios)
        if ratio >= min_support
    ]

def apriori(df, min_support):
    """
    Find all frequent itemsets in a transaction table, level by level.

    Each row of ``df`` is one transaction; missing cells are dropped and the
    remaining values form that transaction's item set. Starting from the
    single-item sets, each round keeps the itemsets whose support is at
    least ``min_support`` and joins them into candidates one item larger,
    until a round produces no candidates.

    Args:
        df: pandas DataFrame with one transaction per row.
        min_support: Minimum fraction of transactions an itemset must
            appear in to be kept.

    Returns:
        A list of sets: every frequent itemset, smaller sizes first.
    """
    def row_to_items(row):
        # Missing cells are padding, not items.
        return set(row.dropna())

    transactions = df.apply(row_to_items, axis=1).tolist()
    num_transactions = len(transactions)

    # Level 1 candidates: one singleton per distinct item seen anywhere.
    universe = {item for basket in transactions for item in basket}
    current_candidates = [{item} for item in universe]

    all_frequent = []
    size = 1
    while current_candidates:
        # Keep the candidates of the current size that meet the threshold.
        survivors = get_frequent_itemsets(
            transactions, current_candidates, min_support, num_transactions
        )
        all_frequent.extend(survivors)

        # Join the survivors into candidates of the next size.
        size += 1
        current_candidates = generate_candidates(survivors, size)

    return all_frequent

# Example usage: five small shopping baskets, one per row.
# Each list is padded with None to a common length of four; apriori()
# drops those missing cells before building the item sets.
data = {
    'Transaction 1': ['milk', 'bread', 'butter', None],
    'Transaction 2': ['bread', 'butter', None, None],
    'Transaction 3': ['milk', 'bread', None, None],
    'Transaction 4': ['milk', 'butter', None, None],
    'Transaction 5': ['bread', 'butter', 'jam', None],
}

# orient='index' makes each dict key a row label, so rows are transactions.
df = pd.DataFrame.from_dict(data, orient='index')

# With five transactions, 0.4 means an itemset must occur in at least two.
min_support = 0.4
frequent_itemsets = apriori(df, min_support)

# Print one frequent itemset per line.
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)


Frequent Itemsets:
{'butter'}
{'milk'}
{'bread'}
{'milk', 'bread'}
{'butter', 'milk'}
{'butter', 'bread'}
