In [1]:
import pandas as pd

# Preprocessing the dataset
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)

# Build the transactions from the raw frame, before missing cells are filled.
# Rows with fewer items than the widest row have NaN in the unused columns;
# those cells are skipped so that no placeholder value (previously the string
# '0') is counted as a purchased item. Iterating over the frame itself also
# removes the hard-coded 7501 x 20 shape, so files of any size load correctly.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.itertuples(index=False, name=None)
]

# Keep the zero-filled frame bound to `dataset`, as before, for any later
# cell that still reads it.
dataset = dataset.fillna(0)
    

In [4]:
# Implementing the Apriori algorithm
def create_candidates(transactions):
    """Return the size-1 candidate itemsets found in ``transactions``.

    Every distinct item that occurs in any transaction is wrapped in its own
    single-element ``frozenset``; duplicates collapse because the itemsets are
    collected in a set before being returned as a list.
    """
    singletons = {
        frozenset([item])
        for basket in transactions
        for item in basket
    }
    return list(singletons)


In [5]:
def frequent_itemsets(transactions, min_support):
    """Find every itemset whose support is at least ``min_support``.

    The search proceeds level by level: the candidates of the current size are
    scored with ``calculate_support``, the ones that reach ``min_support`` are
    recorded, and ``generate_candidates`` builds the next, larger candidates
    from them. The loop ends when a level produces no candidates or no
    frequent itemsets.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        min_support: Threshold compared (``>=``) against the fraction returned
            by ``calculate_support``.

    Returns:
        dict mapping each frequent itemset (a ``frozenset``) to its support.
    """
    # Convert each transaction to a frozenset once. The subset test otherwise
    # has to rebuild a set from the transaction for every candidate it checks.
    # Materialising the list also lets a one-shot iterable be scanned at
    # every level.
    transaction_sets = [frozenset(transaction) for transaction in transactions]

    candidate_itemsets = create_candidates(transaction_sets)
    itemsets_with_support = {}

    while candidate_itemsets:
        # Deliberately not named `frequent_itemsets`, so the local does not
        # shadow this function's own name.
        current_level = []
        for itemset in candidate_itemsets:
            support = calculate_support(transaction_sets, itemset)
            if support >= min_support:
                itemsets_with_support[itemset] = support
                current_level.append(itemset)

        if not current_level:
            break

        candidate_itemsets = generate_candidates(current_level)

    return itemsets_with_support



In [6]:
def calculate_support(transactions, itemset):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Args:
        transactions: Iterable of transactions, each an iterable of items.
        itemset: A ``set``/``frozenset`` of items to look for.

    Returns:
        A float in ``[0.0, 1.0]``; ``0.0`` when there are no transactions.
    """
    total = 0
    matches = 0
    # Count both totals in one pass so the input does not need to support len().
    for transaction in transactions:
        total += 1
        if itemset.issubset(transaction):
            matches += 1

    # No transactions means nothing can be supported; avoid dividing by zero.
    if total == 0:
        return 0.0
    return matches / total



In [7]:
def generate_candidates(frequent_itemsets):
    """Join frequent itemsets pairwise into candidates one item larger.

    Each unordered pair of itemsets is merged; the merged set is kept only
    when it has exactly one more item than the first itemset of the pair.
    Duplicate results collapse because they are collected in a set before
    being returned as a list.
    """
    merged = set()
    for position, left in enumerate(frequent_itemsets):
        wanted_size = len(left) + 1
        # Only pair `left` with itemsets that come after it, so each
        # unordered pair is examined once.
        for right in frequent_itemsets[position + 1:]:
            joined = left.union(right)
            if len(joined) == wanted_size:
                merged.add(joined)
    return list(merged)



In [8]:

# Threshold passed to frequent_itemsets(): an itemset is reported only when
# its support is at least this value.
min_support = 0.03

# Store the result under its own name. Assigning it back to
# `frequent_itemsets` would replace the function with a dict, so running this
# cell a second time would fail with "TypeError: 'dict' object is not
# callable". NOTE(review): any later cell that read the result from
# `frequent_itemsets` should read `itemset_supports` instead.
itemset_supports = frequent_itemsets(transactions, min_support)
for itemset, support in itemset_supports.items():
    print(f"Itemset: {itemset}, Support: {support}")

Itemset: frozenset({'ground beef'}), Support: 0.09825356619117451
Itemset: frozenset({'salmon'}), Support: 0.04252766297826956
Itemset: frozenset({'chicken'}), Support: 0.05999200106652446
Itemset: frozenset({'eggs'}), Support: 0.17970937208372217
Itemset: frozenset({'butter'}), Support: 0.030129316091187842
Itemset: frozenset({'brownies'}), Support: 0.03372883615517931
Itemset: frozenset({'honey'}), Support: 0.047460338621517134
Itemset: frozenset({'fresh bread'}), Support: 0.043060925209972005
Itemset: frozenset({'burgers'}), Support: 0.0871883748833489
Itemset: frozenset({'cookies'}), Support: 0.08038928142914278
Itemset: frozenset({'avocado'}), Support: 0.03332888948140248
Itemset: frozenset({'shrimp'}), Support: 0.07145713904812692
Itemset: frozenset({'cake'}), Support: 0.08105585921877083
Itemset: frozenset({'tomatoes'}), Support: 0.06839088121583789
Itemset: frozenset({'chocolate'}), Support: 0.1638448206905746
Itemset: frozenset({'cooking oil'}), Support: 0.0510598586855086
Ite