In [19]:
import pandas as pd
from itertools import combinations

In [20]:
# Load the basket table from disk; every later cell reads it through `df`.
df = pd.read_csv(filepath_or_buffer="market_basket.csv")

# Preview the first five rows (the default row count for head()).
df.head(n=5)

Unnamed: 0,order,Apple,Bread,Butter,Cheese,Corn,Dill,Eggs,Ice cream,Kidney_Beans,Milk,Nutmeg,Onion,Sugar,Unicorn,Yogurt,Chocolate
0,0,False,True,False,False,True,True,False,True,False,False,False,False,True,False,True,True
1,1,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
2,2,True,False,True,False,False,True,False,True,False,True,False,False,False,False,True,True
3,3,False,False,True,True,False,True,False,False,False,True,True,True,False,False,False,False
4,4,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [21]:
# Mining thresholds.
min_support = 0.2      # minimum fraction of transactions an itemset must appear in
min_confidence = 0.5   # minimum confidence for a rule to be reported
k = 2                  # size of the first multi-item candidates built by the search loop

# Drop the `order` identifier so that only item columns remain.
# errors='ignore' keeps this cell safe to re-run: because `df` is reassigned here,
# a second execution would otherwise raise KeyError on the already-removed column.
df = df.drop(columns='order', errors='ignore')
df.head()

Unnamed: 0,Apple,Bread,Butter,Cheese,Corn,Dill,Eggs,Ice cream,Kidney_Beans,Milk,Nutmeg,Onion,Sugar,Unicorn,Yogurt,Chocolate
0,False,True,False,False,True,True,False,True,False,False,False,False,True,False,True,True
1,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
2,True,False,True,False,False,True,False,True,False,True,False,False,False,False,True,True
3,False,False,True,True,False,True,False,False,False,True,True,True,False,False,False,False
4,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [22]:
# Row count of the basket table (one row per order), captured once at this point.
num_transactions = len(df)
def get_support(itemset, df):
    """Return the fraction of rows in ``df`` that contain every item in ``itemset``.

    Parameters
    ----------
    itemset : iterable of column labels
        Items that must all be present (truthy) in a row for it to count.
        An empty itemset is contained in every row.
    df : pandas.DataFrame
        One row per transaction, one column per item; cell values are
        interpreted by truthiness.

    Returns
    -------
    float
        Matching rows divided by ``len(df)``; ``0.0`` when ``df`` has no rows.

    Raises
    ------
    KeyError
        If an item is not a column of ``df``.
    """
    # Normalise by the frame actually passed in, not by a module-level count that
    # can go stale when `df` is reloaded or a subset is supplied.
    total = len(df)
    if total == 0:
        return 0.0

    columns = list(itemset)
    if not columns:
        return 1.0

    # Vectorised replacement for a per-row Python loop: a row matches when all of
    # the selected item columns are truthy.
    contains_all = df[columns].astype(bool).all(axis=1)
    return int(contains_all.sum()) / total

In [23]:
# Every column of the basket table is treated as one candidate item.
items = df.columns.tolist()

# Mutable state filled in by the mining cells that follow.
support_data = dict()       # itemset (frozenset) -> support, for quick lookup
frequent_itemsets = list()  # every itemset that reached min_support, in discovery order
current_itemsets = list()   # itemsets found frequent at the most recent level

In [24]:
# Level 1: keep each single item whose support reaches the threshold.
for item in items:
    support = get_support([item], df)
    if support < min_support:
        continue  # too rare on its own
    itemset = frozenset({item})
    support_data[itemset] = support
    frequent_itemsets.append(itemset)
    current_itemsets.append(itemset)  # seeds the next level of the search

In [None]:
# Level-wise (Apriori) search: grow itemsets one item at a time until a level
# produces no frequent itemset.
#
# A local counter is used so the configured starting size `k` is never mutated;
# re-running the initialisation cells followed by this one therefore always starts
# again from size `k` instead of resuming from a stale, already-incremented value.
# Assumes `current_itemsets` holds the frequent itemsets of size `k - 1` on entry.
itemset_size = k
while current_itemsets:
    previous_level = set(current_itemsets)
    # Sorted so that candidate order (and hence the order of `frequent_itemsets`)
    # is reproducible; the iteration order of a set of strings is not.
    candidate_items = sorted(set().union(*current_itemsets))
    next_itemsets = []
    for candidate in combinations(candidate_items, itemset_size):
        # Apriori pruning: support can only shrink as items are added, so a
        # candidate with any infrequent (size - 1)-subset cannot be frequent and
        # does not need a pass over the data.
        if any(frozenset(subset) not in previous_level
               for subset in combinations(candidate, itemset_size - 1)):
            continue
        itemset = frozenset(candidate)
        support = get_support(itemset, df)
        if support >= min_support:
            next_itemsets.append(itemset)
            support_data[itemset] = support
            frequent_itemsets.append(itemset)
    current_itemsets = next_itemsets
    itemset_size += 1

In [29]:
# Report every frequent itemset, in discovery order, with its support to two decimals.
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    # frozenset -> set only for a more compact printed form
    print(f"{set(itemset)}: support = {support_data[itemset]:.2f}")

Frequent Itemsets:
{'Apple'}: support = 0.38
{'Bread'}: support = 0.38
{'Butter'}: support = 0.42
{'Cheese'}: support = 0.40
{'Corn'}: support = 0.41
{'Dill'}: support = 0.40
{'Eggs'}: support = 0.38
{'Ice cream'}: support = 0.41
{'Kidney_Beans'}: support = 0.41
{'Milk'}: support = 0.41
{'Nutmeg'}: support = 0.40
{'Onion'}: support = 0.40
{'Sugar'}: support = 0.41
{'Unicorn'}: support = 0.39
{'Yogurt'}: support = 0.42
{'Chocolate'}: support = 0.42
{'Chocolate', 'Butter'}: support = 0.20
{'Chocolate', 'Ice cream'}: support = 0.20
{'Chocolate', 'Milk'}: support = 0.21
{'Ice cream', 'Butter'}: support = 0.21
{'Kidney_Beans', 'Butter'}: support = 0.20
{'Kidney_Beans', 'Cheese'}: support = 0.20


In [None]:
# Derive rules "antecedent => consequent" from every frequent itemset with at
# least two items, reporting those whose confidence reaches min_confidence.
print("Association Rules:")
for itemset in frequent_itemsets:
    if len(itemset) < 2:
        continue  # need at least two items for a rule

    # Same for every split of this itemset, so look it up once.
    support_itemset = support_data[itemset]

    # Antecedent sizes 1 .. len-1 make every antecedent a proper, non-empty
    # subset, so the consequent can never be empty.
    for i in range(1, len(itemset)):
        for antecedent in map(frozenset, combinations(itemset, i)):
            consequent = itemset - antecedent
            support_antecedent = support_data.get(antecedent, 0)
            support_consequent = support_data.get(consequent, 0)

            if support_antecedent == 0:
                continue  # avoid divide-by-zero

            # confidence = support(A and C) / support(A); lift = confidence / support(C)
            confidence = support_itemset / support_antecedent
            if support_consequent:
                lift = confidence / support_consequent
            else:
                lift = 0

            if confidence < min_confidence:
                continue

            print(f"{set(antecedent)} => {set(consequent)} | "
                  f"support: {support_itemset:.2f}, "
                  f"confidence: {confidence:.2f}, "
                  f"lift: {lift:.2f}")

Association Rules:
{'Chocolate'} => {'Milk'} | support: 0.21, confidence: 0.50, lift: 1.24
{'Milk'} => {'Chocolate'} | support: 0.21, confidence: 0.52, lift: 1.24
{'Ice cream'} => {'Butter'} | support: 0.21, confidence: 0.50, lift: 1.20
