from itertools import combinations

import pandas as pd

# Sample market-basket data: each inner list is the set of items bought in
# one transaction.
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin'],
]

# Distinct items across all transactions, in sorted order; these become the
# columns of the one-hot table below.
all_items = sorted(set().union(*dataset))

# One row per transaction and one column per item: 1 if the transaction
# contains the item, 0 otherwise.
encoded_rows = []
for basket in dataset:
    encoded_rows.append([int(item in basket) for item in all_items])
df = pd.DataFrame(encoded_rows, columns=all_items)

print("One-Hot Encoded DataFrame:\n", df)


def get_support(itemset, df):
    """Return the fraction of rows of *df* that contain every item in *itemset*.

    Args:
        itemset: Iterable of column labels of ``df`` (list, tuple, frozenset, ...).
        df: DataFrame with one row per transaction and one column per item,
            where a truthy cell marks the item as present (the 0/1 one-hot
            table built earlier in this file).

    Returns:
        The support as a float: rows where all selected columns are truthy,
        divided by the total number of rows. Returns 0.0 when ``df`` has no
        rows.
    """
    n_transactions = len(df)
    if n_transactions == 0:
        # Without this guard the division below is 0 / 0, which yields NaN
        # (plus a RuntimeWarning) and silently fails every ">= threshold" test.
        return 0.0
    # Row-wise AND over the selected item columns.
    contains_all = df[list(itemset)].all(axis=1)
    return float(contains_all.sum() / n_transactions)


# In [8]:
# Step 1: Find frequent itemsets
# Minimum fraction of transactions an itemset must appear in to be kept.
min_support = 0.4
# Collected as (frozenset_of_items, support) pairs, smallest itemsets first.
frequent_itemsets = []

# Level 1: single items. Remember which ones are frequent, because only those
# can be part of a larger frequent itemset.
frequent_items = []
for item in all_items:
    sup = get_support([item], df)
    if sup >= min_support:
        frequent_items.append(item)
        frequent_itemsets.append((frozenset([item]), sup))

# Levels 2 and up. Support never increases when items are added, so:
#   * candidates are built from frequent single items only, and
#   * once a level yields nothing, no larger itemset can be frequent.
# The result (contents and order) matches an exhaustive search over
# all_items, without enumerating every combination.
for size in range(2, len(frequent_items) + 1):
    found_at_this_size = False
    for comb in combinations(frequent_items, size):
        sup = get_support(comb, df)
        if sup >= min_support:
            frequent_itemsets.append((frozenset(comb), sup))
            found_at_this_size = True
    if not found_at_this_size:
        break

print("\nFrequent Itemsets (Support >= 0.4):")
for itemset, sup in frequent_itemsets:
    print(f"{set(itemset)} : {sup:.2f}")


# Output:
# Frequent Itemsets (Support >= 0.4):
# {'Coffee'} : 0.80
# {'Donut'} : 0.60
# {'Muffin'} : 0.40
# {'Sandwich'} : 0.40
# {'Donut', 'Coffee'} : 0.40
# {'Coffee', 'Sandwich'} : 0.40


# In [9]:
# Step 2: Generate association rules
def generate_rules(frequent_itemsets, df, min_conf=0.1):
    """Derive association rules (antecedent -> consequent) from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset is
    used as the antecedent and the remaining items as the consequent.

    Args:
        frequent_itemsets: Iterable of ``(itemset, support)`` pairs. Each
            itemset must support set difference (e.g. ``frozenset``). The
            paired support value is not used; support is recomputed from ``df``.
        df: Transaction table passed through to ``get_support``.
        min_conf: Minimum confidence a rule must reach to be returned.

    Returns:
        A list of ``(antecedent, consequent, support, confidence, lift)``
        tuples, where support is that of the full itemset,
        confidence = support(itemset) / support(antecedent) and
        lift = confidence / support(consequent).
    """
    rules = []
    for itemset, _ in frequent_itemsets:
        if len(itemset) < 2:
            # A rule needs at least one item on each side.
            continue
        # The same for every split of this itemset, so compute it once.
        sup_itemset = get_support(itemset, df)
        # Antecedent sizes 1 .. len-1 guarantee a non-empty consequent.
        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                sup_antecedent = get_support(antecedent, df)
                sup_consequent = get_support(consequent, df)
                if sup_antecedent == 0 or sup_consequent == 0:
                    # Confidence or lift would divide by zero; the rule is
                    # undefined on this data, so skip it.
                    continue
                conf = sup_itemset / sup_antecedent
                if conf < min_conf:
                    continue
                lift = conf / sup_consequent
                rules.append((antecedent, consequent, sup_itemset, conf, lift))
    return rules

# Build every candidate rule from the frequent itemsets, then list them with
# their metrics.
rules = generate_rules(frequent_itemsets, df)

print("\nAssociation Rules:")
for rule in rules:
    ant, cons, sup, conf, lift = rule
    print(f"{set(ant)} -> {set(cons)} | support={sup:.2f}, confidence={conf:.2f}, lift={lift:.2f}")



# Output:
# Association Rules:
# {'Donut'} -> {'Coffee'} | support=0.40, confidence=0.67, lift=0.83
# {'Coffee'} -> {'Donut'} | support=0.40, confidence=0.50, lift=0.83
# {'Coffee'} -> {'Sandwich'} | support=0.40, confidence=0.50, lift=1.25
# {'Sandwich'} -> {'Coffee'} | support=0.40, confidence=1.00, lift=1.25


# In [10]:

# Step 3: Keep only the rules that meet both the support and the confidence
# thresholds.
min_conf = 0.6

strong_rules = []
for rule in rules:
    ant, cons, sup, conf, lift = rule
    if sup >= min_support and conf >= min_conf:
        strong_rules.append((ant, cons, sup, conf, lift))

print("\nStrong Rules (Support >= 0.4, Confidence >= 0.6):")
for rule in strong_rules:
    ant, cons, sup, conf, lift = rule
    print(f"{set(ant)} -> {set(cons)} | support={sup:.2f}, confidence={conf:.2f}, lift={lift:.2f}")



# Output:
# Strong Rules (Support >= 0.4, Confidence >= 0.6):
# {'Donut'} -> {'Coffee'} | support=0.40, confidence=0.67, lift=0.83
# {'Sandwich'} -> {'Coffee'} | support=0.40, confidence=1.00, lift=1.25
