In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
dataset = [
    ['Coffee', 'Donut', 'Sandwich'],
    ['Coffee', 'Donut'],
    ['Coffee', 'Sandwich'],
    ['Coffee', 'Muffin'],
    ['Donut', 'Muffin']
]

In [4]:
# Step 2: One-hot encoding
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("One-hot encoded dataframe:")
print(df)


One-hot encoded dataframe:
   Coffee  Donut  Muffin  Sandwich
0    True   True   False      True
1    True   True   False     False
2    True  False   False      True
3    True  False    True     False
4   False   True    True     False


In [6]:
# Step 3: find frequent itemsets
min_support = 0.4
freq_itemsets = apriori(df, min_support=min_support, use_colnames=True)

# sort for neatness
freq_itemsets = freq_itemsets.sort_values('support', ascending=False).reset_index(drop=True)
print("Frequent itemsets (support >= {:.2f}):\n".format(min_support), freq_itemsets)


Frequent itemsets (support >= 0.40):
    support            itemsets
0      0.8            (Coffee)
1      0.6             (Donut)
2      0.4            (Muffin)
3      0.4          (Sandwich)
4      0.4     (Coffee, Donut)
5      0.4  (Coffee, Sandwich)


In [7]:
# Step 4: generate all possible rules (no confidence filter)
all_rules = association_rules(freq_itemsets, metric="confidence", min_threshold=0.0)

# Keep only useful columns and format antecedent/consequent as strings for readability
rules = all_rules[['antecedents','consequents','support','confidence','lift']].copy()
rules['antecedents'] = rules['antecedents'].apply(lambda s: ', '.join(list(s)))
rules['consequents'] = rules['consequents'].apply(lambda s: ', '.join(list(s)))

# Sort by confidence for inspection
rules = rules.sort_values('confidence', ascending=False).reset_index(drop=True)
print("All rules derived from frequent itemsets:\n")
print(rules.to_string(index=False))


All rules derived from frequent itemsets:

antecedents consequents  support  confidence     lift
   Sandwich      Coffee      0.4    1.000000 1.250000
      Donut      Coffee      0.4    0.666667 0.833333
     Coffee       Donut      0.4    0.500000 0.833333
     Coffee    Sandwich      0.4    0.500000 1.250000


In [8]:
# Step 5: filter rules by support & confidence thresholds
min_support = 0.4   # same as used earlier (this is support of the whole rule i.e. union)
min_confidence = 0.6

filtered_rules = rules[(rules['support'] >= min_support) & (rules['confidence'] >= min_confidence)].copy()
print("Rules with support >= {:.2f} and confidence >= {:.2f}:\n".format(min_support, min_confidence))
if filtered_rules.empty:
    print("No rules satisfy both thresholds.")
else:
    print(filtered_rules.to_string(index=False))
    # pretty print
    print("\nPretty:")
    for _, r in filtered_rules.iterrows():
        print(f"{r['antecedents']} -> {r['consequents']}  (support={r['support']:.2f}, conf={r['confidence']:.2f}, lift={r['lift']:.2f})")


Rules with support >= 0.40 and confidence >= 0.60:

antecedents consequents  support  confidence     lift
   Sandwich      Coffee      0.4    1.000000 1.250000
      Donut      Coffee      0.4    0.666667 0.833333

Pretty:
Sandwich -> Coffee  (support=0.40, conf=1.00, lift=1.25)
Donut -> Coffee  (support=0.40, conf=0.67, lift=0.83)


In [9]:
# Step 6: Interpret one strong rule in words
if not filtered_rules.empty:
    # choose the rule with max confidence
    best_rule = filtered_rules.iloc[filtered_rules['confidence'].idxmax()]
    antecedent = best_rule['antecedents']
    consequent = best_rule['consequents']
    conf = best_rule['confidence']
    lift = best_rule['lift']
    
    print("Strongest Rule Interpretation:")
    print(f"If a customer buys {', '.join(list(antecedent))}, they are likely to also buy {', '.join(list(consequent))}.")
    print(f"(Confidence = {conf:.2f}, Lift = {lift:.2f})")


Strongest Rule Interpretation:
If a customer buys S, a, n, d, w, i, c, h, they are likely to also buy C, o, f, f, e, e.
(Confidence = 1.00, Lift = 1.25)


In [10]:
# Step 7: Experiment with min_support and min_confidence
supports = [0.2, 0.4, 0.6]
confidences = [0.4, 0.6, 0.8]

print("Experimenting with thresholds:")
for s in supports:
    freq_itemsets = apriori(df, min_support=s, use_colnames=True)
    for c in confidences:
        rules = association_rules(freq_itemsets, metric="confidence", min_threshold=c)
        print(f"min_support={s}, min_confidence={c} -> {len(rules)} rules")


Experimenting with thresholds:
min_support=0.2, min_confidence=0.4 -> 11 rules
min_support=0.2, min_confidence=0.6 -> 3 rules
min_support=0.2, min_confidence=0.8 -> 2 rules
min_support=0.4, min_confidence=0.4 -> 4 rules
min_support=0.4, min_confidence=0.6 -> 2 rules
min_support=0.4, min_confidence=0.8 -> 1 rules
min_support=0.6, min_confidence=0.4 -> 0 rules
min_support=0.6, min_confidence=0.6 -> 0 rules
min_support=0.6, min_confidence=0.8 -> 0 rules
