In [1]:
# 📘 Simple Apriori Algorithm for Association Rule Mining

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules




In [2]:
# Step 2: Toy retail dataset -- one entry per basket, listing the items bought together.
# Each basket is written as a tuple row and materialised as a list, so the
# result is a plain list of lists.
transactions = [
    list(basket)
    for basket in (
        ('milk', 'bread', 'butter'),
        ('bread', 'butter'),
        ('milk', 'bread'),
        ('milk', 'bread', 'butter', 'jam'),
        ('bread', 'jam'),
        ('milk', 'bread', 'butter'),
    )
]



In [3]:
# Step 3: One-hot encode the baskets -- one row per transaction, one column per item.
te = TransactionEncoder()
# Learn the item vocabulary and encode the same transactions in a single call.
te_ary = te.fit_transform(transactions)
# Label the columns with the item names the encoder learned during fitting.
df = pd.DataFrame(data=te_ary, columns=te.columns_)



In [4]:
# Step 4: Apply the Apriori algorithm to find frequent itemsets.
# min_support=0.4 keeps only itemsets whose support is at least 0.4;
# use_colnames=True reports itemsets by item name instead of column index.
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
# Heading emoji restored: the literal had been mis-decoded (UTF-8 read as
# cp1252) and was printing as garbage characters.
print("🧺 Frequent Itemsets:")
print(frequent_itemsets)



ðŸ§º Frequent Itemsets:
    support               itemsets
0  1.000000                (bread)
1  0.666667               (butter)
2  0.666667                 (milk)
3  0.666667        (bread, butter)
4  0.666667          (bread, milk)
5  0.500000         (milk, butter)
6  0.500000  (bread, milk, butter)


In [5]:

# Step 5: Generate association rules (support, confidence, lift).
# Rules are filtered on the confidence metric with a threshold of 0.6.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
# Heading emoji restored: the literal had been mis-decoded (UTF-8 read as
# cp1252) and was printing as garbage characters.
print("\n🔗 Association Rules:")
# Show only the columns discussed in this walkthrough.
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])




ðŸ”— Association Rules:
        antecedents      consequents   support  confidence   lift
0           (bread)         (butter)  0.666667    0.666667  1.000
1          (butter)          (bread)  0.666667    1.000000  1.000
2           (bread)           (milk)  0.666667    0.666667  1.000
3            (milk)          (bread)  0.666667    1.000000  1.000
4            (milk)         (butter)  0.500000    0.750000  1.125
5          (butter)           (milk)  0.500000    0.750000  1.125
6     (bread, milk)         (butter)  0.500000    0.750000  1.125
7   (bread, butter)           (milk)  0.500000    0.750000  1.125
8    (milk, butter)          (bread)  0.500000    1.000000  1.000
9            (milk)  (bread, butter)  0.500000    0.750000  1.125
10         (butter)    (bread, milk)  0.500000    0.750000  1.125


  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)


In [6]:
# Step 6: Interpret each rule as a plain-language sentence.
# Heading emoji restored: the literal had been mis-decoded (UTF-8 read as
# cp1252) and was printing as garbage characters.
print("\n🧠 Interpretation:")
for _, row in rules.iterrows():
    # mlxtend stores itemsets as frozensets; their iteration order depends on
    # per-process string hash randomization, so list(...) could print items in
    # a different order after every kernel restart. Sorting makes the text
    # reproducible.
    antecedent_items = sorted(row['antecedents'])
    consequent_items = sorted(row['consequents'])
    print(f"If a customer buys {antecedent_items}, "
          f"they are likely to buy {consequent_items} "
          f"(confidence = {row['confidence']:.2f}, lift = {row['lift']:.2f})")


ðŸ§  Interpretation:
If a customer buys ['bread'], they are likely to buy ['butter'] (confidence = 0.67, lift = 1.00)
If a customer buys ['butter'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['bread'], they are likely to buy ['milk'] (confidence = 0.67, lift = 1.00)
If a customer buys ['milk'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['milk'], they are likely to buy ['butter'] (confidence = 0.75, lift = 1.12)
If a customer buys ['butter'], they are likely to buy ['milk'] (confidence = 0.75, lift = 1.12)
If a customer buys ['bread', 'milk'], they are likely to buy ['butter'] (confidence = 0.75, lift = 1.12)
If a customer buys ['bread', 'butter'], they are likely to buy ['milk'] (confidence = 0.75, lift = 1.12)
If a customer buys ['milk', 'butter'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['milk'], they are likely to buy ['bread', 'butter'] (confidence = 0