In [1]:
# mining_iris_basket.py
import pandas as pd
import random
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# ----------------------------
# Step 1: Generate synthetic data
# ----------------------------
# Catalogue of products a basket can draw from. The order matters:
# random.sample() picks by position, so reordering changes the seeded output.
items_pool = [
    'milk', 'bread', 'butter', 'beer', 'diapers',
    'eggs', 'apples', 'bananas', 'chicken', 'rice',
    'cheese', 'yogurt', 'cereal', 'coffee', 'tea',
    'juice', 'onions', 'tomatoes', 'potatoes', 'fish',
]

# Fixed seed so every run produces the same baskets.
random.seed(42)

# (trigger, companion) pairs: a basket holding the trigger always gets the
# companion too, which plants co-occurrence patterns in the data.
pairings = (('milk', 'bread'), ('beer', 'diapers'))

transactions = []
while len(transactions) < 30:  # 30 transactions
    # Draw the size first, then 3-8 distinct items of that count.
    basket = random.sample(items_pool, random.randint(3, 8))

    for trigger, companion in pairings:
        if trigger in basket and companion not in basket:
            basket.append(companion)

    transactions.append(basket)

# Preview the first five baskets, one per line.
print("Sample Transactions:", *transactions[:5], sep="\n")

# ----------------------------
# Step 2: Encode data for Apriori
# ----------------------------
# Fit the encoder on the baskets (fit() returns the encoder itself), then
# transform the same baskets into an array with one column per item.
te = TransactionEncoder().fit(transactions)
te_ary = te.transform(transactions)

# Label the columns with the item names the encoder learned.
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# ----------------------------
# Step 3: Apply Apriori
# ----------------------------
# Frequent itemsets at a minimum support of 0.2, reported with item names
# rather than column indices.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.2)

# Derive rules filtered on confidence (threshold 0.5) and order them by
# lift, highest first.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
).sort_values(by="lift", ascending=False)

# Save and display top 5 rules (the same slice is written and printed).
top_rules = rules.head(5)
top_rules.to_csv("top5_rules.csv", index=False)
print("\nTop 5 Rules by Lift:")
print(top_rules)

# ----------------------------
# Step 4: Example Rule Analysis
# ----------------------------
# Build a short plain-language explanation of the highest-lift rule.
if rules.empty:
    # No rule passed the thresholds; rules.iloc[0] would raise IndexError,
    # so report that instead of crashing.
    example_rule = None
    analysis = """
Rule Analysis:
No association rules met the chosen thresholds, so there is no example rule to analyse.
"""
else:
    # `rules` is sorted by lift in descending order, so row 0 is the top rule.
    example_rule = rules.iloc[0]

    # Antecedents/consequents are frozensets; their iteration order depends on
    # string hashing and can differ between interpreter runs. Sorting keeps the
    # printed analysis reproducible.
    antecedent_items = sorted(example_rule['antecedents'])
    consequent_items = sorted(example_rule['consequents'])

    analysis = f"""
Rule Analysis:
If a customer buys {antecedent_items}, they are likely to also buy {consequent_items}.
Support: {example_rule['support']:.2f}, Confidence: {example_rule['confidence']:.2f}, Lift: {example_rule['lift']:.2f}.
Implication: In a retail store, placing these items close together or offering bundle discounts could increase sales.
"""

print(analysis)

# ----------------------------
# Save all rules
# ----------------------------
# Write the full rule table (without the index column) and confirm the
# destination on stdout.
all_rules_path = "association_rules.csv"
rules.to_csv(all_rules_path, index=False)
print(f"All rules saved as {all_rules_path}")


Sample Transactions:
['beer', 'milk', 'chicken', 'bananas', 'onions', 'butter', 'yogurt', 'bread', 'diapers']
['tomatoes', 'butter', 'coffee', 'bread', 'milk', 'onions', 'beer', 'fish', 'diapers']
['fish', 'milk', 'tomatoes', 'apples', 'coffee', 'beer', 'bananas', 'bread', 'diapers']
['chicken', 'milk', 'eggs', 'coffee', 'cheese', 'diapers', 'butter', 'bread']
['cheese', 'beer', 'butter', 'cereal', 'diapers']

Top 5 Rules by Lift:
         antecedents       consequents  antecedent support  \
33            (milk)  (diapers, bread)            0.200000   
32  (diapers, bread)            (milk)            0.266667   
34           (bread)   (diapers, milk)            0.333333   
11           (bread)            (milk)            0.333333   
10            (milk)           (bread)            0.200000   

    consequent support  support  confidence  lift  representativity  leverage  \
33            0.266667      0.2        1.00  3.75               1.0  0.146667   
32            0.200000      0.