In [1]:
pip install mlxtend pandas

Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 13.4 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Market-basket analysis on a small synthetic dataset:
#   1. build a list of transactions with deliberately embedded patterns,
#   2. one-hot encode them into a DataFrame,
#   3. mine frequent itemsets with Apriori,
#   4. derive association rules and rank them by lift.

# Tunable thresholds, named so they can be adjusted in one place.
MIN_SUPPORT = 0.2      # minimum support: itemset appears in >= 20% of transactions
MIN_CONFIDENCE = 0.7   # minimum confidence (70%) for a rule to be kept

try:
    # --- Step 1 & 2: Load and Format Synthetic Data ---
    # No download needed! We'll create our own transaction data.
    # We will embed some obvious patterns to see if the algorithm can find them.
    transactions = [
        ['Milk', 'Bread', 'Butter'],
        ['Beer', 'Diapers', 'Chips', 'Soda'],
        ['Milk', 'Bread', 'Cereal'],
        ['Beer', 'Diapers', 'Chips', 'Nuts'],
        ['Milk', 'Bread', 'Yogurt'],
        ['Beer', 'Diapers', 'Chips'],
        ['Milk', 'Bread', 'Butter', 'Soda'],
        ['Chocolate', 'Soda', 'Nuts'],
        ['Beer', 'Diapers', 'Chips'],
        ['Milk', 'Cereal', 'Yogurt'],
        ['Bread', 'Butter', 'Yogurt'],
        ['Beer', 'Diapers', 'Chips', 'Soda'],
        ['Milk', 'Bread']
    ]
    print("--- Step 1 & 2: Transaction Data Created ---")
    print(f"Total transactions: {len(transactions)}\n")

    # The Apriori algorithm requires data in a one-hot encoded DataFrame.
    # The TransactionEncoder does this conversion for us: one boolean
    # column per distinct item, one row per transaction.
    te = TransactionEncoder()
    te_ary = te.fit_transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    print("One-Hot Encoded DataFrame for Apriori:")
    print(df)
    print("\n")

    # --- Step 3: Apply Apriori ---
    # Find frequent itemsets that meet the minimum support threshold.
    # use_colnames=True reports itemsets by item name instead of column index.
    frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)

    print("--- Step 3: Frequent Itemsets Found ---")
    print(frequent_itemsets)
    print("\n")

    # --- Step 4: Generate Rules ---
    # Keep only rules whose confidence meets the minimum threshold.
    rules = association_rules(
        frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE
    )

    # Sort the rules by 'lift' to see the most interesting ones first
    rules = rules.sort_values(by='lift', ascending=False)

    print("--- Step 4: Association Rules Generated ---")
    print("Top Association Rules:")
    # We select the most relevant columns for a clear view
    print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
    print("\n--- Analysis Complete ---")

except Exception as e:
    # Keep the short, readable message, but re-raise so the failure is not
    # silently swallowed: the full traceback is shown and "Run All" stops
    # here instead of continuing with undefined variables.
    print(f"An error occurred: {e}")
    raise

--- Step 1 & 2: Transaction Data Created ---
Total transactions: 13

One-Hot Encoded DataFrame for Apriori:
     Beer  Bread  Butter  Cereal  Chips  Chocolate  Diapers   Milk   Nuts  \
0   False   True    True   False  False      False    False   True  False   
1    True  False   False   False   True      False     True  False  False   
2   False   True   False    True  False      False    False   True  False   
3    True  False   False   False   True      False     True  False   True   
4   False   True   False   False  False      False    False   True  False   
5    True  False   False   False   True      False     True  False  False   
6   False   True    True   False  False      False    False   True  False   
7   False  False   False   False  False       True    False  False   True   
8    True  False   False   False   True      False     True  False  False   
9   False  False   False    True  False      False    False   True  False   
10  False   True    True   False  False      