In [2]:
# 📘 Simple FP-Growth Algorithm for Market Basket Analysis

# Step 1: Import required libraries

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Step 2: Build a small demo dataset of transactions.
# Every inner list is one customer's basket (items bought together).
transactions = [
    ["milk", "bread", "butter"],
    ["bread", "butter"],
    ["milk", "bread"],
    ["milk", "bread", "butter", "jam"],
    ["bread", "jam"],
    ["milk", "bread", "butter"],
]




In [3]:
# Step 3: One-hot encode the transactions.
# The encoder is fitted on the baskets and then used to transform those same
# baskets; the resulting array is wrapped in a DataFrame whose column labels
# come from te.columns_.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)



In [4]:
# Step 4: Mine frequent itemsets with FP-Growth.
# min_support=0.4 is the support threshold handed to fpgrowth;
# use_colnames=True reports itemsets by item name (see the printed table).
frequent_itemsets = fpgrowth(
    df,
    min_support=0.4,
    use_colnames=True,
)
# One print call: heading first, then the table on the following line.
print("üß∫ Frequent Itemsets:", frequent_itemsets, sep="\n")



üß∫ Frequent Itemsets:
    support               itemsets
0  1.000000                (bread)
1  0.666667                 (milk)
2  0.666667               (butter)
3  0.666667          (milk, bread)
4  0.666667        (butter, bread)
5  0.500000         (butter, milk)
6  0.500000  (butter, milk, bread)


In [5]:
# Step 5: Derive association rules from the frequent itemsets,
# filtering on the confidence metric with a threshold of 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
print("\nüîó Association Rules:")
# Show only the columns needed to read a rule and judge its strength.
print(rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])




üîó Association Rules:
       antecedents      consequents   support  confidence   lift
0           (milk)          (bread)  0.666667        1.00  1.000
1         (butter)          (bread)  0.666667        1.00  1.000
2         (butter)           (milk)  0.500000        0.75  1.125
3           (milk)         (butter)  0.500000        0.75  1.125
4   (butter, milk)          (bread)  0.500000        1.00  1.000
5  (butter, bread)           (milk)  0.500000        0.75  1.125
6    (milk, bread)         (butter)  0.500000        0.75  1.125
7         (butter)    (milk, bread)  0.500000        0.75  1.125
8           (milk)  (butter, bread)  0.500000        0.75  1.125


  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)


In [6]:
# Step 6: Print one plain-English sentence for every rule in `rules`.
print("\nüß† Interpretation:")
for i, row in rules.iterrows():
    # Antecedents/consequents are converted to lists purely for display.
    antecedent_items = list(row['antecedents'])
    consequent_items = list(row['consequents'])
    print(
        f"If a customer buys {antecedent_items}, "
        f"they are likely to buy {consequent_items} "
        f"(confidence = {row['confidence']:.2f}, lift = {row['lift']:.2f})"
    )


üß† Interpretation:
If a customer buys ['milk'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['butter'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['butter'], they are likely to buy ['milk'] (confidence = 0.75, lift = 1.12)
If a customer buys ['milk'], they are likely to buy ['butter'] (confidence = 0.75, lift = 1.12)
If a customer buys ['butter', 'milk'], they are likely to buy ['bread'] (confidence = 1.00, lift = 1.00)
If a customer buys ['butter', 'bread'], they are likely to buy ['milk'] (confidence = 0.75, lift = 1.12)
If a customer buys ['milk', 'bread'], they are likely to buy ['butter'] (confidence = 0.75, lift = 1.12)
If a customer buys ['butter'], they are likely to buy ['milk', 'bread'] (confidence = 0.75, lift = 1.12)
If a customer buys ['milk'], they are likely to buy ['butter', 'bread'] (confidence = 0.75, lift = 1.12)


In [7]:
# ‚úÖ FP-Growth: Frequent Itemsets + Association Rules (simple & short)

!pip -q install mlxtend

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# -----------------------------
# 1) Sample transactions (replace with your data if needed)
# Each list is a customer basket (products bought together)
transactions = [
    ['milk','bread','eggs'],
    ['milk','bread'],
    ['bread','butter'],
    ['milk','eggs','cookies'],
    ['bread','eggs'],
    ['milk','bread','butter'],
    ['bread','cookies'],
    ['milk','bread','eggs','butter'],
    ['milk','cookies'],
    ['bread','butter','jam'],
]

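# --- (Optional) Load from CSV ---
# Assumes a CSV where each row is ONE quoted field holding the whole basket, e.g. "milk,bread,eggs"
# (if the rows are unquoted, read_csv splits them into separate columns and df_raw[0] holds only the first item).
# df_raw = pd.read_csv('baskets.csv', header=None)
# transactions = df_raw[0].apply(lambda x: [i.strip() for i in str(x).split(',')]).tolist()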

# -----------------------------
# 2) One-hot encode the baskets
# Fit the encoder first, then build the DataFrame from the transformed
# baskets, labelling the columns with te.columns_.
te = TransactionEncoder()
te.fit(transactions)
df = pd.DataFrame(data=te.transform(transactions), columns=te.columns_)

# -----------------------------
# 3) FP-Growth: find frequent itemsets
# Adjust min_support (e.g., 0.2 to 0.5) to control how frequent is "frequent".
# The result is ordered from highest to lowest support before printing.
frequent_itemsets = (
    fpgrowth(df, min_support=0.3, use_colnames=True)
    .sort_values('support', ascending=False)
)
print("üß∫ Frequent Itemsets:", frequent_itemsets, sep="\n")

# -----------------------------
# 4) Generate association rules
# metric='confidence' is common; 'lift' or 'leverage' can be used instead.
# Rules are ordered by lift, then confidence, both descending.
rules = (
    association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)
    .sort_values(by=['lift', 'confidence'], ascending=False)
)
print("\nüîó Association Rules (sorted by lift, confidence):")
print(rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

# -----------------------------
# 5) Simple, human-readable interpretations
def nice(s):
    """Return the items of ``s`` as one comma-separated string.

    Items are converted to ``str`` and sorted before joining. Set iteration
    order for strings depends on the per-process hash seed, so sorting keeps
    the text identical from one kernel run to the next, and the conversion
    lets non-string items (e.g. integer product ids) be joined too.

    Args:
        s: Any iterable of items, such as the frozensets stored in the
            ``antecedents`` / ``consequents`` columns of a rules table.

    Returns:
        The items joined with ``', '``; an empty string for an empty input.
    """
    return ', '.join(sorted(str(item) for item in s))
print("\nüß† Easy Interpretations:")
for _, r in rules.head(5).iterrows():
    print(f"If a basket has [{nice(r['antecedents'])}], "
          f"then it often also has [{nice(r['consequents'])}] "
          f"(conf={r['confidence']:.2f}, lift={r['lift']:.2f})")


üß∫ Frequent Itemsets:
   support         itemsets
0      0.8          (bread)
1      0.6           (milk)
2      0.4           (eggs)
3      0.4         (butter)
5      0.4    (milk, bread)
8      0.4  (butter, bread)
4      0.3        (cookies)
6      0.3     (milk, eggs)
7      0.3    (bread, eggs)

üîó Association Rules (sorted by lift, confidence):
  antecedents consequents  support  confidence      lift
1    (butter)     (bread)      0.4    1.000000  1.250000
2      (eggs)      (milk)      0.3    0.750000  1.250000
3      (eggs)     (bread)      0.3    0.750000  0.937500
0      (milk)     (bread)      0.4    0.666667  0.833333

üß† Easy Interpretations:
If a basket has [butter], then it often also has [bread] (conf=1.00, lift=1.25)
If a basket has [eggs], then it often also has [milk] (conf=0.75, lift=1.25)
If a basket has [eggs], then it often also has [bread] (conf=0.75, lift=0.94)
If a basket has [milk], then it often also has [bread] (conf=0.67, lift=0.83)
