In [5]:
import kagglehub

# Fetch the latest version of the Instacart dataset; `path` is the value
# returned by kagglehub and is used by later cells to locate the CSV files.
path = kagglehub.dataset_download(
    "psparks/instacart-market-basket-analysis"
)

# Show where the dataset files are located.
print(f"Path to dataset files: {path}")

Using Colab cache for faster access to the 'instacart-market-basket-analysis' dataset.
Path to dataset files: /kaggle/input/instacart-market-basket-analysis


In [6]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules
import warnings

# Keep DeprecationWarning messages out of the cell output.
warnings.simplefilter("ignore", category=DeprecationWarning)

In [7]:
def _load_table(csv_name):
    """Read one CSV file from the downloaded dataset directory."""
    return pd.read_csv(f"{path}/{csv_name}")


# One DataFrame per CSV file, loaded in the same order as before.
aisles = _load_table("aisles.csv")
dept = _load_table("departments.csv")
products = _load_table("products.csv")
orders = _load_table("orders.csv")
order_products__prior = _load_table("order_products__prior.csv")
order_products__train = _load_table("order_products__train.csv")

# To preview the first 5 rows of one table, run for example:
# aisles.head()

In [8]:
# ---------------------------
# 2. Helper to Build Order-Product Dataset
# ---------------------------
def prepare_order_products(order_subset, products, aisles, dept, orders):
    """Enrich order/product rows with product, aisle, department and order columns.

    The lookup tables are left-joined onto ``order_subset`` one after another:
    ``products`` on ``product_id``, ``aisles`` on ``aisle_id``, ``dept`` on
    ``department_id`` and finally four columns of ``orders`` on ``order_id``.
    Because every join is a left join, rows of ``order_subset`` without a
    match are kept and receive missing values in the joined columns.

    Returns the merged DataFrame; none of the inputs is modified.
    """
    # Only these order-level columns are carried over from `orders`.
    order_context = orders[["order_id", "user_id", "order_dow", "order_hour_of_day"]]

    # (table, join key) pairs, applied strictly in this order because the
    # aisle and department keys only exist after the products join.
    join_plan = (
        (products, "product_id"),
        (aisles, "aisle_id"),
        (dept, "department_id"),
        (order_context, "order_id"),
    )

    enriched = order_subset
    for lookup, key in join_plan:
        enriched = enriched.merge(lookup, on=key, how="left")
    return enriched

# Enrich the "prior" and "train" order/product tables with the same lookups.
order_products_prior = prepare_order_products(
    order_subset=order_products__prior,
    products=products,
    aisles=aisles,
    dept=dept,
    orders=orders,
)
order_products_train = prepare_order_products(
    order_subset=order_products__train,
    products=products,
    aisles=aisles,
    dept=dept,
    orders=orders,
)


In [9]:
# ---------------------------
# 3. Rule Builder
# ---------------------------
def build_rules(order_products, group_col, max_orders=5000, min_support=0.01):
    """Mine association rules from the baskets of a sample of orders.

    Parameters
    ----------
    order_products : pandas.DataFrame
        One row per (order, item) pair. Must contain ``order_id`` and
        ``group_col``.
    group_col : str
        Column whose values are used as basket items.
    max_orders : int or None, default 5000
        Upper slice bound applied to the order ids sorted in ascending order,
        so ``None`` keeps every order.
    min_support : float, default 0.01
        Minimum support passed to ``fpgrowth``.

    Returns
    -------
    pandas.DataFrame
        Output of ``association_rules`` (metric ``lift``, threshold 1.0),
        sorted by confidence and then lift, both descending. When the sample
        has no usable rows or no itemset reaches ``min_support``, an empty
        frame with the columns ``antecedents``, ``consequents``,
        ``confidence`` and ``lift`` is returned instead.
    """
    no_rules = pd.DataFrame(columns=["antecedents", "consequents", "confidence", "lift"])

    # Choose the orders to keep *before* building baskets. Grouping the whole
    # frame into Python lists and truncating afterwards gives the same sample
    # but materialises a list for every order in the input.
    kept_ids = (
        order_products["order_id"]
        .dropna()
        .drop_duplicates()
        .sort_values()
        .iloc[:max_orders]
    )

    # Rows with a missing item value are not real basket items (they can
    # appear after a left join without a match), so they are dropped here.
    in_sample = order_products["order_id"].isin(kept_ids)
    has_item = order_products[group_col].notna()
    sample = order_products[in_sample & has_item]
    if sample.empty:
        return no_rules

    transactions = sample.groupby("order_id")[group_col].apply(list).tolist()

    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)
    if frequent_itemsets.empty:
        # Nothing reached min_support, so there is nothing to derive rules from.
        return no_rules

    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
    return rules.sort_values(by=["confidence", "lift"], ascending=False)



In [10]:
# ---------------------------
# 4. Recommendation Function
# ---------------------------
def recommend(cart_items, rules, top_n=5):
    """Suggest items for a cart from association rules.

    A rule applies when all of its antecedents are in the cart. Each of its
    consequents that is not already in the cart becomes a candidate.
    Candidates are ranked by confidence, then lift (both descending); only
    the best-ranked entry per item is kept.

    Parameters
    ----------
    cart_items : iterable
        Items currently in the cart.
    rules : pandas.DataFrame
        Must provide the columns ``antecedents`` and ``consequents``
        (set-like values) plus ``confidence`` and ``lift``.
    top_n : int, default 5
        Maximum number of recommendations. Values <= 0 yield an empty list.

    Returns
    -------
    list of tuple
        ``(item, reason, confidence, lift)`` tuples, best first.
    """
    # Without this guard top_n=0 would still return one item, because the
    # limit is only checked after an item has been appended.
    if top_n <= 0 or rules is None or rules.empty:
        return []

    cart = set(cart_items)
    candidates = []
    rule_columns = (
        rules["antecedents"],
        rules["consequents"],
        rules["confidence"],
        rules["lift"],
    )
    for antecedents, consequents, confidence, lift in zip(*rule_columns):
        if not antecedents.issubset(cart):
            continue
        # Sort so the text does not depend on set iteration order, and
        # convert to str so non-string item ids can be joined as well.
        bought = ", ".join(sorted(map(str, antecedents)))
        # Sorted iteration makes the order of equally ranked items stable.
        for consequent in sorted(consequents, key=str):
            if consequent in cart:
                continue
            reason = (f"Because you bought {bought}, "
                      f"customers also often buy {consequent}")
            candidates.append((consequent, reason, confidence, lift))

    # Stable sort: candidates with equal (confidence, lift) keep their order.
    candidates.sort(key=lambda rec: (rec[2], rec[3]), reverse=True)

    seen, final_recs = set(), []
    for rec in candidates:
        if rec[0] in seen:
            continue
        seen.add(rec[0])
        final_recs.append(rec)
        if len(final_recs) >= top_n:
            break
    return final_recs



In [11]:
# ---------------------------
# 5. Build Rules Separately for Prior and Train
# ---------------------------
# Product-level rules, mined independently from the "prior" and "train" tables.
rules_prior_product = build_rules(
    order_products=order_products_prior,
    group_col="product_name",
)
rules_train_product = build_rules(
    order_products=order_products_train,
    group_col="product_name",
)


In [12]:
# ---------------------------
# 6. Test Recommendations
# ---------------------------
# Example cart used to compare the two rule sets.
cart_products = {"Bag of Organic Bananas", "Whole Milk"}

# Same report for each rule set: a header line, then one line per recommendation.
for header, rule_set in (
    ("\n=== PRIOR ORDERS: PRODUCT-LEVEL RECOMMENDATIONS ===", rules_prior_product),
    ("\n=== TRAIN ORDERS: PRODUCT-LEVEL RECOMMENDATIONS ===", rules_train_product),
):
    print(header)
    for item, reason, conf, lift in recommend(cart_products, rule_set, top_n=5):
        print(f"- {item} | {reason} (confidence={conf:.2f}, lift={lift:.2f})")


=== PRIOR ORDERS: PRODUCT-LEVEL RECOMMENDATIONS ===
- Organic Hass Avocado | Because you bought Bag of Organic Bananas, customers also often buy Organic Hass Avocado (confidence=0.17, lift=2.61)
- Organic Strawberries | Because you bought Bag of Organic Bananas, customers also often buy Organic Strawberries (confidence=0.14, lift=1.69)
- Organic Raspberries | Because you bought Bag of Organic Bananas, customers also often buy Organic Raspberries (confidence=0.12, lift=2.73)
- Organic Baby Spinach | Because you bought Bag of Organic Bananas, customers also often buy Organic Baby Spinach (confidence=0.11, lift=1.61)

=== TRAIN ORDERS: PRODUCT-LEVEL RECOMMENDATIONS ===
- Organic Strawberries | Because you bought Bag of Organic Bananas, customers also often buy Organic Strawberries (confidence=0.21, lift=2.42)
- Organic Hass Avocado | Because you bought Bag of Organic Bananas, customers also often buy Organic Hass Avocado (confidence=0.17, lift=3.22)
- Organic Baby Spinach | Because you b