In [2]:
import kagglehub

# Fetch the most recent dataset version and keep the directory it was placed in.
path = kagglehub.dataset_download(
    "psparks/instacart-market-basket-analysis"
)

# Show where the dataset files ended up on disk.
print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\Qamar Hasan\.cache\kagglehub\datasets\psparks\instacart-market-basket-analysis\versions\1


In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# ---------------------------
# 1. Load Data
# ---------------------------
# Every table is read from "<path>/<table>.csv"; the names on the left line up
# one-to-one, in order, with the table names listed below.
aisles, dept, products, orders, order_prior, order_train = (
    pd.read_csv(f"{path}/{table}.csv")
    for table in (
        "aisles",
        "departments",
        "products",
        "orders",
        "order_products__prior",
        "order_products__train",
    )
)

# ---------------------------
# 2. Merge Data
# ---------------------------
# Start from the prior-order line items and attach, via left joins, the
# product, aisle, and department lookups plus a few per-order columns.
order_products = (
    order_prior
    .merge(products, how="left", on="product_id")
    .merge(aisles, how="left", on="aisle_id")
    .merge(dept, how="left", on="department_id")
    .merge(
        orders.loc[:, ["order_id", "user_id", "order_dow", "order_hour_of_day"]],
        how="left",
        on="order_id",
    )
)

# ---------------------------
# 3. Recommendation Function
# ---------------------------
def build_rules(order_products, group_col, max_orders=5000, min_support=0.01):
    """Generate association rules at a given granularity (product/aisle/department).

    Parameters
    ----------
    order_products : pandas.DataFrame
        One row per ordered item; must contain ``order_id`` and ``group_col``.
    group_col : str
        Column whose values are treated as basket items (e.g. ``"product_name"``,
        ``"aisle"``, ``"department"``).
    max_orders : int or None, default 5000
        Number of orders to mine, taken in ascending ``order_id`` order.
        ``None`` mines every order.
    min_support : float, default 0.01
        Minimum support passed to ``fpgrowth``.

    Returns
    -------
    pandas.DataFrame
        Rules with lift >= 1.0, sorted by confidence and then lift, both
        descending. If there are no transactions, or no itemset reaches
        ``min_support``, an empty frame carrying only the ``antecedents``,
        ``consequents``, ``support``, ``confidence`` and ``lift`` columns is
        returned.
    """
    empty_rules = pd.DataFrame(
        columns=["antecedents", "consequents", "support", "confidence", "lift"]
    )

    # Choose the orders BEFORE grouping. groupby sorts its keys, so the first
    # `max_orders` groups are exactly the smallest `max_orders` order ids;
    # selecting them up front yields the same baskets without building a list
    # for every order in the frame only to throw most of them away.
    if max_orders is None:
        sampled = order_products
    else:
        order_ids = (
            order_products["order_id"].dropna().drop_duplicates().sort_values()
        )
        order_ids = order_ids.iloc[:max_orders]
        sampled = order_products[order_products["order_id"].isin(order_ids)]

    # Build transactions: one list of items per order
    transactions = sampled.groupby("order_id")[group_col].apply(list).tolist()
    if not transactions:
        return empty_rules

    # One-hot encode
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # FP-Growth
    frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)
    if frequent_itemsets.empty:
        # association_rules raises ValueError on an empty itemset frame;
        # "no rules" is a legitimate outcome for a high min_support.
        return empty_rules

    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
    rules = rules.sort_values(by=["confidence", "lift"], ascending=False)
    return rules


In [4]:
def recommend(cart_items, rules, top_n=5):
    """Return up to ``top_n`` recommendations, each with a human-readable reason.

    Parameters
    ----------
    cart_items : iterable
        Items currently in the cart.
    rules : pandas.DataFrame
        Rules frame with ``antecedents`` and ``consequents`` (set-like values),
        ``confidence`` and ``lift`` columns. An empty frame gives no
        recommendations.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty list.

    Returns
    -------
    list of tuple
        ``(item, reason, confidence, lift)`` tuples, best first (by confidence,
        then lift). Each item appears once, using its strongest matching rule,
        and items already in the cart are never recommended.
    """
    cart_items = set(cart_items)
    # Checking the limit up front: testing it only after an append would hand
    # back one recommendation even when none were requested.
    if top_n <= 0 or rules.empty:
        return []

    candidates = []
    rule_fields = zip(
        rules["antecedents"], rules["consequents"], rules["confidence"], rules["lift"]
    )
    for antecedents, consequents, confidence, lift in rule_fields:
        if not antecedents.issubset(cart_items):
            continue
        # Sets have no stable iteration order; sorting keeps the reason text
        # identical from run to run, and str() allows non-string items.
        bought = ", ".join(sorted(map(str, antecedents)))
        for consequent in sorted(consequents, key=str):
            if consequent in cart_items:
                continue
            reason = (f"Because you bought {bought}, "
                      f"customers also often buy {consequent}")
            candidates.append((consequent, reason, confidence, lift))

    # Strongest rules first; the sort is stable, so ties keep rule order.
    candidates.sort(key=lambda rec: (rec[2], rec[3]), reverse=True)

    # Keep only the best-ranked entry for each recommended item.
    seen, final_recs = set(), []
    for rec in candidates:
        if rec[0] in seen:
            continue
        seen.add(rec[0])
        final_recs.append(rec)
        if len(final_recs) >= top_n:
            break
    return final_recs

# ---------------------------
# 4. Generate Rules for All Levels
# ---------------------------
# One rule set per granularity, mined from the same merged frame.
rules_product = build_rules(order_products, group_col="product_name")
rules_aisle = build_rules(order_products, group_col="aisle")
rules_department = build_rules(order_products, group_col="department")

# ---------------------------
# 5. Test Recommendations
# ---------------------------
# A sample cart for each granularity.
cart_products = {"Bag of Organic Bananas", "Whole Milk"}
cart_aisles = {"fresh fruits", "yogurt"}
cart_departments = {"produce", "dairy"}

# Print the top recommendations for each (header, cart, rule set) triple.
for section_title, section_cart, section_rules in (
    ("\n=== PRODUCT-LEVEL RECOMMENDATIONS ===", cart_products, rules_product),
    ("\n=== AISLE-LEVEL RECOMMENDATIONS ===", cart_aisles, rules_aisle),
    ("\n=== DEPARTMENT-LEVEL RECOMMENDATIONS ===", cart_departments, rules_department),
):
    print(section_title)
    for item, reason, conf, lift in recommend(section_cart, section_rules, top_n=5):
        print(f"- {item} | {reason} (confidence={conf:.2f}, lift={lift:.2f})")


=== PRODUCT-LEVEL RECOMMENDATIONS ===
- Organic Hass Avocado | Because you bought Bag of Organic Bananas, customers also often buy Organic Hass Avocado (confidence=0.17, lift=2.61)
- Organic Strawberries | Because you bought Bag of Organic Bananas, customers also often buy Organic Strawberries (confidence=0.14, lift=1.69)
- Organic Raspberries | Because you bought Bag of Organic Bananas, customers also often buy Organic Raspberries (confidence=0.12, lift=2.73)
- Organic Baby Spinach | Because you bought Bag of Organic Bananas, customers also often buy Organic Baby Spinach (confidence=0.11, lift=1.61)

=== AISLE-LEVEL RECOMMENDATIONS ===
- fresh vegetables | Because you bought yogurt, fresh fruits, customers also often buy fresh vegetables (confidence=0.63, lift=1.42)
- packaged vegetables fruits | Because you bought yogurt, fresh fruits, customers also often buy packaged vegetables fruits (confidence=0.54, lift=1.47)
- milk | Because you bought yogurt, fresh fruits, customers also oft

In [5]:
# ---------------------------
# 6. Personalized Cart Fetch
# ---------------------------
def get_user_cart(user_id, order_products, group_col="product_name", last_only=True):
    """Fetch items from a user's last order or from all of their past orders.

    Parameters
    ----------
    user_id : scalar
        Value matched against the ``user_id`` column.
    order_products : pandas.DataFrame
        Line items with ``user_id``, ``order_id`` and ``group_col`` columns.
        If an ``order_number`` column is present it is used to find the last
        order.
    group_col : str, default "product_name"
        Column whose values make up the cart.
    last_only : bool, default True
        ``True`` returns only the last order's items; ``False`` returns the
        union of everything the user has ordered.

    Returns
    -------
    set
        The cart items with missing values dropped; empty when the user has no
        rows in ``order_products``.
    """
    user_orders = order_products[order_products["user_id"] == user_id]

    if user_orders.empty:
        return set()  # no history for this user

    if not last_only:
        # Union of all items the user ever bought
        return set(user_orders[group_col].dropna())

    candidate_ids = user_orders["order_id"]
    if "order_number" in user_orders.columns:
        # Prefer the per-user sequence number when the frame carries it.
        newest = user_orders["order_number"] == user_orders["order_number"].max()
        if newest.any():
            candidate_ids = user_orders.loc[newest, "order_id"]
    # NOTE(review): without order_number this falls back to the largest
    # order_id, which is only presumed to be the most recent order -- confirm.
    last_order_id = candidate_ids.max()

    last_items = user_orders.loc[user_orders["order_id"] == last_order_id, group_col]
    return set(last_items.dropna())

# ---------------------------
# 7. Example: Personalized Recommendation
# ---------------------------
# Pick a user (any id from orders.csv) and rebuild the cart from their last order.
user_id = 7
user_cart = get_user_cart(
    user_id,
    order_products,
    group_col="product_name",
    last_only=True,
)

print(f"\n=== USER {user_id} CART ===")
print(user_cart)

# An empty cart means the user has no rows in order_products.
if not user_cart:
    print("No history available for this user.")
else:
    print("\n=== PERSONALIZED PRODUCT-LEVEL RECOMMENDATIONS ===")
    for item, reason, conf, lift in recommend(user_cart, rules_product, top_n=5):
        print(f"- {item} | {reason} (confidence={conf:.2f}, lift={lift:.2f})")



=== USER 7 CART ===
{'Uncured Slow Cooked Ham', 'Vanilla Coffee Concentrate', 'Organic Apple Slices', 'Mexican Coffee', 'Organic Strawberries', 'Lactose Free Fat Free Milk', 'Organic Half & Half', 'Antioxidant Infusions Ipanema Pomegranate Beverage', 'Antioxidant Infusions Beverage Malawi Mango', 'Soft Potato Bread'}

=== PERSONALIZED PRODUCT-LEVEL RECOMMENDATIONS ===
- Banana | Because you bought Organic Strawberries, customers also often buy Banana (confidence=0.23, lift=1.53)
- Bag of Organic Bananas | Because you bought Organic Strawberries, customers also often buy Bag of Organic Bananas (confidence=0.21, lift=1.69)
- Organic Hass Avocado | Because you bought Organic Strawberries, customers also often buy Organic Hass Avocado (confidence=0.16, lift=2.44)
- Organic Baby Spinach | Because you bought Organic Strawberries, customers also often buy Organic Baby Spinach (confidence=0.14, lift=2.05)
- Organic Raspberries | Because you bought Organic Strawberries, customers also often bu

In [7]:
# NOTE(review): get_user_cart is defined in more than one cell of this
# notebook; whichever cell ran last is the definition in effect.
def get_user_cart(user_id, order_products, group_col="product_name", last_only=True):
    """Fetch items from a user's last order or from all of their past orders.

    Parameters
    ----------
    user_id : scalar
        Value matched against the ``user_id`` column.
    order_products : pandas.DataFrame
        Line items with ``user_id``, ``order_id`` and ``group_col`` columns.
        If an ``order_number`` column is present it is used to find the last
        order.
    group_col : str, default "product_name"
        Column whose values make up the cart.
    last_only : bool, default True
        ``True`` returns only the last order's items; ``False`` returns the
        union of everything the user has ordered.

    Returns
    -------
    set
        The cart items with missing values dropped; empty when the user has no
        rows in ``order_products``.
    """
    user_orders = order_products[order_products["user_id"] == user_id]

    if user_orders.empty:
        return set()

    if not last_only:
        return set(user_orders[group_col].dropna())

    candidate_ids = user_orders["order_id"]
    if "order_number" in user_orders.columns:
        # Prefer the per-user sequence number when the frame carries it.
        newest = user_orders["order_number"] == user_orders["order_number"].max()
        if newest.any():
            candidate_ids = user_orders.loc[newest, "order_id"]
    # NOTE(review): without order_number this falls back to the largest
    # order_id, which is only presumed to be the most recent order -- confirm.
    last_order_id = candidate_ids.max()

    last_items = user_orders.loc[user_orders["order_id"] == last_order_id, group_col]
    return set(last_items.dropna())


In [8]:
# joblib is imported in this cell because the notebook's only other import of
# it sits in a later cell; without it a top-to-bottom run fails with NameError.
import joblib

# Persist the merged order/product frame to the working directory.
joblib.dump(order_products, "order_products.pkl")

['order_products.pkl']

In [6]:
import joblib

# Write each rule set to its own pickle file in the working directory.
for rules_filename, rules_frame in (
    ("rules_product.pkl", rules_product),
    ("rules_aisle.pkl", rules_aisle),
    ("rules_department.pkl", rules_department),
):
    joblib.dump(rules_frame, rules_filename)

print("Rules saved successfully!")


Rules saved successfully!
