In [44]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules


# 1. Load data: read the order lines and keep only the columns used downstream
df = pd.read_csv('orders.csv')[['order_id', 'product_id', 'reordered']]


In [45]:
# Preview the loaded order lines (order_id, product_id, reordered) as a sanity check
df

Unnamed: 0,order_id,product_id,reordered
0,2,33120,1
1,2,28985,1
2,2,9327,0
3,2,45918,1
4,2,30035,0
...,...,...,...
87,10,14992,0
88,10,49235,1
89,10,26842,0
90,10,3464,0


In [47]:

# Mining thresholds, named here so they can be tuned in one place.
MIN_SUPPORT = 0.2      # passed to fpgrowth as min_support
MIN_CONFIDENCE = 0.6   # passed to association_rules as min_threshold (metric="confidence")

# 2. Build transactions for ALL orders: one list of product-id strings per order_id
transactions_all = (
    df
    .groupby('order_id')['product_id']
    .apply(lambda items: list(map(str, items)))
    .tolist()
)

# 3. One-hot encode ALL transactions
te_all = TransactionEncoder()
te_ary_all = te_all.fit(transactions_all).transform(transactions_all)
df_ohe_all = pd.DataFrame(te_ary_all, columns=te_all.columns_)

# 4. Run FP-Growth on ALL orders
freq_itemsets_all = fpgrowth(df_ohe_all, min_support=MIN_SUPPORT, use_colnames=True)

# -------------------
# 5. INSIGHTS for ALL ORDERS
# -------------------

# 5a. Top-10 itemsets by support
top_itemsets = freq_itemsets_all.sort_values('support', ascending=False).head(10)
print("Top 10 frequent itemsets (all orders):")
print(top_itemsets, "\n")

# 5b. Generate association rules.
# association_rules raises ValueError when given an empty itemset frame, which
# happens as soon as MIN_SUPPORT is raised above the most frequent item's support.
# Fall back to an empty frame with the columns used below so the cell still runs.
if freq_itemsets_all.empty:
    rules = pd.DataFrame(
        columns=['antecedents', 'consequents', 'support', 'confidence', 'lift']
    )
else:
    rules = association_rules(
        freq_itemsets_all, metric="confidence", min_threshold=MIN_CONFIDENCE
    )

# 5c. Top-10 rules by lift
strong_rules = rules.sort_values('lift', ascending=False).head(10)
print("Top 10 association rules (by lift):")
print(strong_rules[['antecedents','consequents','support','confidence','lift']], "\n")



Top 10 frequent itemsets (all orders):
    support itemsets
0  0.222222  (14992) 

Top 10 association rules (by lift):
Empty DataFrame
Columns: [antecedents, consequents, support, confidence, lift]
Index: [] 



# A considerar

top_itemsets: the 10 itemsets with the highest support (single products as well as multi-product bundles), so you know which products and combinations to promote together.


rules: “if-then” patterns, e.g. if a customer buys {A,B}, they also buy {C} with confidence X and lift Y.


strong_rules: the highest-impact recommendations (highest lift), ideal for cross-sell or recommendation engine features.

### Variar los siguientes parámetros


min_support in fpgrowth: lower it to find more itemsets, raise it to find fewer.


min_threshold in association_rules to tighten or loosen confidence requirements.

## Incluyendo columna reordered 

Which combinations of products are most common among first-time purchases (reordered == 0), and which "sticky" combinations drive repeat orders (reordered == 1)?


In [48]:

# -------------------
# 6. FREQUENT BUNDLES OF NEW vs. REORDERED ITEMS
# -------------------

SUBSET_MIN_SUPPORT = 0.2  # passed to fpgrowth as min_support for both subsets


def _mine_frequent_itemsets(order_lines, min_support=SUBSET_MIN_SUPPORT):
    """Run the transactions -> one-hot -> FP-Growth pipeline on a set of order lines.

    Parameters
    ----------
    order_lines : pd.DataFrame
        Rows with at least 'order_id' and 'product_id' columns.
    min_support : float
        Forwarded to ``fpgrowth`` as ``min_support``.

    Returns
    -------
    tuple
        ``(transactions, encoder, ohe_array, ohe_frame, itemsets)`` -- every
        intermediate is returned so callers can keep inspecting them.
    """
    # One list of product-id strings per order_id present in order_lines
    transactions = (
        order_lines
        .groupby('order_id')['product_id']
        .apply(lambda items: list(map(str, items)))
        .tolist()
    )
    encoder = TransactionEncoder()
    ohe_array = encoder.fit(transactions).transform(transactions)
    ohe_frame = pd.DataFrame(ohe_array, columns=encoder.columns_)
    itemsets = fpgrowth(ohe_frame, min_support=min_support, use_colnames=True)
    return transactions, encoder, ohe_array, ohe_frame, itemsets


# NOTE: rows are filtered BEFORE grouping, so an order with no matching rows
# contributes no transaction at all. Each subset is therefore mined over only
# the orders that contain at least one such item, not over all orders.

# 6a/6c/6e. NEW items only (reordered == 0)
(
    transactions_new,
    te_new,
    te_ary_new,
    df_ohe_new,
    freq_itemsets_new,
) = _mine_frequent_itemsets(df[df.reordered == 0])

# 6b/6d/6f. REPEAT items only (reordered == 1)
(
    transactions_repeat,
    te_repeat,
    te_ary_repeat,
    df_ohe_repeat,
    freq_itemsets_repeat,
) = _mine_frequent_itemsets(df[df.reordered == 1])

# 6g. Print the top bundles for each
print("Top 10 frequent NEW-item bundles:")
print(freq_itemsets_new.sort_values('support', ascending=False).head(10), "\n")

print("Top 10 frequent REPEAT-item bundles:")
print(freq_itemsets_repeat.sort_values('support', ascending=False).head(10))


Top 10 frequent NEW-item bundles:
Empty DataFrame
Columns: [support, itemsets]
Index: [] 

Top 10 frequent REPEAT-item bundles:
Empty DataFrame
Columns: [support, itemsets]
Index: []
