In [1]:
import pandas as pd 
import numpy as np 
import random
from datetime import datetime, timedelta

In [2]:
# Block 2: Generate Sample Dataset

# Parameters
SEED = 42        # fixed seed so the random draws repeat on every Restart & Run All
n_users = 10000  # number of synthetic users to generate

# This notebook draws from two independent generators: the stdlib `random`
# module (day offsets for the date columns) and NumPy's global RNG (numeric
# columns). Both must be seeded, otherwise every run yields a different dataset.
random.seed(SEED)
np.random.seed(SEED)

# Reference timestamp, captured once so every derived date shares the same
# time-of-day. It still depends on when the notebook is run, so absolute dates
# change between runs even though the seeded day offsets do not.
today = datetime.today()

# Helper: pick a random date between two endpoints
def random_date(start, end):
    """Return `start` moved forward by a random whole number of days.

    The offset is drawn uniformly from 0 up to and including the number of
    whole days between `start` and `end`, so the result never exceeds `end`
    when `end >= start`. Any partial day in the gap is ignored.

    Raises:
        ValueError: propagated from `random.randint` when `end` is at least
            one whole day earlier than `start` (empty range).
    """
    span_days = (end - start).days
    offset = timedelta(days=random.randint(0, span_days))
    return start + offset

# Generate dataset
# Each column is built as a named intermediate before assembly. The statements
# draw from the shared `random` / `np.random` streams, so their order determines
# which values land in which column -- do not reorder them.
user_ids = [f"user_{idx}" for idx in range(1, n_users + 1)]

# Cart abandoned 0-10 whole days before `today` (randint includes both ends).
cart_abandoned_dates = []
for _ in range(n_users):
    cart_abandoned_dates.append(today - timedelta(days=random.randint(0, 10)))

# Most recent order placed 5-200 whole days before `today`.
last_order_dates = []
for _ in range(n_users):
    last_order_dates.append(today - timedelta(days=random.randint(5, 200)))

avg_order_values = np.random.randint(200, 6000, size=n_users)  # INR; upper bound is exclusive
session_counts = np.random.poisson(5, size=n_users)  # Poisson dist for session counts
cart_item_counts = np.random.randint(1, 10, size=n_users)  # 1-9 items; upper bound is exclusive
engagement_scores = np.random.uniform(0, 1, size=n_users).round(2)
profitability_scores = np.random.uniform(0, 1, size=n_users).round(2)

# Key order here fixes the column order of the resulting frame.
data = {
    "user_id": user_ids,
    "cart_abandoned_date": cart_abandoned_dates,
    "last_order_date": last_order_dates,
    "avg_order_value": avg_order_values,
    "sessions_last_30d": session_counts,
    "num_cart_items": cart_item_counts,
    "engagement_score": engagement_scores,
    "profitability_score": profitability_scores,
}

df = pd.DataFrame(data)

print("Sample Data:")
df.head()


Sample Data:


Unnamed: 0,user_id,cart_abandoned_date,last_order_date,avg_order_value,sessions_last_30d,num_cart_items,engagement_score,profitability_score
0,user_1,2025-09-18 23:26:55.769076,2025-04-28 23:26:55.769076,4183,5,1,0.7,0.19
1,user_2,2025-09-16 23:26:55.769076,2025-08-06 23:26:55.769076,889,2,6,0.54,0.29
2,user_3,2025-09-17 23:26:55.769076,2025-03-09 23:26:55.769076,2098,9,4,0.75,0.05
3,user_4,2025-09-09 23:26:55.769076,2025-07-22 23:26:55.769076,2591,11,5,0.85,0.49
4,user_5,2025-09-16 23:26:55.769076,2025-04-07 23:26:55.769076,5958,5,7,0.14,0.25


In [4]:
# Block 3: Define Universe (Cart abandoners in last 7 days)
UNIVERSE_WINDOW_DAYS = 7  # look-back window for cart abandonment, in days

# The cutoff is inclusive (>=): a cart abandoned exactly UNIVERSE_WINDOW_DAYS
# before `today` is still part of the universe.
universe_cutoff = today - timedelta(days=UNIVERSE_WINDOW_DAYS)
is_recent_abandoner = df['cart_abandoned_date'] >= universe_cutoff

# .copy() makes universe_df an independent frame, so later cells can add or
# modify columns on it without SettingWithCopyWarning and without any
# ambiguity about whether `df` is affected.
universe_df = df[is_recent_abandoner].copy()

print(f"Total Users in Universe: {len(universe_df)}")
universe_df.head()


Total Users in Universe: 7299


Unnamed: 0,user_id,cart_abandoned_date,last_order_date,avg_order_value,sessions_last_30d,num_cart_items,engagement_score,profitability_score
0,user_1,2025-09-18 23:26:55.769076,2025-04-28 23:26:55.769076,4183,5,1,0.7,0.19
1,user_2,2025-09-16 23:26:55.769076,2025-08-06 23:26:55.769076,889,2,6,0.54,0.29
2,user_3,2025-09-17 23:26:55.769076,2025-03-09 23:26:55.769076,2098,9,4,0.75,0.05
4,user_5,2025-09-16 23:26:55.769076,2025-04-07 23:26:55.769076,5958,5,7,0.14,0.25
5,user_6,2025-09-12 23:26:55.769076,2025-07-13 23:26:55.769076,287,2,4,0.17,0.33
