# Predicting conversion (per user)

## Loading libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

## Simulating data

In [2]:
# Seed NumPy's legacy global generator so every draw below is reproducible.
np.random.seed(42)
n = 20_000

# Simulate one row per user visit. All draws come from the same global random
# stream, so the columns must be generated in exactly this order to reproduce
# the same sample.
df = pd.DataFrame(
    dict(
        hour=np.random.randint(0, 24, size=n),              # integers 0..23
        weekday=np.random.randint(0, 7, size=n),            # integers 0..6
        is_new_user=np.random.binomial(1, 0.6, size=n),     # 0/1 flag, P(1) = 0.6
        sessions_last_7d=np.random.poisson(2, size=n),      # Poisson count, mean 2
        device_mobile=np.random.binomial(1, 0.7, size=n),   # 0/1 flag, P(1) = 0.7
    )
).assign(
    # Weekday codes 5 and 6 are flagged as the weekend.
    is_weekend=lambda frame: frame["weekday"].isin([5, 6]).astype(int)
)
df.head()

Unnamed: 0,hour,weekday,is_new_user,sessions_last_7d,device_mobile,is_weekend
0,6,0,1,5,1,0
1,19,0,1,0,1,0
2,14,3,1,4,1,0
3,10,1,1,1,1,0
4,7,2,0,3,1,0


In [3]:
# Map each cyclic time column onto the unit circle (angle = 2*pi * value / period)
# and store its sine and cosine, so the last value of a cycle sits next to the
# first one (hour 23 next to hour 0, weekday 6 next to weekday 0).
df["hour_sin"] = df["hour"].mul(2 * np.pi).div(24).pipe(np.sin)
df["hour_cos"] = df["hour"].mul(2 * np.pi).div(24).pipe(np.cos)

df["weekday_sin"] = df["weekday"].mul(2 * np.pi).div(7).pipe(np.sin)
df["weekday_cos"] = df["weekday"].mul(2 * np.pi).div(7).pipe(np.cos)
df.head()

Unnamed: 0,hour,weekday,is_new_user,sessions_last_7d,device_mobile,is_weekend,hour_sin,hour_cos,weekday_sin,weekday_cos
0,6,0,1,5,1,0,1.0,6.123234000000001e-17,0.0,1.0
1,19,0,1,0,1,0,-0.965926,0.258819,0.0,1.0
2,14,3,1,4,1,0,-0.5,-0.8660254,0.433884,-0.900969
3,10,1,1,1,1,0,0.5,-0.8660254,0.781831,0.62349
4,7,2,0,3,1,0,0.965926,-0.258819,0.974928,-0.222521


In [4]:
# Ground-truth model for the simulated target: the log-odds of converting are a
# baseline plus additive bumps, pushed through a sigmoid to get a probability.
base = -2
# +1.0 when the hour is in 18..22 (`between` includes both end points).
hour_effect = df["hour"].between(18, 22).astype(float)
# +0.5 when the weekday code is in 1..4.
weekday_effect = 0.5 * df["weekday"].between(1, 4).astype(int)
# +0.7 for returning users (is_new_user == 0).
returning_bonus = 0.7 * (1 - df["is_new_user"])

# Terms are summed in a fixed order so the probabilities (and therefore the
# random draws below) are reproduced exactly.
logit = base + hour_effect + weekday_effect + returning_bonus
prob = 1 / (1 + np.exp(-logit))

# One Bernoulli draw per row, using that row's conversion probability.
df["converted"] = np.random.binomial(n=1, p=prob)
df.head()

Unnamed: 0,hour,weekday,is_new_user,sessions_last_7d,device_mobile,is_weekend,hour_sin,hour_cos,weekday_sin,weekday_cos,converted
0,6,0,1,5,1,0,1.0,6.123234000000001e-17,0.0,1.0,1
1,19,0,1,0,1,0,-0.965926,0.258819,0.0,1.0,0
2,14,3,1,4,1,0,-0.5,-0.8660254,0.433884,-0.900969,1
3,10,1,1,1,1,0,0.5,-0.8660254,0.781831,0.62349,1
4,7,2,0,3,1,0,0.965926,-0.258819,0.974928,-0.222521,0


## Creating model

In [5]:
# Model inputs. The order is kept fixed: it defines the column order the forest
# is fitted on and the labels used when listing feature importances.
features = [
    # raw calendar fields
    "hour",
    "weekday",
    "is_weekend",
    # cyclic encodings of hour and weekday
    "hour_sin",
    "hour_cos",
    "weekday_sin",
    "weekday_cos",
    # user / session attributes
    "is_new_user",
    "sessions_last_7d",
    "device_mobile",
]

X = df.loc[:, features]
y = df.loc[:, "converted"]

# Hold out 20% of the rows for evaluation, stratified on the target so both
# splits keep the same share of converted rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Shallow trees with large leaves keep the forest from fitting noise in the
# simulated labels; `fit` returns the fitted estimator itself.
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=8,
    min_samples_leaf=50,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows with the predicted probability of the positive class.
print(
    "ROC AUC:",
    roc_auc_score(y_true=y_test, y_score=model.predict_proba(X_test)[:, 1]),
)

ROC AUC: 0.6575559999999999


In [None]:
# Label the fitted forest's feature importances with the feature names, then
# rank them from most to least important.
# NOTE(review): these are impurity-based importances; correlated inputs (e.g.
# `hour` and its sin/cos encodings) share credit between them -- keep that in
# mind when reading the ranking.
importances = pd.Series(data=model.feature_importances_, index=features)
importances = importances.sort_values(ascending=False)

print(importances)

hour                0.291924
is_new_user         0.204986
hour_sin            0.151934
hour_cos            0.107877
weekday_sin         0.063100
weekday             0.057896
weekday_cos         0.052105
sessions_last_7d    0.033503
is_weekend          0.023555
device_mobile       0.013120
dtype: float64


In [8]:
# Build one row per (hour, weekday) combination -- hour varies slowest, so the
# row index equals hour * 7 + weekday -- and attach the same derived columns
# that the model's feature list expects.
grid = (
    pd.MultiIndex.from_product(
        [range(24), range(7)], names=["hour", "weekday"]
    )
    .to_frame(index=False)
    .assign(
        is_weekend=lambda g: g["weekday"].isin([5, 6]).astype(int),
        hour_sin=lambda g: np.sin(2 * np.pi * g["hour"] / 24),
        hour_cos=lambda g: np.cos(2 * np.pi * g["hour"] / 24),
        weekday_sin=lambda g: np.sin(2 * np.pi * g["weekday"] / 7),
        weekday_cos=lambda g: np.cos(2 * np.pi * g["weekday"] / 7),
        # Fix other variables (scenario simulation): a returning user on a
        # mobile device with 3 sessions in the last 7 days.
        is_new_user=0,
        sessions_last_7d=3,
        device_mobile=1,
    )
)

# Predicted probability of the positive (converted) class for every slot.
grid["conversion_prob"] = model.predict_proba(grid[features])[:, 1]

# Show the five (hour, weekday) slots with the highest predicted conversion.
best = grid.sort_values(by="conversion_prob", ascending=False).iloc[:5]
display(best)

Unnamed: 0,hour,weekday,is_weekend,hour_sin,hour_cos,weekday_sin,weekday_cos,is_new_user,sessions_last_7d,device_mobile,conversion_prob
141,20,1,0,-0.866025,0.5,0.781831,0.62349,0,3,1,0.634944
155,22,1,0,-0.5,0.866025,0.781831,0.62349,0,3,1,0.623145
148,21,1,0,-0.707107,0.707107,0.781831,0.62349,0,3,1,0.621074
151,21,4,0,-0.707107,0.707107,-0.433884,-0.900969,0,3,1,0.619594
134,19,1,0,-0.965926,0.258819,0.781831,0.62349,0,3,1,0.612869
