In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("simulated_data.csv")
df.head()

Unnamed: 0,user_id,account_id,user_role,plan_type,country,timezone,session_length_seconds,pages_visited,last_seen_days_ago,conversation_open,previous_messages_sent,previous_messages_clicked,previous_messages_dismissed,message_type,send_message,clicked,replied,conversation_resolved
0,0,1339,end_user,pro,US,-1,311,5,1,0,1,1,0,bug_help,1,1,1,1
1,1,11609,end_user,pro,APAC,8,353,8,7,1,0,0,0,bug_help,1,1,1,0
2,2,9818,end_user,pro,LATAM,-3,216,4,0,0,2,1,1,none,0,1,1,1
3,3,6583,admin,pro,LATAM,-5,348,5,10,0,2,0,2,onboarding_tip,0,0,0,0
4,4,6495,end_user,pro,EU,6,301,4,13,0,1,1,0,onboarding_tip,0,0,0,1


In [3]:
df["reward"] = (
    1.5 * df["replied"]
    + 0.5 * df["clicked"]
    - 1.0 * df["previous_messages_dismissed"]
)
df.head()

Unnamed: 0,user_id,account_id,user_role,plan_type,country,timezone,session_length_seconds,pages_visited,last_seen_days_ago,conversation_open,previous_messages_sent,previous_messages_clicked,previous_messages_dismissed,message_type,send_message,clicked,replied,conversation_resolved,reward
0,0,1339,end_user,pro,US,-1,311,5,1,0,1,1,0,bug_help,1,1,1,1,2.0
1,1,11609,end_user,pro,APAC,8,353,8,7,1,0,0,0,bug_help,1,1,1,0,2.0
2,2,9818,end_user,pro,LATAM,-3,216,4,0,0,2,1,1,none,0,1,1,1,1.0
3,3,6583,admin,pro,LATAM,-5,348,5,10,0,2,0,2,onboarding_tip,0,0,0,0,-2.0
4,4,6495,end_user,pro,EU,6,301,4,13,0,1,1,0,onboarding_tip,0,0,0,1,0.0


“I’d optimize expected reward rather than raw click-through rate.”

# Exploratory Data Analysis

In [4]:
df[['clicked', 'replied', 'conversation_resolved']].describe()

Unnamed: 0,clicked,replied,conversation_resolved
count,100000.0,100000.0,100000.0
mean,0.53584,0.5429,0.51682
std,0.498716,0.498159,0.49972
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.0,1.0,1.0
75%,1.0,1.0,1.0
max,1.0,1.0,1.0


In [5]:
df.groupby('message_type')['clicked'].mean()

message_type
bug_help          0.533267
none              0.531322
onboarding_tip    0.538431
pricing_help      0.538353
Name: clicked, dtype: float64

# Feature engineering

In [6]:
df["engagement_rate"] = (
    df["previous_messages_clicked"] /
    (df["previous_messages_sent"] + 1)
)

df["fatigue_score"] = (
    df["previous_messages_sent"] -
    df["previous_messages_clicked"]
)

features = [
    "session_length_seconds",
    "pages_visited",
    "last_seen_days_ago",
    "conversation_open",
    "engagement_rate",
    "fatigue_score"
]

target = "clicked"

# Baseline model

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

In [8]:
X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

preds = model.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, preds)

0.9038676715150068

“This gives me a strong, interpretable baseline.”

# Contextual bandit formulation

In [9]:
ACTIONS = ["none", "onboarding_tip", "pricing_help", "bug_help"]

In [10]:
df.head()

Unnamed: 0,user_id,account_id,user_role,plan_type,country,timezone,session_length_seconds,pages_visited,last_seen_days_ago,conversation_open,...,previous_messages_clicked,previous_messages_dismissed,message_type,send_message,clicked,replied,conversation_resolved,reward,engagement_rate,fatigue_score
0,0,1339,end_user,pro,US,-1,311,5,1,0,...,1,0,bug_help,1,1,1,1,2.0,0.5,0
1,1,11609,end_user,pro,APAC,8,353,8,7,1,...,0,0,bug_help,1,1,1,0,2.0,0.0,0
2,2,9818,end_user,pro,LATAM,-3,216,4,0,0,...,1,1,none,0,1,1,1,1.0,0.333333,1
3,3,6583,admin,pro,LATAM,-5,348,5,10,0,...,0,2,onboarding_tip,0,0,0,0,-2.0,0.0,2
4,4,6495,end_user,pro,EU,6,301,4,13,0,...,1,0,onboarding_tip,0,0,0,1,0.0,0.5,0


In [11]:
from sklearn.linear_model import SGDRegressor
from collections import defaultdict

SGD Regressor is important for reasons like:

- It can handle big datasets and is computationally efficient. It is therefore appropriate for big data applications.
- It has the ability to learn online, which allows it to update the model whenever fresh data becomes available. This is crucial for applications that use real-time data streams.
- It is appropriate for distributed computing systems since it may be parallelized.
- To avoid overfitting and enhance generalization, SGD can be expanded to incorporate regularization strategies as L1 (Lasso) and L2 (Ridge) regularization.
- It may be applied to various regression algorithms, such as support vector machines (SVM) and neural networks, and is not just restricted to linear regression.

In [12]:
df_sample = df.sample(1000)
X_train = df_sample[features].values
y_train = df_sample[target].values

In [13]:
models = {
    action: SGDRegressor(learning_rate="constant", eta0=0.01)
    for action in ACTIONS
}

# Initialize
for m in models.values():
    m.partial_fit(X_train[:10], np.zeros(10))

In [14]:
import random

from tqdm import tqdm
tqdm.pandas()

In [15]:
def select_action(context, epsilon=0.1):
    if random.random() < epsilon:
        return random.choice(ACTIONS)
    scores = {
        a: models[a].predict(context.reshape(1, -1))[0]
        for a in ACTIONS
    }
    return max(scores, key=scores.get)

In [16]:
total_reward = 0

with tqdm(total=len(df_sample)) as pbar:

    for _, row in df_sample.iterrows():
    #for _, row in tqdm(df.iterrows(), total=len(df)):

        context = row[features].values
        action = select_action(context)

        reward = row["reward"] if row["message_type"] == action else 0

        models[action].partial_fit(
            context.reshape(1, -1),
            [reward]
        )

        total_reward += reward

        pbar.update(1)

        # Display the completion percentage
        print(f"Completion: {pbar.n}/{pbar.total}")

total_reward

  8%|▊         | 76/1000 [00:00<00:02, 353.98it/s]

Completion: 1/1000
Completion: 2/1000
Completion: 3/1000
Completion: 4/1000
Completion: 5/1000
Completion: 6/1000
Completion: 7/1000
Completion: 8/1000
Completion: 9/1000
Completion: 10/1000
Completion: 11/1000
Completion: 12/1000
Completion: 13/1000
Completion: 14/1000
Completion: 15/1000
Completion: 16/1000
Completion: 17/1000
Completion: 18/1000
Completion: 19/1000
Completion: 20/1000
Completion: 21/1000
Completion: 22/1000
Completion: 23/1000
Completion: 24/1000
Completion: 25/1000
Completion: 26/1000
Completion: 27/1000
Completion: 28/1000
Completion: 29/1000
Completion: 30/1000
Completion: 31/1000
Completion: 32/1000
Completion: 33/1000
Completion: 34/1000
Completion: 35/1000
Completion: 36/1000
Completion: 37/1000
Completion: 38/1000
Completion: 39/1000
Completion: 40/1000
Completion: 41/1000
Completion: 42/1000
Completion: 43/1000
Completion: 44/1000
Completion: 45/1000
Completion: 46/1000
Completion: 47/1000
Completion: 48/1000
Completion: 49/1000
Completion: 50/1000
Completio

 26%|██▌       | 258/1000 [00:00<00:01, 564.84it/s]

Completion: 109/1000
Completion: 110/1000
Completion: 111/1000
Completion: 112/1000
Completion: 113/1000
Completion: 114/1000
Completion: 115/1000
Completion: 116/1000
Completion: 117/1000
Completion: 118/1000
Completion: 119/1000
Completion: 120/1000
Completion: 121/1000
Completion: 122/1000
Completion: 123/1000
Completion: 124/1000
Completion: 125/1000
Completion: 126/1000
Completion: 127/1000
Completion: 128/1000
Completion: 129/1000
Completion: 130/1000
Completion: 131/1000
Completion: 132/1000
Completion: 133/1000
Completion: 134/1000
Completion: 135/1000
Completion: 136/1000
Completion: 137/1000
Completion: 138/1000
Completion: 139/1000
Completion: 140/1000
Completion: 141/1000
Completion: 142/1000
Completion: 143/1000
Completion: 144/1000
Completion: 145/1000
Completion: 146/1000
Completion: 147/1000
Completion: 148/1000
Completion: 149/1000
Completion: 150/1000
Completion: 151/1000
Completion: 152/1000
Completion: 153/1000
Completion: 154/1000
Completion: 155/1000
Completion: 1

 38%|███▊      | 378/1000 [00:00<00:01, 582.93it/s]

Completion: 258/1000
Completion: 259/1000
Completion: 260/1000
Completion: 261/1000
Completion: 262/1000
Completion: 263/1000
Completion: 264/1000
Completion: 265/1000
Completion: 266/1000
Completion: 267/1000
Completion: 268/1000
Completion: 269/1000
Completion: 270/1000
Completion: 271/1000
Completion: 272/1000
Completion: 273/1000
Completion: 274/1000
Completion: 275/1000
Completion: 276/1000
Completion: 277/1000
Completion: 278/1000
Completion: 279/1000
Completion: 280/1000
Completion: 281/1000
Completion: 282/1000
Completion: 283/1000
Completion: 284/1000
Completion: 285/1000
Completion: 286/1000
Completion: 287/1000
Completion: 288/1000
Completion: 289/1000
Completion: 290/1000
Completion: 291/1000
Completion: 292/1000
Completion: 293/1000
Completion: 294/1000
Completion: 295/1000
Completion: 296/1000
Completion: 297/1000
Completion: 298/1000
Completion: 299/1000
Completion: 300/1000
Completion: 301/1000
Completion: 302/1000
Completion: 303/1000
Completion: 304/1000
Completion: 3

 51%|█████     | 512/1000 [00:00<00:00, 584.46it/s]

Completion: 402/1000
Completion: 403/1000
Completion: 404/1000
Completion: 405/1000
Completion: 406/1000
Completion: 407/1000
Completion: 408/1000
Completion: 409/1000
Completion: 410/1000
Completion: 411/1000
Completion: 412/1000
Completion: 413/1000
Completion: 414/1000
Completion: 415/1000
Completion: 416/1000
Completion: 417/1000
Completion: 418/1000
Completion: 419/1000
Completion: 420/1000
Completion: 421/1000
Completion: 422/1000
Completion: 423/1000
Completion: 424/1000
Completion: 425/1000
Completion: 426/1000
Completion: 427/1000
Completion: 428/1000
Completion: 429/1000
Completion: 430/1000
Completion: 431/1000
Completion: 432/1000
Completion: 433/1000
Completion: 434/1000
Completion: 435/1000
Completion: 436/1000
Completion: 437/1000
Completion: 438/1000
Completion: 439/1000
Completion: 440/1000
Completion: 441/1000
Completion: 442/1000
Completion: 443/1000
Completion: 444/1000
Completion: 445/1000
Completion: 446/1000
Completion: 447/1000
Completion: 448/1000
Completion: 4

 64%|██████▍   | 644/1000 [00:01<00:00, 605.54it/s]

Completion: 556/1000
Completion: 557/1000
Completion: 558/1000
Completion: 559/1000
Completion: 560/1000
Completion: 561/1000
Completion: 562/1000
Completion: 563/1000
Completion: 564/1000
Completion: 565/1000
Completion: 566/1000
Completion: 567/1000
Completion: 568/1000
Completion: 569/1000
Completion: 570/1000
Completion: 571/1000
Completion: 572/1000
Completion: 573/1000
Completion: 574/1000
Completion: 575/1000
Completion: 576/1000
Completion: 577/1000
Completion: 578/1000
Completion: 579/1000
Completion: 580/1000
Completion: 581/1000
Completion: 582/1000
Completion: 583/1000
Completion: 584/1000
Completion: 585/1000
Completion: 586/1000
Completion: 587/1000
Completion: 588/1000
Completion: 589/1000
Completion: 590/1000
Completion: 591/1000
Completion: 592/1000
Completion: 593/1000
Completion: 594/1000
Completion: 595/1000
Completion: 596/1000
Completion: 597/1000
Completion: 598/1000
Completion: 599/1000
Completion: 600/1000
Completion: 601/1000
Completion: 602/1000
Completion: 6

 86%|████████▌ | 861/1000 [00:01<00:00, 659.15it/s]

Completion: 717/1000
Completion: 718/1000
Completion: 719/1000
Completion: 720/1000
Completion: 721/1000
Completion: 722/1000
Completion: 723/1000
Completion: 724/1000
Completion: 725/1000
Completion: 726/1000
Completion: 727/1000
Completion: 728/1000
Completion: 729/1000
Completion: 730/1000
Completion: 731/1000
Completion: 732/1000
Completion: 733/1000
Completion: 734/1000
Completion: 735/1000
Completion: 736/1000
Completion: 737/1000
Completion: 738/1000
Completion: 739/1000
Completion: 740/1000
Completion: 741/1000
Completion: 742/1000
Completion: 743/1000
Completion: 744/1000
Completion: 745/1000
Completion: 746/1000
Completion: 747/1000
Completion: 748/1000
Completion: 749/1000
Completion: 750/1000
Completion: 751/1000
Completion: 752/1000
Completion: 753/1000
Completion: 754/1000
Completion: 755/1000
Completion: 756/1000
Completion: 757/1000
Completion: 758/1000
Completion: 759/1000
Completion: 760/1000
Completion: 761/1000
Completion: 762/1000
Completion: 763/1000
Completion: 7

 93%|█████████▎| 928/1000 [00:01<00:00, 646.82it/s]

Completion: 863/1000
Completion: 864/1000
Completion: 865/1000
Completion: 866/1000
Completion: 867/1000
Completion: 868/1000
Completion: 869/1000
Completion: 870/1000
Completion: 871/1000
Completion: 872/1000
Completion: 873/1000
Completion: 874/1000
Completion: 875/1000
Completion: 876/1000
Completion: 877/1000
Completion: 878/1000
Completion: 879/1000
Completion: 880/1000
Completion: 881/1000
Completion: 882/1000
Completion: 883/1000
Completion: 884/1000
Completion: 885/1000
Completion: 886/1000
Completion: 887/1000
Completion: 888/1000
Completion: 889/1000
Completion: 890/1000
Completion: 891/1000
Completion: 892/1000
Completion: 893/1000
Completion: 894/1000
Completion: 895/1000
Completion: 896/1000
Completion: 897/1000
Completion: 898/1000
Completion: 899/1000
Completion: 900/1000
Completion: 901/1000
Completion: 902/1000
Completion: 903/1000
Completion: 904/1000
Completion: 905/1000
Completion: 906/1000
Completion: 907/1000
Completion: 908/1000
Completion: 909/1000
Completion: 9

100%|██████████| 1000/1000 [00:01<00:00, 583.73it/s]

Completion: 986/1000
Completion: 987/1000
Completion: 988/1000
Completion: 989/1000
Completion: 990/1000
Completion: 991/1000
Completion: 992/1000
Completion: 993/1000
Completion: 994/1000
Completion: 995/1000
Completion: 996/1000
Completion: 997/1000
Completion: 998/1000
Completion: 999/1000
Completion: 1000/1000





-107.0

This simulates learning from interaction without needing full supervision.

## Saving model

In [17]:
import joblib

joblib.dump(models, "bandit_models.joblib")

['bandit_models.joblib']

## Loading the models

In [18]:
import joblib

models_loaded = joblib.load("bandit_models.joblib")

## Inference

In [19]:
def predict_action_sgd(context):
    # context must be 2D for sklearn
    context = context.reshape(1, -1)

    predicted_rewards = {
        action: models[action].predict(context)[0]
        for action in ACTIONS
    }

    return max(predicted_rewards, key=predicted_rewards.get)

In [23]:
new_user_features = df_sample[features].sample(1).values

In [24]:
context = new_user_features.astype(float)
best_action = predict_action_sgd(context)

In [25]:
best_action

'pricing_help'