In [17]:
import numpy as np
import pandas as pd

# -----------------------------
# 1. Experiment Configuration (Pre-Determined)
# -----------------------------

# Seed NumPy's legacy global RNG so every draw below is reproducible.
np.random.seed(42)

# Each subject-line variant is sent to 10,000 recipients.
n_per_group = 10_000

# Subject-line variants: A is the baseline (control), B is the challenger.
#   A - urgency line:    25% open rate, 12% click-through given an open
#   B - hype/emoji line: 27% open rate, 10% click-through given an open

open_rate_A        = 0.25
open_rate_B        = 0.27
click_given_open_A = 0.12
click_given_open_B = 0.10

# -----------------------------
# 2. Simulate Opens & Clicks
# -----------------------------

# Group labels: n_per_group 'A' entries followed by n_per_group 'B' entries,
# paired with sequential user ids 1..2*n_per_group (20,000 in total).

groups   = np.array(['A', 'B']).repeat(n_per_group)
user_ids = np.arange(2 * n_per_group) + 1

# One Bernoulli "opened the email?" draw per recipient, at the group's open rate.

opened_A = np.random.binomial(n=1, p=open_rate_A, size=n_per_group)
opened_B = np.random.binomial(n=1, p=open_rate_B, size=n_per_group)
opened   = np.hstack((opened_A, opened_B))

# One Bernoulli "clicked the link?" draw per recipient. Multiplying by the
# open flag zeroes out clicks for anyone who never opened, so a click is only
# possible after an open.

click_draw_A = np.random.binomial(n=1, p=click_given_open_A, size=n_per_group)
click_draw_B = np.random.binomial(n=1, p=click_given_open_B, size=n_per_group)

clicked_A = np.multiply(opened_A, click_draw_A)
clicked_B = np.multiply(opened_B, click_draw_B)
clicked   = np.hstack((clicked_A, clicked_B))

# -----------------------------
# 3. Assemble DataFrame
# -----------------------------

df = pd.DataFrame(dict(
    user_id=user_ids,
    group=groups,
    opened=opened,
    clicked=clicked,
))

# Shuffle the rows (seeded) so the two groups are interleaved, then renumber
# user_id so it runs 1..N in the shuffled order.

df_shuffled = (
    df.sample(frac=1, random_state=42)
      .reset_index(drop=True)
      .assign(user_id=lambda frame: np.arange(1, len(frame) + 1))
)

# Persist the shuffled dataset and print a quick summary.

df_shuffled.to_csv('sneaker_drop_email_ab_data.csv', index=False)
print("Simulated dataset shape:", df.shape)
print(df_shuffled.head(5))

Simulated dataset shape: (20000, 4)
   user_id group  opened  clicked
0        1     B       1        0
1        2     A       0        0
2        3     A       0        0
3        4     A       0        0
4        5     B       0        0
