In [1]:
import random
import uuid
import datetime
import time
import numpy as np
import pandas as pd
from uuid_extensions import uuid7

In [2]:
experiment_uuid_list = [
    str(uuid.uuid4()),
    str(uuid.uuid4()),
    str(uuid.uuid4()),
]

experiment_groups = ['A', 'B']

# Funnel stages in order.
funnel = [
    'homepage_viewed',
    'product_viewed',
    'product_added_to_cart',
    'order_placed',
    'review_written'
]

In [3]:
def dt_to_ms(dt):
    """Convert datetime to UNIX timestamp in milliseconds."""
    return int(dt.timestamp() * 1000)

# Generate a random start datetime for an experiment.
# We require that all event timestamps are >= 2024-01-01 and within a 90-day period.
def get_experiment_period():
    start_range = datetime.datetime(2024, 1, 1)
    end_range = datetime.datetime(2024, 12, 31) - datetime.timedelta(days=90)
    offset_days = random.randint(0, (end_range - start_range).days)
    exp_start = start_range + datetime.timedelta(days=offset_days)
    exp_end = exp_start + datetime.timedelta(days=90)
    return dt_to_ms(exp_start), dt_to_ms(exp_end)

In [4]:
events = []

# For each experiment group, generate a random number of users (between 9500 and 10500)
for exp_uuid in experiment_uuid_list:
    exp_start_ms, exp_end_ms = get_experiment_period()
    for group in experiment_groups:
        n_users = random.randint(9500, 10500)
        for _ in range(n_users):
            user_id = str(uuid.uuid4())  # UUIDv4 for the user

            # --- Simulate the funnel conversion counts for a user ---
            # Ensure every user has at least one homepage_viewed event.
            homepage_count = random.randint(1, 5)
            # For subsequent stages, the count is chosen from 0 to (previous_count - 1).
            product_viewed_count = random.randint(0, homepage_count - 1) if homepage_count > 1 else 0
            product_added_count = random.randint(0, product_viewed_count - 1) if product_viewed_count > 1 else 0
            order_count = random.randint(0, product_added_count - 1) if product_added_count > 1 else 0
            review_count = random.randint(0, order_count - 1) if order_count > 1 else 0

            stage_counts = [
                homepage_count,
                product_viewed_count,
                product_added_count,
                order_count,
                review_count
            ]
            total_events = sum(stage_counts)
            if total_events == 0:
                continue

            # --- Generate ordered timestamps for this user ---
            # We generate total_events timestamps uniformly in the experiment period and sort them.
            user_event_ts = np.sort(np.random.uniform(exp_start_ms, exp_end_ms, total_events))
            # To guarantee that events of later stages occur after previous stages,
            # we partition the sorted timestamps according to the counts.
            idx = 0
            for stage, count in zip(funnel, stage_counts):
                # Only add events if count > 0 (note: a 0 count means user did not convert for that stage)
                if count > 0:
                    # Get the next "count" timestamps for this stage.
                    stage_timestamps = user_event_ts[idx: idx + count]
                    idx += count
                    # For each event, generate an event_uuid (UUIDv7) based on the timestamp.
                    for ts in stage_timestamps:
                        event = {
                            "event_uuid": uuid7(ts, 'str'),
                            "user_uuid": user_id,
                            "experiment_uuid": exp_uuid,
                            "experiment_group": group,
                            "event_name": stage,
                            "event_ts": int(ts)
                        }
                        events.append(event)
                else:
                    continue

df_events = pd.DataFrame(events)

print("Total events generated:", len(df_events))
print("Total unique users:", df_events['user_uuid'].nunique())
print("Event types distribution:")
print(df_events['event_name'].value_counts())

Total events generated: 261909
Total unique users: 61435
Event types distribution:
event_name
homepage_viewed          184103
product_viewed            61767
product_added_to_cart     14081
order_placed               1858
review_written              100
Name: count, dtype: int64


In [5]:
df_events

Unnamed: 0,event_uuid,user_uuid,experiment_uuid,experiment_group,event_name,event_ts
0,0000006a-f71e-7a81-8000-82eaeb6aea7f,1b43d952-2d92-4faa-b86b-be5ad82406cc,ff8f8794-99e5-4828-bb91-47ca78d18038,A,homepage_viewed,1711444984548
1,0000006a-9eb5-7446-8000-a70eb4a9d455,40d1f981-972f-4be6-b7a8-5bb28dca9a4b,ff8f8794-99e5-4828-bb91-47ca78d18038,A,homepage_viewed,1705919254687
2,0000006a-a2be-7df2-8000-20d8a1c2fc97,40d1f981-972f-4be6-b7a8-5bb28dca9a4b,ff8f8794-99e5-4828-bb91-47ca78d18038,A,homepage_viewed,1706171599516
3,0000006a-a74f-7f60-8000-864e195fcf97,40d1f981-972f-4be6-b7a8-5bb28dca9a4b,ff8f8794-99e5-4828-bb91-47ca78d18038,A,homepage_viewed,1706457021748
4,0000006a-ac36-77c5-8000-dfeb91925b1f,40d1f981-972f-4be6-b7a8-5bb28dca9a4b,ff8f8794-99e5-4828-bb91-47ca78d18038,A,homepage_viewed,1706763302166
...,...,...,...,...,...,...
261904,0000006b-43ad-7c85-8000-cb16135be7d7,c3d19602-3460-43e9-b77c-ceba57477c6c,0f874312-b292-4118-b0f9-211d4192e847,B,product_viewed,1716229927369
261905,0000006b-4608-71b7-8000-d3021ddea32b,c3d19602-3460-43e9-b77c-ceba57477c6c,0f874312-b292-4118-b0f9-211d4192e847,B,product_viewed,1716376979347
261906,0000006b-4729-7fb1-8000-932ce71f58c6,c3d19602-3460-43e9-b77c-ceba57477c6c,0f874312-b292-4118-b0f9-211d4192e847,B,product_viewed,1716447749230
261907,0000006b-48c8-75ab-8000-ef7522f12873,c3d19602-3460-43e9-b77c-ceba57477c6c,0f874312-b292-4118-b0f9-211d4192e847,B,product_added_to_cart,1716548914643


In [6]:
for stage in funnel:
    df_events[df_events['event_name']==stage].to_csv(f'data/{stage}.csv', index=False)