In [None]:

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random


# --- Configuration ----------------------------------------------------------
N_USERS = 5000
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same dataset.
RANDOM_SEED = 42
FUNNEL_STAGES = [
    'landing_page',
    'product_view',
    'add_to_cart',
    'checkout_start',
    'purchase_complete'
]
DEVICE_TYPES = ['Mobile', 'Desktop', 'Tablet']
TRAFFIC_SOURCES = ['Organic Search', 'Paid Ads', 'Social Media', 'Direct', 'Email']
COUNTRIES = ['US', 'UK', 'CA', 'AU', 'DE', 'FR', 'JP', 'IN', 'BR']

# Baseline probability of advancing from each stage to the next one, before
# the per-user modifier is applied. The last stage is terminal, so it has no
# entry here.
BASE_CONTINUATION_PROBS = {
    'landing_page': 0.75,
    'product_view': 0.60,
    'add_to_cart': 0.45,
    'checkout_start': 0.80,
}

# Earliest possible landing-page timestamp.
base_date = datetime(2023, 5, 1)


def _conversion_modifier(device, traffic_source, country):
    """Return the multiplier applied to every stage's continuation probability."""
    modifier = 1.0
    if device == 'Mobile':
        modifier *= 0.85
    if traffic_source == 'Paid Ads':
        modifier *= 0.75
    if country in ('US', 'UK'):
        modifier *= 1.1
    if country in ('IN', 'BR'):
        modifier *= 0.9
    return modifier


def generate_funnel_events(n_users, seed=None, start_date=base_date):
    """Simulate funnel events for ``n_users`` synthetic users.

    Every user gets a 'landing_page' event at a random minute within
    ``start_date`` + [0, 60] days, then advances through FUNNEL_STAGES one
    stage at a time. At each stage the user continues with probability
    ``BASE_CONTINUATION_PROBS[stage] * modifier`` and otherwise drops out;
    each step forward adds 1-45 minutes to the timestamp.

    Parameters
    ----------
    n_users : int
        Number of users to simulate; values <= 0 yield an empty list.
    seed : int or None
        Seed for a private ``random.Random`` instance. The same seed always
        produces the same events; ``None`` seeds from system entropy.
    start_date : datetime
        Lower bound for the landing-page timestamps.

    Returns
    -------
    list of dict
        One record per event with keys 'user_id', 'event', 'timestamp',
        'device', 'traffic_source' and 'country', grouped by user in
        chronological order.
    """
    # A dedicated generator keeps the simulation reproducible without
    # touching the global `random` state that other cells may rely on.
    rng = random.Random(seed)
    events = []

    for user_number in range(1, n_users + 1):
        user_id = f"user_{user_number}"
        device = rng.choice(DEVICE_TYPES)
        traffic_source = rng.choice(TRAFFIC_SOURCES)
        country = rng.choice(COUNTRIES)

        timestamp = start_date + timedelta(
            days=rng.randint(0, 60),
            hours=rng.randint(0, 23),
            minutes=rng.randint(0, 59)
        )

        def record(event_name, event_time):
            # Key order here determines the DataFrame/CSV column order.
            return {
                'user_id': user_id,
                'event': event_name,
                'timestamp': event_time,
                'device': device,
                'traffic_source': traffic_source,
                'country': country
            }

        # Every simulated user reaches the first stage.
        events.append(record(FUNNEL_STAGES[0], timestamp))

        modifier = _conversion_modifier(device, traffic_source, country)

        # Walk consecutive (stage, next_stage) pairs until the user drops out
        # or reaches the final stage.
        for stage, next_stage in zip(FUNNEL_STAGES, FUNNEL_STAGES[1:]):
            if rng.random() >= BASE_CONTINUATION_PROBS[stage] * modifier:
                break
            # Add time between events (minutes)
            timestamp += timedelta(minutes=rng.randint(1, 45))
            events.append(record(next_stage, timestamp))

    return events


print("Generating funnel dataset...")
data = generate_funnel_events(N_USERS, seed=RANDOM_SEED)

funnel_df = pd.DataFrame(data)

funnel_df.to_csv('funnel_data.csv', index=False)

print(f"Dataset created with {len(funnel_df)} events from {N_USERS} users")
print("Sample data:")
print(funnel_df.head())

Generating funnel dataset...
Dataset created with 11495 events from 5000 users
Sample data:
  user_id         event           timestamp  device traffic_source country
0  user_1  landing_page 2023-05-21 04:38:00  Tablet          Email      CA
1  user_1  product_view 2023-05-21 05:00:00  Tablet          Email      CA
2  user_1   add_to_cart 2023-05-21 05:19:00  Tablet          Email      CA
3  user_2  landing_page 2023-06-03 20:59:00  Tablet       Paid Ads      BR
4  user_2  product_view 2023-06-03 21:21:00  Tablet       Paid Ads      BR
