In [35]:
import pandas as pd
import numpy as np

In [36]:
exposure_events = pd.read_csv("../data/exposure_events.csv")
user_events = pd.read_csv("../data/user_events_conversion.csv")
user_info = pd.read_csv("../data/user_info.csv")

In [37]:
exposure_events.head()

Unnamed: 0.1,Unnamed: 0,user_id,experiment_id,variant,exposure_time
0,0,4772,0,A,2025-01-02 01:12:51
1,1,6597,0,A,2025-01-10 21:19:04
2,2,5833,0,A,2025-01-09 05:36:59
3,3,1117,0,A,2025-01-07 21:22:17
4,4,1307,0,A,2025-01-03 18:56:33


In [38]:
user_events.head()

Unnamed: 0.1,Unnamed: 0,user_id,event_name,event_time,event_value
0,0,4772,session_start,2025-01-03 01:12:51,
1,1,4772,scroll,2025-01-03 01:12:51,
2,2,4772,scroll,2025-01-02 01:12:51,
3,3,4772,scroll,2025-01-02 01:12:51,
4,4,4772,scroll,2025-01-03 01:12:51,


In [39]:
user_info.head()

Unnamed: 0.1,Unnamed: 0,user_id,device,country
0,0,0,android,Germany
1,1,1,ios,Netherlands
2,2,2,web,China
3,3,3,android,Italy
4,4,4,ios,Australia


## Data analysis + parse metrics from JSON config file

In [40]:
import json

with open('../data/metric_definition.json', 'r') as f:
    metrics = json.load(f)

In [41]:
def compute_metric(exposure_events, user_events, metric_config):
    """
    1. Parse metric_config
    2. Filter events by name
    3. Filter by time window relative to exposure_time
    4. Aggregate per user based on aggregation type
    """

    # make sure datetime columns are datetime type

    exposure_events = exposure_events.copy()
    user_events = user_events.copy()

    exposure_events['exposure_time'] = pd.to_datetime(exposure_events['exposure_time'])
    user_events['event_time'] = pd.to_datetime(user_events['event_time'])
    
    # Filter events by name
    event_name = metric_config['event']['name']
    relevant_events = user_events[user_events['event_name'] == event_name].copy()

    # Join with exposures to filter by window
    merged = relevant_events.merge(
        exposure_events[['user_id', 'exposure_time', 'variant', 'experiment_id']],
        on='user_id'
    )

    # calculate time since exposure
    merged['time_since_exposure'] = merged['event_time'] - merged['exposure_time']

    # parse window
    start = pd.Timedelta(metric_config['window']['start'])
    end = pd.Timedelta(metric_config['window']['end'])

    in_window = merged[
        (merged['time_since_exposure'] >= start) &
        (merged['time_since_exposure'] <= end)
    ]

    # aggregate
    agg_type = metric_config['aggregation']
    all_users = exposure_events[['user_id', 'variant', 'experiment_id']].copy()
    
    if agg_type == 'binary':
        converted_users = in_window.groupby(['user_id', 'variant', 'experiment_id']).size().reset_index(name='metric_value')
        converted_users['metric_value'] = 1

        result = all_users.merge(converted_users, how='left', on=['user_id', 'variant', 'experiment_id'])
        result['metric_value'] = result['metric_value'].fillna(0)
        
    elif agg_type == 'sum':
        # users with purchase events, and therefore, event_value has revenue values
        purchased_users = in_window.groupby(['user_id', 'variant', 'experiment_id'])['event_value'].sum().reset_index(name='metric_value')
        
        result = all_users.merge(purchased_users, how='left', on=['user_id', 'variant', 'experiment_id'])
        result['metric_value'] = result['metric_value'].fillna(0)

    elif agg_type == 'count':
        result = in_window.groupby(['user_id', 'variant', 'experiment_id']).size().reset_index(name='metric_value')
        result = all_users.merge(result, how='left', on=['user_id', 'variant', 'experiment_id'])
        result['metric_value'] = result['metric_value'].fillna(0)

    return result[['user_id', 'variant', 'experiment_id', 'metric_value']]

## Apply tests

In [50]:
from scipy import stats

In [52]:
def analyze_experiment(metric_df, metric_config):
    """
    Given metric values, run appropriate statistical test
    """

    variant_a = metric_df[metric_df['variant'] == 'A']['metric_value']
    variant_b = metric_df[metric_df['variant'] == 'B']['metric_value']

    agg_type = metric_config['aggregation']

    if agg_type == 'binary':
        # chi-square test for conversion rate
        conversions_a = variant_a.sum()
        conversions_b = variant_b.sum()
        n_a = len(variant_a)
        n_b = len(variant_b)

        contingency = [
            [conversions_a, n_a - conversions_a],
            [conversions_b, n_b - conversions_b]
        ]
        chi2, p_value, _, _ = stats.chi2_contingency(contingency)

        return {
            'test': 'chi-square',
            'statistic': chi2,
            'p-value': p_value,
            'variant_a_rate': conversions_a / n_a,
            'variant_b_rate': conversions_b / n_b,
            'lift': (conversions_b / n_b) / (conversions_a / n_a) - 1
        }
    else: # sum or count
        # two-sample t-test
        t_stat, p_value = stats.ttest_ind(variant_a, variant_b)

        return {
            'test': 't-test',
            'statistic': t_stat,
            'p-value': p_value,
            'variant_a_mean': variant_a.mean(),
            'variant_b_mean': variant_b.mean(),
            'lift': (variant_b.mean() / variant_a.mean()) - 1 if variant_a.mean() > 0 else None
        }

## display results

In [54]:
for metric_key, metric_config in metrics.items():
    print("*" * 50)
    print(f"Processing {metric_config['display_name']}")
    result = compute_metric(exposure_events, user_events, metric_config)
    print(result.head())
    summary = result.groupby('variant')['metric_value'].agg(['mean', 'std', 'count'])
    print(summary)

    analysis = analyze_experiment(result, metric_config)
    print(f"1. Test: {analysis['test']}")
    print(f"2. p-value: {analysis['p-value']}")
    print(f"3. Lift: {analysis.get('lift', 0)*100:.1f}%")
    print(f"4. Significant: {'YES' if analysis['p-value'] < 0.05 else 'NO'}")

**************************************************
Processing 7-day Conversion Rate
   user_id variant  experiment_id  metric_value
0     4772       A              0           0.0
1     6597       A              0           0.0
2     5833       A              0           0.0
3     1117       A              0           0.0
4     1307       A              0           0.0
           mean       std  count
variant                         
A        0.1062  0.308124   5000
B        0.1270  0.333006   5000
1. Test: chi-square
2. p-value: 0.0013306221953144962
3. Lift: 19.6%
4. Significant: YES
**************************************************
Processing 14-day Revenue
   user_id variant  experiment_id  metric_value
0     4772       A              0           0.0
1     6597       A              0           0.0
2     5833       A              0           0.0
3     1117       A              0           0.0
4     1307       A              0           0.0
             mean        std  count
varian

In [None]:
def run_experiment_analysis(experiment_id, exposures_df, events_df, metrics_config):
    """
    Complete analysis pipeline for 1 experiment
    """

    exp_exposures = exposures_df[exposures_df['experiment_id'] == experiment_id]

    results = {}

    for metric_key, metric_config in metrics_config.items():
        metric_df = compute_metric(exp_exposures, events_df, metric_config)
        analysis = analyze_experiment(metric_df, metric_config)
        results[metric_config['metric_id']] = analysis
    
    results['significant'] = "YES" if results["p-value"] < 0.05 else "NO"

    return results

In [56]:
results = run_experiment_analysis(0, exposure_events, user_events, metrics)
print(json.dumps(results, indent=2))

{
  "conversion_7d": {
    "test": "chi-square",
    "statistic": 1.7686072218128224,
    "p-value": 0.18355454026436843,
    "variant_a_rate": 0.1088,
    "variant_b_rate": 0.1212,
    "lift": 0.11397058823529416
  },
  "revenue_14d": {
    "test": "t-test",
    "statistic": -2.609630450232982,
    "p-value": 0.009091034202106147,
    "variant_a_mean": 3.8653730663865162,
    "variant_b_mean": 4.716658362094035,
    "lift": 0.2202336698391003
  },
  "session_7d": {
    "test": "t-test",
    "statistic": 0.5843800176605515,
    "p-value": 0.5589910430721731,
    "variant_a_mean": 2.5208,
    "variant_b_mean": 2.4912,
    "lift": -0.0117423040304665
  }
}
