In [None]:
%pip install pandas
%pip install statsmodels
%pip install numpy

In [None]:
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.weightstats import ttest_ind
import numpy as np

def _relative_lift(treatment_value, control_value):
    """Return (treatment - control) / control, or None if control is zero."""
    if control_value == 0:
        # A relative lift has no meaning against a zero baseline; signal that
        # explicitly instead of dividing by zero and printing inf/nan.
        return None
    return (treatment_value - control_value) / control_value


def _print_lift(lift):
    """Print the lift as a signed percentage, or N/A when it is undefined."""
    if lift is None:
        print("Lift: N/A (control baseline is zero)")
    else:
        print(f"Lift: {lift:+.2%}")


def _print_significance(p_value, alpha):
    """Print whether p_value falls below the significance threshold alpha."""
    if p_value < alpha:
        print("Result: Statistically Significant")
    else:
        # Also reached when p_value is NaN, since NaN < alpha is False.
        print("Result: Not Statistically Significant")


def analyze_experiment_results(df: pd.DataFrame, alpha: float = 0.05) -> None:
    """
    Analyzes the results of an A/B test from a user-metrics DataFrame.

    Prints a report for two metrics, comparing the rows whose 'variant_id'
    is 'treatment' against those whose 'variant_id' is 'control':

    1. Conversion rate, tested with a two-sided z-test for proportions.
    2. Mean total revenue per row, tested with a two-sided Welch's t-test
       (unequal variances).

    Args:
        df: Frame with the columns 'experiment_id', 'variant_id',
            'converted' and 'total_revenue'. 'converted' is summed to count
            conversions, so it is expected to be 0/1 (or boolean).
        alpha: Significance threshold applied to both p-values.
            Defaults to 0.05.

    Raises:
        KeyError: If any required column is missing from ``df``.
        ValueError: If ``alpha`` is not strictly between 0 and 1, or if the
            control or the treatment group has no rows.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Fail before printing anything so a bad input never yields a half report.
    required_columns = ('experiment_id', 'variant_id', 'converted', 'total_revenue')
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"DataFrame is missing required column(s): {missing_columns}")

    control = df[df['variant_id'] == 'control']
    treatment = df[df['variant_id'] == 'treatment']
    n_control = len(control)
    n_treatment = len(treatment)

    # Both tests need observations on each side; with an empty group the
    # rates and p-values would silently come out as NaN.
    if n_control == 0 or n_treatment == 0:
        raise ValueError(
            "Both variants need at least one row "
            f"(control={n_control}, treatment={n_treatment})"
        )

    # The experiment id is read from the first row only.
    print(f"Analysis for Experiment: {df['experiment_id'].iloc[0]}")
    print(f"Control Group Size: {n_control}")
    print(f"Treatment Group Size: {n_treatment}")
    print("-" * 30)

    # 1. Analyze Conversion Rate (a binomial metric)
    control_conversions = control['converted'].sum()
    treatment_conversions = treatment['converted'].sum()

    # Calculate p-value using a Z-test for proportions (treatment listed first)
    count = np.array([treatment_conversions, control_conversions])
    nobs = np.array([n_treatment, n_control])
    _, p_value_conv = proportions_ztest(count, nobs, alternative='two-sided')

    # Calculate lift relative to the control rate
    control_rate = control_conversions / n_control
    treatment_rate = treatment_conversions / n_treatment
    lift_conv = _relative_lift(treatment_rate, control_rate)

    print("Metric: Conversion Rate")
    print(f"Control Rate: {control_rate:.4f}")
    print(f"Treatment Rate: {treatment_rate:.4f}")
    _print_lift(lift_conv)
    print(f"P-value: {p_value_conv:.5f}")
    _print_significance(p_value_conv, alpha)
    print("-" * 30)


    # 2. Analyze Total Revenue (a continuous metric)
    # We use Welch's t-test, which does not assume equal variance.
    _, p_value_rev, _ = ttest_ind(
        treatment['total_revenue'],
        control['total_revenue'],
        alternative='two-sided',
        usevar='unequal' # Welch's t-test
    )

    control_mean_rev = control['total_revenue'].mean()
    treatment_mean_rev = treatment['total_revenue'].mean()
    lift_rev = _relative_lift(treatment_mean_rev, control_mean_rev)

    print("Metric: Total Revenue per User")
    print(f"Control Mean: ${control_mean_rev:.2f}")
    print(f"Treatment Mean: ${treatment_mean_rev:.2f}")
    _print_lift(lift_rev)
    print(f"P-value: {p_value_rev:.5f}")
    _print_significance(p_value_rev, alpha)
    print("-" * 30)


# --- Example Usage ---
# This synthetic frame stands in for the table a real pipeline would read
# from the data warehouse (e.g., Snowflake, BigQuery) once the dbt run is done.
# 20,000 rows: the first 10,000 are control, the last 10,000 are treatment.
mock_data = {}
mock_data['user_id'] = [f'user_{i}' for i in range(20000)]
mock_data['experiment_id'] = 20000 * ['exp-new-checkout-flow-v2']
mock_data['variant_id'] = 10000 * ['control'] + 10000 * ['treatment']
# 1,000 control conversions and 1,050 treatment conversions.
mock_data['converted'] = 1000 * [1] + 9000 * [0] + 1050 * [1] + 8950 * [0]

# Draw log-normal revenue for every row (control draws first, then treatment),
# then keep a draw only where the row converted; non-converters get zero.
mock_data['total_revenue'] = [
    drawn_revenue if did_convert == 1 else 0
    for drawn_revenue, did_convert in zip(
        np.concatenate([
            np.random.lognormal(4, 1.5, 10000),
            np.random.lognormal(4.05, 1.5, 10000),
        ]).tolist(),
        mock_data['converted'],
    )
]

df_results = pd.DataFrame(data=mock_data)

# Print the A/B report for the mock experiment
analyze_experiment_results(df_results)