In [None]:

import pandas as pd
import numpy as np
from scipy import stats
import plotly.graph_objects as go

# --- Step 1: Simulate A/B test data ---
# Fixed seed so the simulated draws, and every statistic derived from them,
# are reproducible on a fresh kernel. A is drawn before B; swapping the order
# would change both samples because they share the global random stream.
np.random.seed(42)
n_A = 1000  # impressions for headline A
n_B = 1000  # impressions for headline B
p_A = 0.12  # true conversion rate for headline A
p_B = 0.16  # true conversion rate for headline B

# One Bernoulli draw per impression: 1 = converted, 0 = did not convert.
conversions_A = np.random.binomial(1, p_A, n_A)
conversions_B = np.random.binomial(1, p_B, n_B)

# Per-impression frames; row order stands in for chronological order.
data_A = pd.DataFrame({'variant': 'A', 'conversion': conversions_A})
data_B = pd.DataFrame({'variant': 'B', 'conversion': conversions_B})
data = pd.concat([data_A, data_B], ignore_index=True)

# --- Step 2: Compute conversion rates and standard errors ---
impressions_A = len(data_A)
impressions_B = len(data_B)
conv_A = conversions_A.sum()
conv_B = conversions_B.sum()
cr_A = conv_A / impressions_A
cr_B = conv_B / impressions_B

# Unpooled (per-variant) standard errors of a proportion; used for the CIs.
se_A = np.sqrt(cr_A * (1 - cr_A) / impressions_A)
se_B = np.sqrt(cr_B * (1 - cr_B) / impressions_B)

# --- Step 3: Two-proportion z-test ---
# Under H0 both variants share one rate, so the test uses the pooled estimate.
p_pool = (conv_A + conv_B) / (impressions_A + impressions_B)
se_pool = np.sqrt(p_pool * (1 - p_pool) * (1/impressions_A + 1/impressions_B))
# Sign convention: z_stat is negative when B converts better than A.
z_stat = (cr_A - cr_B) / se_pool
# Two-sided p-value from the survival function. `1 - cdf(|z|)` cancels
# catastrophically for large |z| and collapses to exactly 0.0; `sf` evaluates
# the upper tail directly and keeps full precision.
p_val = 2 * stats.norm.sf(abs(z_stat))

# --- Step 4: Confidence intervals (95%) ---
z_crit = 1.96  # standard normal critical value for a two-sided 95% interval
ci_A = (cr_A - z_crit*se_A, cr_A + z_crit*se_A)
ci_B = (cr_B - z_crit*se_B, cr_B + z_crit*se_B)

# One row per variant. The z-statistic and p-value describe the A-vs-B
# comparison as a whole, so the same value is repeated on both rows.
summary = pd.DataFrame({
    'Variant': ['A', 'B'],
    'Impressions': [impressions_A, impressions_B],
    'Conversions': [conv_A, conv_B],
    'Conversion Rate': [cr_A, cr_B],
    '95% CI Lower': [ci_A[0], ci_B[0]],
    '95% CI Upper': [ci_A[1], ci_B[1]],
    'z-stat': [z_stat, z_stat],
    'p-value': [p_val, p_val],
})

summary


In [None]:

# --- Step 5: Visualizations ---
# Figure 1: observed conversion rate per variant. Each error bar's length is
# the upper CI bound minus the point estimate.
fig_bar = go.Figure()
fig_bar.add_trace(
    go.Bar(
        x=['A', 'B'],
        y=[cr_A, cr_B],
        error_y=dict(type='data', array=[ci_A[1] - cr_A, ci_B[1] - cr_B]),
        name='Conversion Rate',
    )
)
fig_bar.update_layout(
    title='Conversion Rates with 95% CI',
    yaxis=dict(title='Conversion Rate'),
)
fig_bar.show()

# Figure 2: running total of conversions against impression number.
# Each variant's frame gains two columns ('cum_conv', then 'index') that the
# trace reads from; 'index' is a 1-based impression counter, not the
# DataFrame index.
fig_cum = go.Figure()
for variant_label, variant_frame in (('A', data_A), ('B', data_B)):
    variant_frame['cum_conv'] = variant_frame['conversion'].cumsum()
    variant_frame['index'] = range(1, len(variant_frame) + 1)
    fig_cum.add_trace(
        go.Scatter(
            x=variant_frame['index'],
            y=variant_frame['cum_conv'],
            mode='lines',
            name=variant_label,
        )
    )
fig_cum.update_layout(
    title='Cumulative Conversions Over Time',
    xaxis=dict(title='Impressions'),
    yaxis=dict(title='Cumulative Conversions'),
)
fig_cum.show()


In [None]:

# --- Step 6: Recommendation logic ---
alpha = 0.05  # significance threshold for the two-sided test

# Build the message first, then print once. The guard is written as
# `not (p_val < alpha)` rather than `p_val >= alpha` so that a NaN p-value
# still lands in the "no significant difference" branch.
if not (p_val < alpha):
    verdict = f"No statistically significant difference (p-value={p_val:.4f}). Keep running the test or collect more data."
elif cr_B > cr_A:
    verdict = f"Variation B outperforms A significantly (p-value={p_val:.4f}). Recommend adopting B."
else:
    verdict = f"Variation A outperforms B significantly (p-value={p_val:.4f}). Recommend sticking with A."
print(verdict)
