In [1]:
# A/B Test Analysis: ARPI_D1 and D1 Retention

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from statsmodels.stats.power import TTestIndPower, NormalIndPower

# Data
n_control = 10000
n_test = 10000

revenue_control = 4500
revenue_test = 4800

retained_control = 3200
retained_test = 3500

# 1. Calculate ARPI_D1 and D1 Retention rates
arpi_control = revenue_control / n_control
arpi_test = revenue_test / n_test

retention_control = retained_control / n_control
retention_test = retained_test / n_test


# 2. Statistical significance tests

# Assume ARPI_D1 std dev is 10% of mean (since none given)
std_arpi_control = arpi_control * 0.1
std_arpi_test = arpi_test * 0.1


# t-test for ARPI_D1
se_arpi = np.sqrt(std_arpi_control**2/n_control + std_arpi_test**2/n_test)
t_stat_arpi = (arpi_test - arpi_control) / se_arpi
p_value_arpi = 2 * (1 - stats.norm.cdf(np.abs(t_stat_arpi)))


# D1 Retention: Two-proportion z-test
p1 = retention_control
p2 = retention_test
p_pool = (retained_control + retained_test) / (n_control + n_test)
se_ret = np.sqrt(p_pool * (1 - p_pool) * (1/n_control + 1/n_test))
z_stat_ret = (p2 - p1) / se_ret
p_value_ret = 2 * (1 - stats.norm.cdf(np.abs(z_stat_ret))) #two tailed test formula


# 3. Power calculation

# a) ARPI_D1 power
effect_size_arpi = (arpi_test - arpi_control) / np.sqrt((std_arpi_control**2 + std_arpi_test**2) / 2) # Cohen’s d formula
alpha = 0.05
power_arpi = stats.norm.cdf(effect_size_arpi/np.sqrt(2) - stats.norm.ppf(1-alpha/2))

# b) D1 Retention power
effect_size_ret = 2 * (np.arcsin(np.sqrt(retention_test)) - np.arcsin(np.sqrt(retention_control))) # Cohen’s h formula
power_ret = NormalIndPower().power(effect_size=effect_size_ret, nobs1=n_test, alpha=0.05, ratio=1)



# 3. Summary of results
summary = f"""
A/B Test Summary:

1. ARPI D1:
   - Control: {arpi_control:.4f}
   - Test: {arpi_test:.4f}
   - t-statistic: {t_stat_arpi:.2f}
   - p-value: {p_value_arpi:.2f}
   - effect_size_arpi: {effect_size_arpi:.4f}
   - Power: {power_arpi:.3f}

2. D1 Retention:
   - Control: {retention_control:.4f}
   - Test: {retention_test:.4f}
   - z-statistic: {z_stat_ret:.4f}
   - p-value: {p_value_ret:.4f}
   - effect_size_ret: {effect_size_ret:.4f}
   - Power: {power_ret:.4f}

Recommendations:
- ARPI D1 shows {"statistically significant" if p_value_arpi < 0.05 else "no statistically significant"} improvement.
- D1 Retention shows {"statistically significant" if p_value_ret < 0.05 else "no statistically significant"} improvement.
- {"Sufficient" if power_arpi > 0.8 and effect_size_ret > 0.8 else "Insufficient"} statistical power suggests the sample size is {"adequate" if power_arpi > 0.8 and effect_size_retention > 0.8 else "inadequate"}.


Should the feature be rolled out?
  - {"Yes, both metrics show significant improvement and sufficient power." if (p_value_arpi < 0.05 and p_value_ret < 0.05 and power_arpi > 0.8 and power_ret > 0.8) else "No, more data or further testing is needed."}

Is there enough evidence?
  - {"Yes" if (p_value_arpi < 0.05 and p_value_ret < 0.05 and power_arpi > 0.8 and power_ret > 0.8) else "No"}

If not significant, what next?
  - Increase sample size to achieve sufficient power and statistical significance.

"""

print(summary)


A/B Test Summary:

1. ARPI D1:
   - Control: 0.4500
   - Test: 0.4800
   - t-statistic: 45.60
   - p-value: 0.00
   - effect_size_arpi: 0.6448
   - Power: 0.066

2. D1 Retention:
   - Control: 0.3200
   - Test: 0.3500
   - z-statistic: 4.4944
   - p-value: 0.0000
   - effect_size_ret: 0.0636
   - Power: 0.9944

Recommendations:
- ARPI D1 shows statistically significant improvement.
- D1 Retention shows statistically significant improvement.
- Insufficient statistical power suggests the sample size is inadequate.


Should the feature be rolled out?
  - No, more data or further testing is needed.

Is there enough evidence?
  - No

If not significant, what next?
  - Increase sample size to achieve sufficient power and statistical significance.


