<a href="https://colab.research.google.com/github/nirmalpate/Bolt_Churn_Rate_Discount_A-B_Testing/blob/main/Bolt_Churn_Rate_Discount_A_B_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
# SAMPLE SIZE CALCULATION

In [10]:
import numpy as np
from scipy import stats

def sample_size_calculation(base_mean, lift_pct, variance, alpha=0.05, power=0.80):
    """Return the per-group sample size needed to detect a relative lift in a mean.

    Uses the normal-approximation formula for a two-sided, two-sample
    comparison of means with a common variance:

        n = 2 * variance * (z_{1 - alpha/2} + z_{power})**2 / delta**2

    where ``delta = base_mean * lift_pct / 100`` is the absolute difference
    to detect.

    Args:
        base_mean: Baseline mean of the metric.
        lift_pct: Minimum detectable lift, as a percentage of ``base_mean``.
        variance: Variance of the metric (must be >= 0).
        alpha: Two-sided significance level, strictly between 0 and 1.
        power: Desired statistical power, strictly between 0 and 1.

    Returns:
        int: The required sample size, rounded up to a whole number.

    Raises:
        ValueError: If ``alpha`` or ``power`` is not strictly inside (0, 1),
            if ``variance`` is negative, or if the effect size is zero
            (``base_mean`` or ``lift_pct`` equal to 0).
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")
    if not 0 < power < 1:
        raise ValueError(f"power must be strictly between 0 and 1, got {power}")
    if variance < 0:
        raise ValueError(f"variance must be non-negative, got {variance}")

    # Absolute effect size implied by the relative lift.
    delta = base_mean * (lift_pct / 100)
    if delta == 0:
        # Without this guard the division below yields inf and int() fails
        # with an unhelpful OverflowError.
        raise ValueError(
            "effect size is zero (base_mean and lift_pct must both be non-zero)"
        )

    z_alpha = stats.norm.ppf(1 - alpha / 2)  # two-sided critical value
    z_beta = stats.norm.ppf(power)

    # The effect size enters squared, so the sign of the lift does not matter.
    n = ((z_alpha + z_beta) ** 2 * (2 * variance)) / (delta ** 2)

    return int(np.ceil(n))

# Example: size the test for a 10% relative lift on a baseline mean of 10,
# assuming a variance of 10 (alpha and power are left at the function defaults).
required_sample = sample_size_calculation(
    base_mean=10,
    lift_pct=10,
    variance=10,
)
print(f"Required Sample Size: {required_sample}")
# --- STEP 2: CREATE MOCK DATA ---
# Simulate 400 users per group (exceeding our requirement); the fixed seed
# makes every random draw below reproducible.
n = 400
np.random.seed(42)

# The draws share one global random stream, so their order is significant:
# control revenue, treatment revenue, control conversions, treatment conversions.
revenue_control = np.random.normal(10, 4.5, n)
revenue_treatment = np.random.normal(11.2, 4.5, n)
converted_control = np.random.choice([0, 1], n, p=[0.7, 0.3])
converted_treatment = np.random.choice([0, 1], n, p=[0.6, 0.4])

data = {
    # First n rows are Control, the next n rows are Treatment.
    'group': [label for label in ('Control', 'Treatment') for _ in range(n)],
    # Revenue (continuous, used by the t-test)
    'revenue': np.concatenate([revenue_control, revenue_treatment]),
    # Converted (categorical, used by the chi-square test: 1 = booked, 0 = didn't book)
    'converted': np.concatenate([converted_control, converted_treatment]),
}

# Floor revenue at zero: the normal draws can go negative, revenue cannot.
df = pd.DataFrame(data).assign(
    revenue=lambda frame: frame['revenue'].clip(lower=0)
)
# --- Hypothesis tests on the experiment data ---
# Significance threshold shared by both decisions below.
ALPHA = 0.05

# T-test for the revenue difference (continuous metric).
# ttest_ind is called with its defaults: two-sided, pooled variance (equal_var=True).
control_revenue = df.loc[df['group'] == 'Control', 'revenue']
treatment_revenue = df.loc[df['group'] == 'Treatment', 'revenue']

t_stat, p_val_t = stats.ttest_ind(control_revenue, treatment_revenue)
print(f"T-Test Results: T-statistic = {t_stat} and P-value = {p_val_t}")

# Chi-square test of independence between group and the binary `converted` flag,
# run on the group x converted contingency table.
contigency_table = pd.crosstab(df['group'], df['converted'])

chi2_stat, p_val_chi2, dof, expected = stats.chi2_contingency(contigency_table)
print(f"Chi-Square Test Results: Chi-Square Statistic = {chi2_stat} and P-value = {p_val_chi2}")

# Report each decision against the shared significance threshold.
print("Change in Revenue")

if p_val_t < ALPHA:
    print("There is a significant statistical difference in revenue between control and treatment groups")
else:
    print("There is no significant statistical difference in revenue between control and treatment group")

print("Change in retention Rate")

if p_val_chi2 < ALPHA:
    print("There is significant statistical diff between control and treatment group")
else:
    print("There is no significant statistical diff between control and treatment group")

Required Sample Size: 157
T-Test Results: T-statistic = -2.938423750276589 and P-value = 0.003393937056846898
Chi-Square Test Results: Chi-Square Statistic = 7.7543211948313475 and P-value = 0.005358417747688591
Change in Revenue
There is a significant statistical difference in revenue between control and treqatment groups
Change in retention Rate
There is significant statistical diff between control and treatment group


In [11]:
# Check Economic Viability

In [12]:
# Discount granted per converted treatment user; the economic-viability cell
# multiplies it by the treatment conversion count to get the total discount cost.
# NOTE(review): presumably expressed in the same currency units as `revenue` — confirm.
discount_value = 5

In [16]:
# Weigh the revenue uplift of the treatment group against the discount cost.
is_control = df['group'] == 'Control'
is_treatment = df['group'] == 'Treatment'

control_mean = df.loc[is_control, 'revenue'].mean()
treatment_mean = df.loc[is_treatment, 'revenue'].mean()
n_treatment = int(is_treatment.sum())

# Per-user mean difference scaled up to the whole treatment group.
incremental_revenue = (treatment_mean - control_mean) * n_treatment
print(f"Incremental Revenue: {incremental_revenue}")

# Total CAC: the discount is counted only for treatment users who converted.
conversion_treatment = int((is_treatment & (df['converted'] == 1)).sum())
total_cac = conversion_treatment * discount_value
print(f"Total CAC: {total_cac}")

net_profit = incremental_revenue - total_cac
print(f"net Profit is {net_profit}")

# Viable only if the net is positive AND the revenue t-test was significant at 5%.
is_viable = net_profit > 0 and p_val_t < 0.05
print("Economic Viability" if is_viable else "Economic Unviability")

Incremental Revenue: 365.6837488659654
Total CAC: 835
net Profit is -469.3162511340346
Economic Unviability
