<a href="https://colab.research.google.com/github/nirmalpate/DRIVALIA_A-B_TESTING_DISCOUNT_STRATEGY/blob/main/DRIVALIA_A_B_TESTING_DISCOUNT_STRATEGY.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [107]:
import pandas as pd
import numpy as np
from scipy import stats

In [108]:
# PARAMETERS AND SAMPLE SIZE CALCULATIONS (RELATIVE INCREASE 60% TO 85%)

In [129]:
p1 = 0.60
p2 = 0.85
alpha = 0.05
power = 0.95

z_alpha = stats.norm.ppf(1 - alpha)
z_beta  = stats.norm.ppf(power)

#SAMPLE SIZE CALCULATION

n_req = ((z_alpha*np.sqrt(2*p1*(1-p1)) + z_beta*np.sqrt(p2*(1-p2)))**2)/((p2-p1)**2)
n_req = int(np.ceil(n_req))

# 2. DATA GENERATION
np.random.seed(3000)
groups = ['Control'] * n_req + ['Treatment'] * n_req
data = {
    'user_id': range(1, 2 * n_req + 1),
    'group': groups,
    'base_price': np.random.uniform(50, 90, 2 * n_req),
    'rental_days': np.random.choice([1, 2, 3], 2 * n_req, p=[0.5, 0.3, 0.2])
}
df = pd.DataFrame(data)

In [130]:
df['daily_rate'] = np.where(df['group'] == 'Treatment', df['base_price'] * 0.80, df['base_price'])
duration_discount = np.where(df['rental_days'] == 2, 0.90, np.where(df['rental_days'] == 3, 0.80,1))
final_rate = df['daily_rate'] * duration_discount
df['final_rate'] = final_rate
df['final_revenue'] = df['final_rate'] * df['rental_days']
# Conversion/Utilisation: Using the observed 85% for Treatment and 60% for Control
df['converted'] = np.where(df['group'] == 'Control',
                           np.random.choice([0, 1], 2 * n_req, p=[0.40, 0.60]),
                           np.random.choice([0, 1], 2 * n_req, p=[0.15, 0.85]))
df['total_revenue'] = df['final_revenue'] * df['converted']
df

Unnamed: 0,user_id,group,base_price,rental_days,daily_rate,final_rate,final_revenue,converted,total_revenue
0,1,Control,70.431742,2,70.431742,63.388568,126.777136,0,0.000000
1,2,Control,73.424158,1,73.424158,73.424158,73.424158,0,0.000000
2,3,Control,78.450794,1,78.450794,78.450794,78.450794,1,78.450794
3,4,Control,63.025122,1,63.025122,63.025122,63.025122,0,0.000000
4,5,Control,70.812517,1,70.812517,70.812517,70.812517,0,0.000000
...,...,...,...,...,...,...,...,...,...
91,92,Treatment,67.211657,1,53.769326,53.769326,53.769326,1,53.769326
92,93,Treatment,83.240082,2,66.592066,59.932859,119.865718,1,119.865718
93,94,Treatment,58.739995,1,46.991996,46.991996,46.991996,1,46.991996
94,95,Treatment,55.077123,1,44.061698,44.061698,44.061698,1,44.061698


In [131]:
#HYPOTHESIS TESTING

#SUCCESS METRIC (CHAI_SQUARE TEST AND CONVERSION RATE)

In [132]:
contigency = pd.crosstab(df['group'], df['converted'])
contigency
chi2, p_val_conv, dof, ex = stats.chi2_contingency(contigency)
print('Chi-Square Statistic:', chi2)
print('p-value:', p_val_conv)

if p_val_conv < 0.05:
    print('Reject the null hypothesis. There is a significant difference in conversion rates between groups.')
else:
    print('Fail to reject the null hypothesis. There is no significant difference in conversion rates between groups.')

Chi-Square Statistic: 4.2725274725274724
p-value: 0.038733277045623134
Reject the null hypothesis. There is a significant difference in conversion rates between groups.


In [133]:
# Guardrail Metric: T-Test (Total Revenue per User)

In [134]:
rev_c = df[df['group'] == 'Control']['total_revenue']
rev_t = df[df['group'] == 'Treatment']['total_revenue']

t_stat, p_val_rev = stats.ttest_ind(rev_c, rev_t)

In [135]:
print('T-Statistic:', t_stat)
print('p-value:', p_val_rev)

T-Statistic: -1.0766131062291662
p-value: 0.28440851674173284


In [136]:
if p_val_rev < 0.05:
    print('Reject the null hypothesis. There is a significant difference in total revenue per user between groups.')
else:
    print('Fail to reject the null hypothesis. There is no significant difference in total revenue per user between groups.')

Fail to reject the null hypothesis. There is no significant difference in total revenue per user between groups.


### CHANGE SUCCESS MATRIX AND GUARDIAL MATRIX

In [137]:
# 1. PARAMETERS FOR REVENUE-BASED SAMPLE SIZE
# To detect a difference in means, we need the expected standard deviation

In [154]:
variance = 45
base_rev_avg  = df['total_revenue'].sum() / len(df['user_id'])
target_rev_avg = base_rev_avg * 1.20
delta =   base_rev_avg * 0.20
alpha = 0.05
power = 0.9
z_alpha = stats.norm.ppf(1 - alpha/2)
z_beta  = stats.norm.ppf(power)
variance = df['total_revenue'].var()

n_req2 = ((z_alpha + z_beta)**2 * (2 * variance)) / (delta**2)
n_req2 = int(np.ceil(n_req2))
n_req2




332

In [156]:
# 3. HYPOTHESIS TESTING
# SUCCESS METRIC: T-Test (Total Revenue per User)
rev_c = df[df['group'] == 'Control']['total_revenue']
rev_t = df[df['group'] == 'Treatment']['total_revenue']
t_stat, p_val_rev = stats.ttest_ind(rev_c, rev_t)

# GUARDRAIL METRIC: Chi-Square (Conversion Rate)
contingency = pd.crosstab(df['group'], df['converted'])
chi2, p_val_conv, dof, ex = stats.chi2_contingency(contingency)

# Output results
print(f"REQUIRED SAMPLE SIZE (FOR REVENUE): {n_req}")
print(f"Revenue P-Value (Success Metric): {p_val_rev:.4f}")
print(f"Conversion P-Value (Guardrail Metric): {p_val_conv:.4f}")
print(f"Avg Revenue Control: €{rev_c.mean():.2f}")
print(f"Avg Revenue Treatment: €{rev_t.mean():.2f}")

REQUIRED SAMPLE SIZE (FOR REVENUE): 48
Revenue P-Value (Success Metric): 0.2844
Conversion P-Value (Guardrail Metric): 0.0387
Avg Revenue Control: €63.46
Avg Revenue Treatment: €75.59


In [157]:
if p_val_rev < 0.05:
    print('Reject the null hypothesis. There is a significant difference in total revenue per user between groups.')
else:
    print('Fail to reject the null hypothesis. There is no significant difference in total revenue per user between groups.')

Fail to reject the null hypothesis. There is no significant difference in total revenue per user between groups.
