In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import chi2_contingency
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the campaign results from data.csv and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=5)

Unnamed: 0,Reminder Frequency,Voucher Discounts,Target Users,Redeemed Users
0,Non-Frequent,10K,3043,167
1,Frequent,10K,3141,204
2,Non-Frequent,15K,3219,204
3,Frequent,15K,2928,266
4,Non-Frequent,20K,2823,299


In [3]:
# Per-row redemption rate (in percent) and the number of targeted users who did not redeem.
df['redeem/target%'] = (df['Redeemed Users'] / df['Target Users']) * 100
df['Non Redeem Users'] = df['Target Users'] - df['Redeemed Users']

# Exclude the control group from the comparison. The explicit .copy() makes the
# filtered frame independent of the original, so columns added to it in later
# cells are not written into a slice.
df = df[df['Reminder Frequency'] != 'Control Group'].copy()

# Redemption rate (%) by reminder scheme (rows) and voucher discount (columns).
# The rate is derived from summed counts rather than by summing per-row
# percentages, so it stays correct even if a segment spans more than one row.
segment_counts = df.pivot_table(
    index='Reminder Frequency',
    columns='Voucher Discounts',
    values=['Redeemed Users', 'Target Users'],
    aggfunc='sum',
)
# Segments with no data come out as NaN; report them as 0, as before.
freq = (segment_counts['Redeemed Users'] / segment_counts['Target Users'] * 100).fillna(0)
freq

Voucher Discounts,10K,15K,20K,25K
Reminder Frequency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Frequent,6.494747,9.084699,12.068966,17.644887
Non-Frequent,5.488005,6.337372,10.591569,12.288687


In [4]:
# Pool redeemed and targeted user counts per reminder scheme, then express
# redemptions as a percentage of targeted users.
reminder_freq = (
    df.groupby('Reminder Frequency')[['Redeemed Users', 'Target Users']]
    .sum()
    .reset_index()
)
reminder_freq['redeem/target%'] = reminder_freq['Redeemed Users'].div(reminder_freq['Target Users']).mul(100)
reminder_freq

Unnamed: 0,Reminder Frequency,Redeemed Users,Target Users,redeem/target%
0,Frequent,1270,11446,11.095579
1,Non-Frequent,1048,12161,8.617712


In [5]:
## Optimal Reminder Scheme
##
## H0: p1 = p2
## H1: p1 > p2 [Frequent Reminder Scheme Redeem% > Non-Frequent Reminder Scheme Redeem%]

significance = 0.05

# Read the counts from the aggregated table instead of retyping them, so the
# test always reflects the data that was actually loaded.
count_cols = ['Redeemed Users', 'Target Users']
reminder_counts = reminder_freq.set_index('Reminder Frequency')
sample_success_a, sample_size_a = reminder_counts.loc['Frequent', count_cols]
sample_success_b, sample_size_b = reminder_counts.loc['Non-Frequent', count_cols]

# Order matters: with alternative='larger' the test is one-sided for
# (first proportion) > (second proportion).
successes = np.array([sample_success_a, sample_success_b])
samples = np.array([sample_size_a, sample_size_b])

stat, p_value = proportions_ztest(count=successes, nobs=samples, alternative='larger')
# report
print('z_stat: %0.3f, p_value: %0.3f' % (stat, p_value))
if p_value > significance:
    print("Fail to reject the null hypothesis - we have nothing else to say")
else:
    print("Reject the null hypothesis - suggest the alternative hypothesis is true")

z_stat: 6.394, p_value: 0.000
Reject the null hypothesis - suggest the alternative hypothesis is true


In [6]:
# Pool redeemed and targeted user counts per voucher discount, then express
# redemptions as a percentage of targeted users.
voucher_freq = (
    df.groupby('Voucher Discounts')[['Redeemed Users', 'Target Users']]
    .sum()
    .reset_index()
)
voucher_freq['redeem/target%'] = voucher_freq['Redeemed Users'].div(voucher_freq['Target Users']).mul(100)
voucher_freq

Unnamed: 0,Voucher Discounts,Redeemed Users,Target Users,redeem/target%
0,10K,371,6184,5.999353
1,15K,470,6147,7.646006
2,20K,621,5491,11.309415
3,25K,856,5785,14.796889


In [7]:
# Collapse the voucher tiers into two groups: the listed lower tiers become
# 'Other'; any remaining value (here 25K) keeps its own label.
# assign() builds a new frame instead of writing a column into `df` in place:
# `df` is the result of a boolean filter, and in-place column writes on such a
# frame raise SettingWithCopyWarning (hidden here by the global warnings filter).
df = df.assign(**{
    'voucher discount': np.where(
        df['Voucher Discounts'].isin(['10K', '15K', '20K']),
        'Other',
        df['Voucher Discounts'],
    )
})

# Pooled counts and redemption rate (%) for the two groups.
voucher_freq = (
    df.groupby('voucher discount')[['Redeemed Users', 'Target Users']]
    .sum()
    .reset_index()
)
voucher_freq['redeem/target%'] = (voucher_freq['Redeemed Users'] / voucher_freq['Target Users']) * 100
voucher_freq

Unnamed: 0,voucher discount,Redeemed Users,Target Users,redeem/target%
0,25K,856,5785,14.796889
1,Other,1462,17822,8.203344


In [8]:
## Optimal Voucher Discount (25K vs. all other discounts pooled)
##
## H0: p1 = p2
## H1: p1 > p2 [Voucher discount 25K Redeem% > Other Voucher discount groups Redeem%]

significance = 0.05

# Read the counts from the two-group table ('25K' / 'Other') instead of
# retyping them, so the test always reflects the data that was actually loaded.
count_cols = ['Redeemed Users', 'Target Users']
voucher_counts = voucher_freq.set_index('voucher discount')
sample_success_a, sample_size_a = voucher_counts.loc['25K', count_cols]
sample_success_b, sample_size_b = voucher_counts.loc['Other', count_cols]

# Order matters: with alternative='larger' the test is one-sided for
# (first proportion) > (second proportion).
successes = np.array([sample_success_a, sample_success_b])
samples = np.array([sample_size_a, sample_size_b])

stat, p_value = proportions_ztest(count=successes, nobs=samples, alternative='larger')
# report
print('z_stat: %0.3f, p_value: %0.3f' % (stat, p_value))
if p_value > significance:
    print("Fail to reject the null hypothesis - we have nothing else to say")
else:
    print("Reject the null hypothesis - suggest the alternative hypothesis is true")

z_stat: 14.643, p_value: 0.000
Reject the null hypothesis - suggest the alternative hypothesis is true


In [9]:
## Optimal Voucher Discount (25K vs. 20K)
##
## H0: p1 = p2
## H1: p1 > p2 [Voucher discount 25K Redeem% > Voucher discount 20K Redeem%]

significance = 0.05

# Recompute the per-voucher counts from `df` rather than retyping them:
# `voucher_freq` now holds the two-group ('25K' / 'Other') table, so the
# per-tier numbers are no longer available from it.
count_cols = ['Redeemed Users', 'Target Users']
per_voucher_counts = df.groupby('Voucher Discounts')[count_cols].sum()
sample_success_a, sample_size_a = per_voucher_counts.loc['25K', count_cols]
sample_success_b, sample_size_b = per_voucher_counts.loc['20K', count_cols]

# Order matters: with alternative='larger' the test is one-sided for
# (first proportion) > (second proportion).
successes = np.array([sample_success_a, sample_success_b])
samples = np.array([sample_size_a, sample_size_b])

stat, p_value = proportions_ztest(count=successes, nobs=samples, alternative='larger')
# report
print('z_stat: %0.3f, p_value: %0.3f' % (stat, p_value))
if p_value > significance:
    print("Fail to reject the null hypothesis - we have nothing else to say")
else:
    print("Reject the null hypothesis - suggest the alternative hypothesis is true")

z_stat: 5.486, p_value: 0.000
Reject the null hypothesis - suggest the alternative hypothesis is true
