# Import Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import statsmodels.stats.proportion as sp

# Matplolib default parameters
from matplotlib import rcParams
plt.style.use('default')
plt.rcParams['axes.prop_cycle'] = plt.rcParamsDefault['axes.prop_cycle']

# import warning
import warnings
warnings.filterwarnings('ignore')

# Dataset Overview

## User Activity Data

In [116]:
# user activity dataset
df_act = pd.read_csv('activity_all.csv')
df_act.head()

Unnamed: 0,userid,dt,groupid,activity_level
0,a5b70ae7-f07c-4773-9df4-ce112bc9dc48,2021-10-01,0,0
1,d2646662-269f-49de-aab1-8776afced9a3,2021-10-01,0,0
2,c4d1cfa8-283d-49ad-a894-90aedc39c798,2021-10-01,1,0
3,6889f87f-5356-4904-a35a-6ea5020011db,2021-10-01,0,0
4,dbee604c-474a-4c9d-b013-508e5a0e3059,2021-10-01,1,0


In [3]:
print(f'Activity Dataset have {df_act.shape[0]} Rows and {df_act.shape[1]} Columns')

Activity Dataset have 3660000 Rows and 4 Columns


**Dictionary**
- `userid` : User unique identifier
- `dt` : datetime (date when user visit our platform)
- `groupid` : groupid 0 stand for control group, group id 1 refer to test group
- `activity_level` : level of activity (eg: How many times user visit our platform in 1 day). 0 activity level refer to no visit


In [7]:
df_act.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3660000 entries, 0 to 3659999
Data columns (total 4 columns):
 #   Column          Dtype 
---  ------          ----- 
 0   userid          object
 1   dt              object
 2   groupid         int64 
 3   activity_level  int64 
dtypes: int64(2), object(2)
memory usage: 111.7+ MB


**Convert to `dt` to datetime datatype**

In [117]:
df_act['dt'] = pd.to_datetime(df_act['dt'])

In [52]:
df_act.query('activity_level > 0').groupby(['dt', 'groupid']).count().reset_index().head()

Unnamed: 0,dt,groupid,userid,activity_level
0,2021-10-01,0,15337,15337
1,2021-10-01,1,15297,15297
2,2021-10-02,0,15354,15354
3,2021-10-02,1,15421,15421
4,2021-10-03,0,15423,15423


In [64]:
df_act['month'] = df_act['dt'].dt.month

In [65]:
df_act11 = df_act[df_act['month']==11]

In [28]:
import altair as alt

alt.Chart(df_act.query('activity_level > 0').groupby(['dt', 'groupid']).count().reset_index()).mark_line(size=3).encode(
    alt.X('dt'),
    alt.Y('userid'),
    color='groupid:O',
    tooltip=['userid']
).properties(
    width=600,
    height=400
)

In [67]:
print('Experimental Design Date Started : ', df_act11['dt'].min())
print('Experimental Design Date Finished : ', df_act['dt'].max())
print('The Number of Days Experimental Design Running : ', df_ctr['dt'].dt.day.max()-df_act11['dt'].dt.day.min())

Experimental Design Date Started :  2021-11-01 00:00:00
Experimental Design Date Finished :  2021-11-30 00:00:00
The Number of Days Experimental Design Running :  30


In [93]:
df_act.iloc[:,:-1].groupby(['groupid', 'dt']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,activity_level,activity_level,activity_level,activity_level,activity_level,activity_level,activity_level,activity_level
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
groupid,dt,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0,2021-10-01,29951.0,5.241762,6.516640,0.0,0.0,1.0,10.0,20.0
0,2021-10-02,29951.0,5.255885,6.509838,0.0,0.0,1.0,10.0,20.0
0,2021-10-03,29951.0,5.266068,6.511458,0.0,0.0,1.0,10.0,20.0
0,2021-10-04,29951.0,5.212447,6.511711,0.0,0.0,1.0,10.0,20.0
0,2021-10-05,29951.0,5.177590,6.512791,0.0,0.0,1.0,10.0,20.0
...,...,...,...,...,...,...,...,...,...
1,2021-11-26,30049.0,10.031216,5.770582,0.0,5.0,10.0,15.0,20.0
1,2021-11-27,30049.0,10.026024,5.774141,0.0,5.0,10.0,15.0,20.0
1,2021-11-28,30049.0,9.975307,5.788257,0.0,5.0,10.0,15.0,20.0
1,2021-11-29,30049.0,9.970781,5.799546,0.0,5.0,10.0,15.0,20.0


Look at the median, group 0 and group 1 show different activity level significantly (by business perspective)

## CTR Data

In [6]:
df_ctr = pd.read_csv('ctr_all.csv')
df_ctr.head()

Unnamed: 0,userid,dt,groupid,ctr
0,60389fa7-2d71-4cdf-831c-c2bb277ffa1e,2021-11-13,0,31.81
1,b59cb225-d160-4851-92d2-7cc8120a2f63,2021-11-13,0,30.46
2,aa336050-934e-453f-a5b0-dd881fcd114e,2021-11-13,0,34.25
3,8df767f4-a10f-4322-a722-676b7e02b372,2021-11-13,0,34.92
4,a74762ed-4da0-42ab-91d2-40d7e808dfe9,2021-11-13,0,34.95


In [7]:
print(f'CTR Dataset have {df_ctr.shape[0]} Rows and {df_ctr.shape[1]} Columns')

CTR Dataset have 2303408 Rows and 4 Columns


**Dictionary**
- `userid` : User unique identifier
- `dt` : datetime (date when user visit our platform)
- `groupid` : groupid 0 stand for control group, group id 1 refer to test group
- `ctr` : click through rate


In [14]:
df_ctr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2303408 entries, 0 to 2303407
Data columns (total 4 columns):
 #   Column   Dtype  
---  ------   -----  
 0   userid   object 
 1   dt       object 
 2   groupid  int64  
 3   ctr      float64
dtypes: float64(1), int64(1), object(2)
memory usage: 70.3+ MB


**Convert to `dt` to datetime datatype**

In [68]:
df_ctr['dt'] = pd.to_datetime(df_ctr['dt'])

In [70]:
data_ctr_avg = df_ctr.groupby(['groupid','dt']).mean().reset_index()

In [72]:
df_ctr['month'] = df_ctr['dt'].dt.month
df_ctr11 = df_ctr[df_ctr['month']==11]

In [71]:
alt.Chart(data_ctr_avg).mark_line(size=5).encode(
    alt.X('dt'),
    alt.Y('ctr'),
    color='groupid:O',
    tooltip=['ctr']
).properties(
    width=600,
    height=400
)

In [73]:
print('Experimental Design Date Started : ', df_ctr11['dt'].min())
print('Experimental Design Date Finished : ', df_ctr['dt'].max())
print('The Number of Days Experimental Design Running : ', df_ctr['dt'].dt.day.max()-df_ctr11['dt'].dt.day.min())

Experimental Design Date Started :  2021-11-01 00:00:00
Experimental Design Date Finished :  2021-11-30 00:00:00
The Number of Days Experimental Design Running :  30


In [94]:
df_ctr.iloc[:,:-1].groupby(['groupid', 'dt']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,ctr,ctr,ctr,ctr,ctr,ctr,ctr,ctr
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
groupid,dt,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0,2021-10-01,15337.0,32.980627,1.735883,30.0,31.45,32.99,34.49,36.0
0,2021-10-02,15354.0,33.004056,1.735763,30.0,31.49,33.01,34.50,36.0
0,2021-10-03,15423.0,33.002006,1.732401,30.0,31.51,33.00,34.51,36.0
0,2021-10-04,15211.0,32.990363,1.742652,30.0,31.48,32.98,34.51,36.0
0,2021-10-05,15126.0,33.014167,1.738912,30.0,31.52,33.03,34.52,36.0
...,...,...,...,...,...,...,...,...,...
1,2021-11-26,29303.0,37.997834,1.732989,35.0,36.50,37.99,39.50,41.0
1,2021-11-27,29350.0,37.978912,1.727602,35.0,36.48,37.97,39.47,41.0
1,2021-11-28,29273.0,37.992709,1.728559,35.0,36.50,37.98,39.49,41.0
1,2021-11-29,29289.0,37.987909,1.731204,35.0,36.49,37.97,39.49,41.0


Look at the median, group 0 and group 1 show different ctr significantly (by business perspective)

# Experiment : User Activity Level

## Define Experiment

**1) What is the name of the experiment?** <br>
AB Test New Design for Landing Page Design <br>
**2) Define Hypothesis** <br>
- H0 : New design won't increase DAU (Daily Active User) 
- H1 : New design will increase DAU (Daily Active User) <br>

**3) Who is the participant?** <br>
The user that visit landing page <br>
**4) What variables will be tested?** <br>
 Existing Design & New Design

## Define Metrics

Metric : **Daily Active Users**

## Define Sample Size

When we calculate sample size for daily active user, our sample becomes a number of days rather than number of user needed <br>
*note : (continuous sample = metric take value on a scale)

In [12]:
def continuos_sample_size(metric, mde, sd, alpha, beta):
    # standard normal distribution to determine z-values
    snd = stats.norm(0, 1)

    Z_beta = snd.ppf(1-beta)
    print(Z_beta)

    Z_alpha = snd.ppf(1-alpha/2)
    print(Z_alpha)

    N = (2 * sd**2 * 
             (Z_beta + Z_alpha)**2
             / mde**2)

    return print(f'Num of days (minimum) needed : {round(N)}')

# MDE = minimum detectable effect
# metric = mean of control group user
# sd = std

In [20]:
# we want to set MDE = 150 (0.5% increment from metric)
continuos_sample_size(metric=30673, mde=150, sd=91, alpha=0.05, beta=0.2)

0.8416212335729143
1.959963984540054
Num of days (minimum) needed : 6


**We need at least 6 days** obvservation for each group. **This is just for the sake of the exercise** and we keep use all the days given (because the available datasets are intended for a/b testing purposes, so when we use all of these datasets, it will not interfere with other users who are not tested)

## Define Duration

In [74]:
print('Experimental Design Date Started : ', df_act11['dt'].min())
print('Experimental Design Date Finished : ', df_act['dt'].max())
print('The Number of Days Experimental Design Running : ', df_ctr['dt'].dt.day.max()-df_act11['dt'].dt.day.min())

Experimental Design Date Started :  2021-11-01 00:00:00
Experimental Design Date Finished :  2021-11-30 00:00:00
The Number of Days Experimental Design Running :  30


We will run this experiment **for 30 days**

## Preparing Data

In [137]:
# filter only activity level > 0
data_act_count = df_act.query('activity_level > 0').groupby(['dt', 'groupid']).count().reset_index()
data_act_count.head()

Unnamed: 0,dt,groupid,userid,activity_level
0,2021-10-01,0,15337,15337
1,2021-10-01,1,15297,15297
2,2021-10-02,0,15354,15354
3,2021-10-02,1,15421,15421
4,2021-10-03,0,15423,15423


In [188]:
# Data before experiment
before = data_act_count.query('dt < "2021-11-01"')

# Data during experiment
after = data_act_count.query('dt >= "2021-11-01"')

## Pre Test Before Experiment

In [189]:
print(f'Mean Daily Active User for Control Group Before Experiment :', np.mean(before.query('groupid == 0')['userid'].to_numpy()))
print(f'Mean Daily Active User for Test Group Before Experiment :', np.mean(before.query('groupid == 1')['userid'].to_numpy()))

Mean Daily Active User for Control Group Before Experiment : 15320.870967741936
Mean Daily Active User for Test Group Before Experiment : 15352.516129032258


In [190]:
from scipy.stats import ttest_ind

res = ttest_ind(before.query('groupid == 0')['userid'].to_numpy(), before.query('groupid == 1')['userid']
                .to_numpy()).pvalue

print('Pvalue :', res)

Pvalue : 0.1630842353828083


P-values is very high, it indicates that the number between the group not exactly the same (if the number between 2 group is sampe it means pvalue = 1),such a **high pvalues** shows that there is very likely **no difference** between those two group

## Test Experiment

In [191]:
print(f'Mean Daily Active User for Control Group During Experiment :', np.mean(after.query('groupid == 0')['userid'].to_numpy()))
print(f'Mean Daily Active User for Test Group During Experiment :', np.mean(after.query('groupid == 1')['userid'].to_numpy()))

Mean Daily Active User for Control Group During Experiment : 15782.0
Mean Daily Active User for Test Group During Experiment : 29302.433333333334


In [192]:
pvalue = ttest_ind(after.query('groupid == 0')['userid'].to_numpy(), after.query('groupid == 1')['userid']
                .to_numpy()).pvalue

print(f'pvalue :  {"{:.100f}".format(pvalue)}')

if pvalue >= 0.05:
    print('Insufficient Reject H0')
else:
    print('Sufficient Reject H0')

pvalue :  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000065906035841072442
Sufficient Reject H0


Then we will decide that the average difference between daily active user new design and daily active user existing design is very significant. Which means that the **average daily active user new design is higher** than the existing design.

# Experiment : Click Trough Rate

## Define Experiment

**1) What is the name of the experiment?** <br>
AB Test New Design for Landing Page Design <br>
**2) Define Hypothesis** <br>
- H0 : New design won't increase CTR (Click Trough Rate) 
- H1 : New design will increase CTR (Click Trough Rate) <br>

**3) Who is the participant?** <br>
The user that visit landing page <br>
**4) What variables will be tested?** <br>
 Existing Design & New Design

## Define Metrics

**Metrics** : Click Trough Rate

## Define Sample Size

In [157]:
def binomial_sample_size(metric, mde, alpha, beta):
    # standard normal distribution to determine z-values
    snd = stats.norm(0, 1)

    Z_beta = snd.ppf(1-beta)
    print('Z_beta :', Z_beta)

    Z_alpha = snd.ppf(1-alpha/2)
    print('Z_alpha :', Z_alpha)

    # average of probabilities from both groups
    p = (metric + metric+mde) / 2
    print('p :', p)
    print('\n')

    N = (2 * p * 
             (1 - p) * 
             (0.84 + 1.96)**2
             / mde**2)

    return print(f'Num of samples (at least) needed : {round(N)}')

In [165]:
# we want to set MDE = 0.02 (about 7% increment from metric)
binomial_sample_size(0.33, 0.02, 0.05, 0.2)

Z_beta : 0.8416212335729143
Z_alpha : 1.959963984540054
p : 0.34


Num of samples (at least) needed : 8796


**We need at least 8796 user samples** obvservation for each group. **This is just for the sake of the exercise** and we keep use all the users given (because the available datasets are intended for a/b testing purposes, so when we use all of these datasets, it will not interfere with other users who are not tested)

## Define Duration

In [166]:
print('Experimental Design Date Started : ', df_ctr11['dt'].min())
print('Experimental Design Date Finished : ', df_ctr['dt'].max())
print('The Number of Days Experimental Design Running : ', df_ctr['dt'].dt.day.max()-df_ctr11['dt'].dt.day.min())

Experimental Design Date Started :  2021-11-01 00:00:00
Experimental Design Date Finished :  2021-11-30 00:00:00
The Number of Days Experimental Design Running :  30


We will run this experiment **for 30 days**

## Preparing Data

In [170]:
data_ctr_avg = df_ctr.groupby(['groupid','dt']).mean().reset_index()
data_ctr_avg.head()

Unnamed: 0,groupid,dt,ctr,month
0,0,2021-10-01,32.980627,10.0
1,0,2021-10-02,33.004056,10.0
2,0,2021-10-03,33.002006,10.0
3,0,2021-10-04,32.990363,10.0
4,0,2021-10-05,33.014167,10.0


In [193]:
# define before and after experiment data
before = df_ctr.query('dt < "2021-11-01"')[['groupid', 'ctr']]
after = df_ctr.query('dt >= "2021-11-01"')[['groupid', 'ctr']]

## Pre Test Before Experiment

In [173]:
print(f'Mean Click Trough Rate for Control Group Before Experiment :', before.query('groupid == 0')['ctr'].to_numpy().mean())
print(f'Mean Click Trough Rate for Test Group Before Experiment :', before.query('groupid == 1')['ctr'].to_numpy().mean())

Mean Click Trough Rate for Control Group Before Experiment : 33.00091277553074
Mean Click Trough Rate for Test Group Before Experiment : 32.99957172093258


In [174]:
print(f'Std Click Trough Rate for Control Group Before Experiment :', before.query('groupid == 0')['ctr'].to_numpy().std())
print(f'Std Click Trough Rate for Test Group Before Experiment :', before.query('groupid == 1')['ctr'].to_numpy().std())

Std Click Trough Rate for Control Group Before Experiment : 1.7336979501682888
Std Click Trough Rate for Test Group Before Experiment : 1.7296548367391134


In [179]:
pvalue = ttest_ind(before.query('groupid == 0')['ctr'].to_numpy(), before.query('groupid == 1')['ctr']
                .to_numpy()).pvalue

print('pvalue :', pvalue)

pvalue : 0.705741417344299


P-values is very high, it indicates that the number between the group not exactly the same (if the number between 2 group is same it means pvalue = 1),such a **high pvalues** shows that there is very likely **no difference** between those two group

## Test Experiment

In [175]:
print(f'Mean Click Trough Rate for Control Group During Experiment :', after.query('groupid == 0')['ctr'].to_numpy().mean())
print(f'Mean Click Trough Rate for Test Group During Experiment :', after.query('groupid == 1')['ctr'].to_numpy().mean())

Mean Click Trough Rate for Control Group During Experiment : 32.996977569382835
Mean Click Trough Rate for Test Group During Experiment : 37.99695912626142


In [176]:
print(f'Std Click Trough Rate for Control Group During Experiment :', after.query('groupid == 0')['ctr'].to_numpy().std())
print(f'Std Click Trough Rate for Test Group During Experiment :', after.query('groupid == 1')['ctr'].to_numpy().std())

Std Click Trough Rate for Control Group During Experiment : 1.7331985918552912
Std Click Trough Rate for Test Group During Experiment : 1.7323710606903675


In [195]:
pvalue = ttest_ind(after.query('groupid == 0')['ctr'].to_numpy(), after.query('groupid == 1')['ctr']
                .to_numpy()).pvalue

print(f'pvalue :  {"{:.100f}".format(pvalue)}')

if pvalue >= 0.05:
    print('Insufficient Reject H0')
else:
    print('Sufficient Reject H0')

pvalue :  0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
Sufficient Reject H0


Then we will decide that the average difference between ctr in new design and ctr in existing design is very significant. Which means that the **ctr in new design is significantly higher** than the existing design.