In [1]:
import pandas as pd
import numpy as np
import scipy
import random

import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
file_name = '../data/AdSmartABdata.csv'
ad_df = pd.read_csv(file_name)

## Exposed and control Data

### clean out usesrs who don't respond to the experiment

In [12]:
clean_ad_df = ad_df[(ad_df['yes'] == 1) ^ (ad_df['no'] == 1)]
clean_ad_df['converted'] = clean_ad_df.yes

clean_control = clean_ad_df.groupby('experiment').get_group('control')
clean_exposed = clean_ad_df.groupby('experiment').get_group('exposed')
# clean_ad_df

In [13]:
clean_ad_df.head()

Unnamed: 0,auction_id,experiment,date,hour,device_make,platform_os,browser,yes,no,converted
2,0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,2020-07-05,2,E5823,6,Chrome Mobile WebView,0,1,0
16,008aafdf-deef-4482-8fec-d98e3da054da,exposed,2020-07-04,16,Generic Smartphone,6,Chrome Mobile,1,0,1
20,00a1384a-5118-4d1b-925b-6cdada50318d,exposed,2020-07-06,8,Generic Smartphone,6,Chrome Mobile,0,1,0
23,00b6fadb-10bd-49e3-a778-290da82f7a8d,control,2020-07-08,4,Samsung SM-A202F,6,Facebook,1,0,1
27,00ebf4a8-060f-4b99-93ac-c62724399483,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,1,0


In [34]:
print("***********************")
print("** Controled Summary **")
print("***********************")
print()

summary_control = clean_control.agg({'auction_id': 'count', 'converted':'sum'})
summary_control['total'] = summary_control['auction_id']
summary_control.drop('auction_id', inplace=True)

print(f"Total Controls={summary_control[1]} \t Converted={summary_control[0]}")

***********************
** Controled Summary **
***********************

Total Controls=586 	 Converted=264


In [35]:
print("***********************")
print("** Exposed Summary ****")
print("***********************")
print()

summary_exposed = clean_exposed.agg({'auction_id': 'count', 'converted':'sum'})
summary_exposed['total'] = summary_exposed['auction_id']
summary_exposed.drop('auction_id', inplace=True)
summary_exposed

print(f"Total Controls={summary_exposed[1]} \t Converted={summary_exposed[0]}")

***********************
** Exposed Summary ****
***********************

Total Controls=657 	 Converted=308


In [36]:
print("**********************")
print("** Convertion Rate ***")
print("**********************")
print()

control_analysis = clean_control['converted'].agg(['mean', 'std'])
print(f"Control Group conversion rate is {round(control_analysis[0] * 100,2)} %")

exposed_analysis = clean_exposed['converted'].agg(['mean', 'std'])
print(f"Exposed Group conversion rate is {round(exposed_analysis[0] * 100,2)} %")

**********************
** Convertion Rate ***
**********************

Control Group conversion rate is 45.05 %
Exposed Group conversion rate is 46.88 %


In [41]:


def get_z_value(control_meam, exposed_mean, sample_size):
    z = (exposed_mean - control_meam)/(control_meam*(1-control_meam)/sample_size)**0.5
    return z



sample_size = clean_control.shape[0]

control_sample = clean_control.sample(sample_size)
exposed_sample = clean_exposed.sample(sample_size)

control_analysis = control_sample['converted'].agg(['mean', 'std'])
exposed_analysis = exposed_sample['converted'].agg(['mean', 'std'])

z_result = get_z_value(control_analysis[0], exposed_analysis[0], sample_size)

print("************************")
print("** Clasic A/B Result ***")
print("************************")
print()

print(f"Z-value : {z_result}")
print(f"P-value : {scipy.stats.norm.sf(z_result)}")

************************
** Clasic A/B Result ***
************************

Z-value : 0.9132960806052228
P-value : 0.1805434215776347


#### since our p-value is > 0.05 we can't disproof our null hypothesis, That means the advertisement didn't increase the Brand awarness

In [51]:
from statsmodels.stats.proportion import proportions_ztest
check = proportions_ztest([clean_control['converted'].sum(), clean_exposed['converted'].sum()],nobs= [sample_size, sample_size])

check


(-2.571241344425307, 0.010133468049018098)