# A/B TESTING

In [2]:
# The Need for Experimentation
# Read the two user samples from CSV and preview the first five rows of each.
import pandas as pd

desktop = pd.read_csv('desktop.csv')
laptop = pd.read_csv('laptop.csv')

# One print call, newline-separated: emits the same text as two separate prints.
print(desktop.head(), laptop.head(), sep='\n')

   userid  spending  age  visits
0       1      1250   31     126
1       2       900   27       5
2       3         0   30     459
3       4      2890   22      18
4       5      1460   38      20
   userid  spending  age  visits
0      31      1499   32      12
1      32       799   23      40
2      33      1200   45      22
3      34         0   59     126
4      35      1350   17      85


In [3]:
import scipy.stats

# Independent two-sample t-test of desktop vs. laptop users, one column at a
# time, printed in the order spending, age, visits.
for column in ('spending', 'age', 'visits'):
    print(scipy.stats.ttest_ind(desktop[column], laptop[column]))

TtestResult(statistic=-2.109853741030508, pvalue=0.03919630411621095, df=58.0)
TtestResult(statistic=-0.7101437106800108, pvalue=0.4804606394128761, df=58.0)
TtestResult(statistic=0.20626752311535543, pvalue=0.8373043059847984, df=58.0)


In [4]:
# Running Experiments to Test New Hypotheses
import numpy as np

# Split the desktop users at their median age: users at or below the median go
# to groupa, users above it go to groupb.
# NOTE(review): the split is by age rather than random, so a revenue difference
# between the two groups may reflect age and not only the treatment.
medianage = np.median(desktop['age'])
groupa = desktop[desktop['age'].le(medianage)]
groupb = desktop[desktop['age'].gt(medianage)]

In [7]:
# Load per-user revenue (presumably the outcome of the first email campaign)
# and preview the first five rows.
emailresults1 = pd.read_csv('emailresults1.csv')
print(emailresults1.head(n=5))

   userid  revenue
0       1      100
1       2        0
2       3       50
3       4      550
4       5      175


In [8]:
# Attach each user's revenue to their group via an inner join on userid
# (inner is the default join type; spelled out here for clarity).
groupa_withrevenue = pd.merge(groupa, emailresults1, how='inner', on='userid')
groupb_withrevenue = pd.merge(groupb, emailresults1, how='inner', on='userid')

In [9]:
# Independent two-sample t-test on revenue: group A vs. group B.
print(
    scipy.stats.ttest_ind(
        groupa_withrevenue['revenue'],
        groupb_withrevenue['revenue'],
    )
)

TtestResult(statistic=-2.186454851070545, pvalue=0.03730073920038287, df=28.0)


In [10]:
# Difference in mean revenue: group B minus group A.
print(groupb_withrevenue['revenue'].mean() - groupa_withrevenue['revenue'].mean())

125.0


In [11]:
# Translating the Math into Practice
# Randomly assign every laptop user to group 0 or 1. The seed is fixed so the
# same assignment is produced on every run.
np.random.seed(18811015)
laptop['groupassignment1'] = (np.random.random(len(laptop)) > 0.5).astype(int)

# Independent copies of each group so later edits do not touch `laptop`.
groupc = laptop[laptop['groupassignment1'].eq(0)].copy()
groupd = laptop[laptop['groupassignment1'].eq(1)].copy()

In [12]:
# Load per-user revenue for the second experiment (merged on userid below).
emailresults2 = pd.read_csv('emailresults2.csv')

In [13]:
# Attach each user's revenue to their group via an inner join on userid
# (inner is the default join type; spelled out here for clarity).
groupc_withrevenue = pd.merge(groupc, emailresults2, how='inner', on='userid')
groupd_withrevenue = pd.merge(groupd, emailresults2, how='inner', on='userid')

In [14]:
# Independent two-sample t-test on revenue: group C vs. group D.
print(
    scipy.stats.ttest_ind(
        groupc_withrevenue['revenue'],
        groupd_withrevenue['revenue'],
    )
)

TtestResult(statistic=-2.381320497676198, pvalue=0.024288828555138562, df=28.0)


In [15]:
# Difference in mean revenue: group D minus group C.
print(groupd_withrevenue['revenue'].mean() - groupc_withrevenue['revenue'].mean())

260.3333333333333


In [16]:
# Understanding Effect Sizes
# Three very large values (apparently GDP figures, going by the name) used to
# put a difference of 125 into context on a much larger scale.
gdps = [365_303_000_000, 65_994_000_000, 220_000_000]

In [17]:
# Population standard deviation (ddof=0, NumPy's default) of the gdps values.
print(np.asarray(gdps).std())

158884197328.32672


In [18]:
# 125 is the group B minus group A revenue difference printed earlier in the
# notebook; dividing by the spread of gdps expresses it on that scale.
print(125 / np.asarray(gdps).std())

7.867365169217765e-10


In [19]:
# Three small values (apparently burger prices, going by the name) used to put
# a difference of 125 into context on a much smaller scale.
burgers = [9.00, 12.99, 10.50]

In [20]:
# Population standard deviation (ddof=0, NumPy's default) of the burgers values.
print(np.asarray(burgers).std())

1.6455394252341695


In [21]:
# 125 is the group B minus group A revenue difference printed earlier in the
# notebook; dividing by the spread of burgers expresses it on that scale.
print(125 / np.asarray(burgers).std())

75.96293232671214


In [22]:
# Standardized effect size for the first experiment: the difference in mean
# revenue (group B minus group A) divided by the standard deviation of all
# revenues in emailresults1.
# The difference is derived from the data instead of being hard-coded as 125,
# so this cell stays correct if the input files change.
# np.std is called with its default ddof=0 (population standard deviation).
revenue_lift = groupb_withrevenue['revenue'].mean() - groupa_withrevenue['revenue'].mean()
print(revenue_lift / np.std(emailresults1['revenue']))

0.763769235188029


In [23]:
#Calculating the Significance of Data
from statsmodels.stats.power import TTestIndPower

In [24]:
# Significance level passed as `alpha` to the power calculation below.
alpha = 0.05

In [25]:
# Number of observations passed as `nobs1` to the power calculation below.
nobs = 45

In [26]:
# Standardized effect size passed as `effect_size` to the power calculation below.
effectsize = 0.5

In [27]:
# Statistical power of an independent two-sample t-test for the chosen effect
# size, observation count (nobs1) and significance level. Leaving `power`
# unspecified makes solve_power solve for it.
analysis = TTestIndPower()
power = analysis.solve_power(effect_size=effectsize, nobs1=nobs, alpha=alpha)

# Display the result here: `power` is reassigned to a fixed target (0.8) in the
# following cell, so without this the computed value is never shown.
print(power)

In [29]:
# Reverse the power calculation: fix the significance level, effect size and
# desired power, and solve for the number of observations (nobs1) instead.
analysis = TTestIndPower()

alpha = 0.05   # significance level
effect = 0.5   # standardized effect size
power = 0.8    # desired power

# nobs1=None (the default) marks the sample size as the unknown to solve for.
observations = analysis.solve_power(
    effect_size=effect,
    nobs1=None,
    alpha=alpha,
    power=power,
)
print(observations)

63.7656117754095


Summary: I covered A/B testing, beginning with a simple t-test and highlighting the importance of random, unbiased data collection. I then measured effect sizes and used power analysis to estimate the sample size an experiment requires. I also explored key aspects of A/B testing, including the champion/challenger framework, Twyman’s law, and ethical considerations.