In [3]:
import pandas as pd
import numpy as np

from scipy import stats

In [4]:
def run_ttest(uu_control, success_control, uu_treatment, success_treatment, alpha):
    
    print ("Control Data:", uu_control, success_control)
    print ("Treatment Data:", uu_treatment, success_treatment)
    
    # Calculate conversion rate
    cvr_control =0
    cvr_treatment =0
    if uu_control != 0:
        cvr_control = success_control/uu_control
    if uu_treatment != 0:
        cvr_treatment = success_treatment/uu_treatment
    
    # standard deviation
    std_dev_control = np.sqrt(cvr_control*(1-cvr_control))
    std_dev_treatment = np.sqrt(cvr_treatment*(1-cvr_treatment))

    print ("Standard deviation for Control %.2f & Treatment %.2f" % (std_dev_control, std_dev_treatment))
    
    # Standard Error
    std_error_control = np.sqrt( np.power(std_dev_control,2)/uu_control)
    std_error_treatment = np.sqrt( np.power(std_dev_treatment,2)/uu_treatment)
    
    print ("Standard error for Control %.2f & Treatment %.2f" % (std_error_control, std_error_treatment))
    
    #signal
    signal_control = cvr_control/std_error_control
    signal_treatment = cvr_treatment/std_error_treatment
    
    # cutoff
    cf_control = 1.96*(std_dev_control/np.sqrt(uu_control))
    cf_treatment = 1.96*(std_dev_treatment/np.sqrt(uu_treatment))
    
    # Calculate confidence interval for Control's conversion rate
    cvr_conf_interval_control_low = cvr_control - cf_control
    cvr_conf_interval_control_high = cvr_control + cf_control
    
    print ("Confidence Interval for Control low: {:.2%} high: {:.2%}".format(cvr_conf_interval_control_low, cvr_conf_interval_control_high))
    
    cvr_conf_interval_treatment_low = cvr_treatment - cf_treatment
    cvr_conf_interval_treatment_high = cvr_treatment + cf_treatment
    print ("Confidence Interval for treatment low: {:.2%} high {:.2%}".format(cvr_conf_interval_treatment_low, cvr_conf_interval_treatment_high))
        
    lift=0
    if cvr_control!=0:
        lift = (cvr_treatment-cvr_control)/cvr_control
    print ("lift: {:.2%}".format(lift))
    
    # confidence interval for lift
    lift_conf_interval_low = (cvr_conf_interval_treatment_low - cvr_conf_interval_control_high)/cvr_conf_interval_control_high
    lift_conf_interval_high = (cvr_conf_interval_treatment_high- cvr_conf_interval_control_low)/cvr_conf_interval_control_low
    print ("Confidence Interval for lift low {:.2%} high {:.2%}".format(lift_conf_interval_low, lift_conf_interval_high))
        
    # calculate standard error
    sed = np.sqrt(std_error_control**2 + std_error_treatment**2)
    
    # calculate t-stat
    t_stat = (cvr_control - cvr_treatment) / sed
    print ("t-stat is %.2f" %(t_stat))
    
    alpha = 0.05
    df = uu_treatment+uu_control - 2

    # calculate critical value
    cv = stats.t.ppf(1.0 - alpha, df)
    print ("Critical Value %.2f " % (cv))
    
    # calculate p-value
    p = (1.0 - stats.t.cdf(abs(t_stat), df)) * 2.0
    print ("THE P-VALUE: %.5f" % (p))
    
    # interpret via critical value
    if abs(t_stat) <= cv:
        print('Accept null hypothesis that the means are equal.')
    else:
        print('Reject the null hypothesis that the means are equal.')
        
    # interpret via p-value
    if p > alpha:
        print('Accept null hypothesis that the means are equal.')
    else:
        print('Reject the null hypothesis that the means are equal.')
        
    return ('%.3f'%(lift),'%.3f'%(p))

In [5]:
# script to download file from PBI
# df.to_csv(r"C:\Users\v-suljai\Desktop\Python Scripts\t.csv", index=False)

In [6]:
data = pd.read_csv("AdobeData.csv")
data.head()

Unnamed: 0,Date Granularity.Level 1: Year,Date Granularity.Level 2: Month,Date Granularity.Level 3: Day,Control_ExpID,Cart_Adds,Cart_Removal,Checkouts,Order,Unique_Visitors,Market (v27),Treatment_ExpID
0,2019,1,27,EX:sfwaaa,18070,5513,16882,18339,591901,en-US,EX:sfwaaa
1,2019,1,27,EX:sfwaab,17872,5627,17105,18618,591782,en-US,EX:sfwaab
2,2019,1,27,EX:20199642t1,17488,5079,16327,17879,576690,en-US,EX:20199642t1
3,2019,1,27,EX:19844185c,17421,5263,16386,17993,577820,en-US,EX:19844185c
4,2019,1,27,EX:19975527t1,17355,5163,16185,17822,578066,en-US,EX:19975527t1


In [7]:
data[(data['Control_ExpID']=='EX:20199642c') & (data['Market (v27)']=='en-us')]

Unnamed: 0,Date Granularity.Level 1: Year,Date Granularity.Level 2: Month,Date Granularity.Level 3: Day,Control_ExpID,Cart_Adds,Cart_Removal,Checkouts,Order,Unique_Visitors,Market (v27),Treatment_ExpID
505,2019,2,1,EX:20199642c,18335,5322,16188,18542,707110,en-us,EX:20199642c
606,2019,2,2,EX:20199642c,16293,5125,15005,16025,581600,en-us,EX:20199642c
706,2019,2,3,EX:20199642c,15639,5075,14883,15184,573056,en-us,EX:20199642c
806,2019,2,4,EX:20199642c,18823,5653,17372,17604,738363,en-us,EX:20199642c
906,2019,2,5,EX:20199642c,18706,5331,16117,16420,744640,en-us,EX:20199642c
1002,2019,2,6,EX:20199642c,18701,5316,15849,15926,749758,en-us,EX:20199642c
1104,2019,2,7,EX:20199642c,17989,5228,15061,15492,720801,en-us,EX:20199642c
1202,2019,2,8,EX:20199642c,16440,4717,13815,15059,636740,en-us,EX:20199642c
1304,2019,2,9,EX:20199642c,15241,4701,13748,14908,528181,en-us,EX:20199642c
1402,2019,2,10,EX:20199642c,16847,5350,15893,16625,610955,en-us,EX:20199642c


['EX:18255570c', 'EX:19161470c', 'EX:19270831c', 'EX:19338270c', 'EX:19637668c', 'EX:19844185c', 'EX:19975527c', 'EX:19976714c', 'EX:19994289c', 'EX:20021227c', 'EX:20199642C', 'EX:20199642c', 'EX:20228027c', 'EX:20228027cac', 'EX:20285521c', 'EX:20303823c', 'EX:20393818C', 'EX:pre20285521c', 'EX:pre20303823c']


['EX:18255570T1', 'EX:18255570T2', 'EX:19161470T1', 'EX:19270831t1', 'EX:19338270T1', 'EX:19338270T2', 'EX:19338270T3', 'EX:19338270T4', 'EX:19338270T5', 'EX:19338270t1', 'EX:19338270t2', 'EX:19338270t3', 'EX:19338270t4', 'EX:19338270t5', 'EX:19637668T1', 'EX:19637668t1', 'EX:19844185t1', 'EX:19975527t1', 'EX:19976714T1', 'EX:19976714T2', 'EX:19976714T3', 'EX:19976714T4', 'EX:19976714T5', 'EX:19976714T6', 'EX:19994289t1', 'EX:20021227t1', 'EX:20021227t2', 'EX:20199642t1', 'EX:20228027T1', 'EX:20228027cat', 'EX:20285521t1', 'EX:20303823T1', 'EX:20393818T1', 'EX:pre20285521t1', 'EX:pre20303823t1', 'EX:surcomm-001t1', 'EX:surcomm-001t2', 'ex:19844185t1']


In [11]:
len(lst_control)

19

In [14]:
lst = data.loc[:,"Control_ExpID"].unique().tolist()

lst_control = []
for i in range(len(lst)):
    if 'c' in lst[i][-1].lower():
        lst_control.append(lst[i])
print (sorted(lst_control))   

lst_treatment = []
for i in range(len(lst)):
    if ('t' in lst[i][-1].lower()) or ('t' in lst[i][-2].lower()):
        lst_treatment.append(lst[i])
print (sorted(lst_treatment))  

lst_control = sorted (lst_control)
lst_treatment = sorted (lst_treatment)

for c in range(len(lst_control)):
    for t in range(len(lst_treatment)):
        if lst_control[c][3:11] in lst_treatment[t]:
            control = lst_control[c]
            treatment = lst_treatment[t]
            
            control_df = data[(data['Control_ExpID']==control) & (data['Market (v27)']=='en-us')]
            treatment_df = data[(data['Control_ExpID']==treatment) & (data['Market (v27)']=='en-us')]
                

['EX:18255570c', 'EX:19161470c', 'EX:19270831c', 'EX:19338270c', 'EX:19637668c', 'EX:19844185c', 'EX:19975527c', 'EX:19976714c', 'EX:19994289c', 'EX:20021227c', 'EX:20199642C', 'EX:20199642c', 'EX:20228027c', 'EX:20228027cac', 'EX:20285521c', 'EX:20303823c', 'EX:20393818C', 'EX:pre20285521c', 'EX:pre20303823c']
['EX:18255570T1', 'EX:18255570T2', 'EX:19161470T1', 'EX:19270831t1', 'EX:19338270T1', 'EX:19338270T2', 'EX:19338270T3', 'EX:19338270T4', 'EX:19338270T5', 'EX:19338270t1', 'EX:19338270t2', 'EX:19338270t3', 'EX:19338270t4', 'EX:19338270t5', 'EX:19637668T1', 'EX:19637668t1', 'EX:19844185t1', 'EX:19975527t1', 'EX:19976714T1', 'EX:19976714T2', 'EX:19976714T3', 'EX:19976714T4', 'EX:19976714T5', 'EX:19976714T6', 'EX:19994289t1', 'EX:20021227t1', 'EX:20021227t2', 'EX:20199642t1', 'EX:20228027T1', 'EX:20228027cat', 'EX:20285521t1', 'EX:20303823T1', 'EX:20393818T1', 'EX:pre20285521t1', 'EX:pre20303823t1', 'EX:surcomm-001t1', 'EX:surcomm-001t2', 'ex:19844185t1']


In [6]:
control = data[(data['Control_ExpID']=='EX:20199642c') & (data['Market (v27)']=='en-us')]
treatment = data[(data['Control_ExpID']=='EX:20199642t1') & (data['Market (v27)']=='en-us')]
#d = data.where(data['Control_ExpID']=='EX:20199642c' )
#d.head()
print (control.head())
print (treatment.head())

     Date Granularity.Level 1: Year  Date Granularity.Level 2: Month  \
505                            2019                                2   
606                            2019                                2   
706                            2019                                2   
806                            2019                                2   
906                            2019                                2   

     Date Granularity.Level 3: Day Control_ExpID  Cart_Adds  Cart_Removal  \
505                              1  EX:20199642c      18335          5322   
606                              2  EX:20199642c      16293          5125   
706                              3  EX:20199642c      15639          5075   
806                              4  EX:20199642c      18823          5653   
906                              5  EX:20199642c      18706          5331   

     Checkouts  Order  Unique_Visitors Market (v27) Treatment_ExpID  
505      16188  18542           70

In [7]:
uu_control = control['Unique_Visitors'].sum()
ATC_control = control['Cart_Adds'].sum()
Checkouts_control = control['Checkouts'].sum()
Orders_control = control['Order'].sum()

uu_treatment = treatment['Unique_Visitors'].sum()
ATC_treatment = treatment['Cart_Adds'].sum()
Checkouts_treatment = treatment['Checkouts'].sum()
Orders_treatment = treatment['Order'].sum()

In [8]:
lift, p=run_ttest(uu_control, ATC_control, uu_treatment, ATC_treatment, 0.95)

Control Data: 8481215 221896
Treatment Data: 8476918 221969
Standard deviation for Control 0.16 & Treatment 0.16
Standard error for Control 0.00 & Treatment 0.00
Confidence Interval for Control low: 2.61% high: 2.63%
Confidence Interval for treatment low: 2.61% high 2.63%
lift: 0.08%
Confidence Interval for lift low -0.73% high 0.91%
t-stat is -0.28
Critical Value 1.64 
THE P-VALUE: 0.77786
Accept null hypothesis that the means are equal.
Accept null hypothesis that the means are equal.


In [9]:
#stats_exp_data = pd.DataFrame()
data1 = ['EX:20199642c']
cols = ['ExpID_Control']
stats_data = pd.DataFrame(data = data1, columns=cols)

stats_data['ExpID_Treatment'] = 'EX:20199642t1'
stats_data['KPI'] = 'Add-to-Carts'

stats_data['UU_control'] = uu_control
stats_data['UU_treatment'] = uu_treatment
stats_data['ATC_control'] = ATC_control
stats_data['ATC_treatment'] = ATC_treatment
stats_data['CVR Lift'] = lift

stats_data['p_val'] = p
stats_data

Unnamed: 0,ExpID_Control,ExpID_Treatment,KPI,UU_control,UU_treatment,ATC_control,ATC_treatment,CVR Lift,p_val
0,EX:20199642c,EX:20199642t1,Add-to-Carts,8481215,8476918,221896,221969,0.001,0.778


In [10]:
lift, p=run_ttest(uu_control, Checkouts_control, uu_treatment, Checkouts_treatment, 0.95)

Control Data: 8481215 196294
Treatment Data: 8476918 196783
Standard deviation for Control 0.15 & Treatment 0.15
Standard error for Control 0.00 & Treatment 0.00
Confidence Interval for Control low: 2.30% high: 2.32%
Confidence Interval for treatment low: 2.31% high 2.33%
lift: 0.30%
Confidence Interval for lift low -0.57% high 1.18%
t-stat is -0.95
Critical Value 1.64 
THE P-VALUE: 0.34217
Accept null hypothesis that the means are equal.
Accept null hypothesis that the means are equal.


In [11]:
stats_data.loc[1] = ['EX:20199642c', 'EX:20199642t1','Checkouts',uu_control, uu_treatment, Checkouts_control, Checkouts_treatment, lift,p]
stats_data

Unnamed: 0,ExpID_Control,ExpID_Treatment,KPI,UU_control,UU_treatment,ATC_control,ATC_treatment,CVR Lift,p_val
0,EX:20199642c,EX:20199642t1,Add-to-Carts,8481215,8476918,221896,221969,0.001,0.778
1,EX:20199642c,EX:20199642t1,Checkouts,8481215,8476918,196294,196783,0.003,0.342


In [12]:
#stats_exp_data = pd.DataFrame()
data1 = ['EX:20199642c']
cols = ['ExpID_Control']
stats_data = pd.DataFrame(data = data1, columns=cols)

stats_data['ExpID_Treatment'] = 'EX:20199642t1'
stats_data['KPI'] = 'Checkouts'

stats_data['UU_control'] = uu_control
stats_data['UU_treatment'] = uu_treatment
stats_data['ATC_control'] = ATC_control
stats_data['ATC_treatment'] = ATC_treatment
stats_data['CVR Lift'] = lift

stats_data['p_val'] = p
stats_data

Unnamed: 0,ExpID_Control,ExpID_Treatment,KPI,UU_control,UU_treatment,ATC_control,ATC_treatment,CVR Lift,p_val
0,EX:20199642c,EX:20199642t1,Checkouts,8481215,8476918,221896,221969,0.003,0.342


In [13]:
lift, p=run_ttest(uu_control, Orders_control, uu_treatment, Orders_treatment, 0.95)

Control Data: 8481215 208299
Treatment Data: 8476918 208316
Standard deviation for Control 0.15 & Treatment 0.15
Standard error for Control 0.00 & Treatment 0.00
Confidence Interval for Control low: 2.45% high: 2.47%
Confidence Interval for treatment low: 2.45% high 2.47%
lift: 0.06%
Confidence Interval for lift low -0.79% high 0.91%
t-stat is -0.19
Critical Value 1.64 
THE P-VALUE: 0.84753
Accept null hypothesis that the means are equal.
Accept null hypothesis that the means are equal.


In [14]:
#stats_exp_data = pd.DataFrame()
data1 = ['EX:20199642c']
cols = ['ExpID_Control']
stats_data = pd.DataFrame(data = data1, columns=cols)

stats_data['ExpID_Treatment'] = 'EX:20199642t1'
stats_data['KPI'] = 'Orders'

stats_data['UU_control'] = uu_control
stats_data['UU_treatment'] = uu_treatment
stats_data['ATC_control'] = ATC_control
stats_data['ATC_treatment'] = ATC_treatment
stats_data['CVR Lift'] = lift

stats_data['p_val'] = p
stats_data

Unnamed: 0,ExpID_Control,ExpID_Treatment,KPI,UU_control,UU_treatment,ATC_control,ATC_treatment,CVR Lift,p_val
0,EX:20199642c,EX:20199642t1,Orders,8481215,8476918,221896,221969,0.001,0.848


https://machinelearningmastery.com/how-to-code-the-students-t-test-from-scratch-in-python/

In [None]:
stats_data.append()

In [16]:
# 'dataset' holds the input data for this script

import pandas as pd
import numpy as np

from scipy import stats

def run_ttest(uu_control, success_control, uu_treatment, success_treatment, alpha):
    
    print ("Control Data:", uu_control, success_control)
    print ("Treatment Data:", uu_treatment, success_treatment)
    
    # Calculate conversion rate
    cvr_control =0
    cvr_treatment =0
    if uu_control != 0:
        cvr_control = success_control/uu_control
    if uu_treatment != 0:
        cvr_treatment = success_treatment/uu_treatment
    
    # standard deviation
    std_dev_control = np.sqrt(cvr_control*(1-cvr_control))
    std_dev_treatment = np.sqrt(cvr_treatment*(1-cvr_treatment))

    print ("Standard deviation for Control %.2f & Treatment %.2f" % (std_dev_control, std_dev_treatment))
    
    # Standard Error
    std_error_control = np.sqrt( np.power(std_dev_control,2)/uu_control)
    std_error_treatment = np.sqrt( np.power(std_dev_treatment,2)/uu_treatment)
    
    print ("Standard error for Control %.2f & Treatment %.2f" % (std_error_control, std_error_treatment))
    
    #signal
    signal_control = cvr_control/std_error_control
    signal_treatment = cvr_treatment/std_error_treatment
    
    # cutoff
    cf_control = 1.96*(std_dev_control/np.sqrt(uu_control))
    cf_treatment = 1.96*(std_dev_treatment/np.sqrt(uu_treatment))
    
    # Calculate confidence interval for Control's conversion rate
    cvr_conf_interval_control_low = cvr_control - cf_control
    cvr_conf_interval_control_high = cvr_control + cf_control
    
    print ("Confidence Interval for Control low: {:.2%} high: {:.2%}".format(cvr_conf_interval_control_low, cvr_conf_interval_control_high))
    
    cvr_conf_interval_treatment_low = cvr_treatment - cf_treatment
    cvr_conf_interval_treatment_high = cvr_treatment + cf_treatment
    print ("Confidence Interval for treatment low: {:.2%} high {:.2%}".format(cvr_conf_interval_treatment_low, cvr_conf_interval_treatment_high))
        
    lift=0
    if cvr_control!=0:
        lift = (cvr_treatment-cvr_control)/cvr_control
    print ("lift: {:.2%}".format(lift))
    
    # confidence interval for lift
    lift_conf_interval_low = (cvr_conf_interval_treatment_low - cvr_conf_interval_control_high)/cvr_conf_interval_control_high
    lift_conf_interval_high = (cvr_conf_interval_treatment_high- cvr_conf_interval_control_low)/cvr_conf_interval_control_low
    print ("Confidence Interval for lift low {:.2%} high {:.2%}".format(lift_conf_interval_low, lift_conf_interval_high))
        
    # calculate standard error
    sed = np.sqrt(std_error_control**2 + std_error_treatment**2)
    
    # calculate t-stat
    t_stat = (cvr_control - cvr_treatment) / sed
    print ("t-stat is %.2f" %(t_stat))
    
    alpha = 0.05
    df = uu_treatment+uu_control - 2

    # calculate critical value
    cv = stats.t.ppf(1.0 - alpha, df)
    print ("Critical Value %.2f " % (cv))
    
    # calculate p-value
    p = (1.0 - stats.t.cdf(abs(t_stat), df)) * 2.0
    print ("THE P-VALUE: %.5f" % (p))
    
    # interpret via critical value
    if abs(t_stat) <= cv:
        print('Accept null hypothesis that the means are equal.')
    else:
        print('Reject the null hypothesis that the means are equal.')
        
    # interpret via p-value
    if p > alpha:
        print('Accept null hypothesis that the means are equal.')
    else:
        print('Reject the null hypothesis that the means are equal.')
        
    return (lift, p)


            
def get_ttest(control, treatment):
    # get all KPIs for control
    uu_control = control['Unique_Visitors'].sum()
    ATC_control = control['Cart_Adds'].sum()
    Checkouts_control = control['Checkouts'].sum()
    Orders_control = control['Orders'].sum()

    # get all KPIs from Treatment
    uu_treatment = treatment['Unique_Visitors'].sum()
    ATC_treatment = treatment['Cart_Adds'].sum()
    Checkouts_treatment = treatment['Checkouts'].sum()
    Orders_treatment = treatment['Orders'].sum()

    # run statistical test for ATC
    lift, p=run_ttest(uu_control, ATC_control, uu_treatment, ATC_treatment, 0.95)

    # write to dataframe
    data1 = control['Control_ExpID'] #['EX:20199642c']
    cols = ['ExpID_Control']
    stats_data = pd.DataFrame(data = data1, columns=cols)

    stats_data['ExpID_Treatment'] = treatment['Treatment_ExpID'] #'EX:20199642t1'
    stats_data['KPI'] = 'Add-to-Carts'

    stats_data['UU_control'] = uu_control
    stats_data['UU_treatment'] = uu_treatment
    stats_data['KPI_control'] = ATC_control
    stats_data['KPI_treatment'] = ATC_treatment
    stats_data['CVR Lift'] = lift

    stats_data['p_val'] = p

    # checkout
    lift, p=run_ttest(uu_control, Checkouts_control, uu_treatment, Checkouts_treatment, 0.95)

    # write to dataframe
    data1 = control['Control_ExpID']  #['EX:20199642c']
    cols = ['ExpID_Control']
    stats_data_ck = pd.DataFrame(data = data1, columns=cols)
    
    stats_data['ExpID_Treatment'] = treatment['Treatment_ExpID'] #'EX:20199642t1'
    stats_data_ck['KPI'] = 'Checkouts'

    stats_data_ck['UU_control'] = uu_control
    stats_data_ck['UU_treatment'] = uu_treatment
    stats_data_ck['KPI_control'] = Checkouts_control
    stats_data_ck['KPI_treatment'] = Checkouts_treatment
    stats_data_ck['CVR Lift'] = lift

    stats_data_ck['p_val'] = p

    stats_data.append(stats_data_ck, ignore_index=True)

    #Order
    lift, p=run_ttest(uu_control, Orders_control, uu_treatment, Orders_treatment, 0.95)

    # write to dataframe
    data1 = control['Control_ExpID']  #['EX:20199642c']
    cols = ['ExpID_Control']
    stats_data_or = pd.DataFrame(data = data1, columns=cols)

    stats_data['ExpID_Treatment'] = treatment['Treatment_ExpID'] #'EX:20199642t1'    
    stats_data_or['KPI'] = 'Orders'

    stats_data_or['UU_control'] = uu_control
    stats_data_or['UU_treatment'] = uu_treatment
    stats_data_or['KPI_control'] = Orders_control
    stats_data_or['KPI_treatment'] = Orders_treatment
    stats_data_or['CVR Lift'] = lift

    stats_data_or['p_val'] = p

    stats_data.append(stats_data_or, ignore_index=True)
    

    #control = dataset[dataset['Control_ExpID']=='EX:20199642c']
#treatment = dataset[dataset['Treatment_ExpID']=='EX:20199642t1']

lst = dataset.loc[:,"Control_ExpID"].unique().tolist()

lst_control = []
for i in range(len(lst)):
    if 'c' in lst[i][-1].lower():
        lst_control.append(lst[i])
print (sorted(lst_control))   

lst_treatment = []
for i in range(len(lst)):
    if ('t' in lst[i][-1].lower()) or ('t' in lst[i][-2].lower()):
        lst_treatment.append(lst[i])
print (sorted(lst_treatment))  

lst_control = sorted (lst_control)
lst_treatment = sorted (lst_treatment)

for c in range(len(lst_control)):
    for t in range(len(lst_treatment)):
        if lst_control[c][3:11] in lst_treatment[t]:
            control = lst_control[c]
            treatment = lst_treatment[t]
            
            control_df = dataset[(dataset['Control_ExpID']==control) & (dataset['Market (v27)']=='en-us')]
            treatment_df = dataset[(dataset['Treatment_ExpID']==treatment) & (dataset['Market (v27)']=='en-us')]
            
            get_ttest(control_df, treatment_df)

NameError: name 'dataset' is not defined

In [None]:
stats_data_or