In [2]:
import import_ipynb
from backend_nonproportional import *
import statsmodels.stats.api as sms
import statsmodels.stats.power as pwr
import pandas as pd
import numpy as np
from IPython.display import Markdown, display

In [3]:
# Install all packages if a "no module found" error occurs. Please comment the
# commands out again once the packages are installed.
# The commands are wrapped in a triple-quoted string so that running this cell
# does not execute them; the cell merely evaluates to that string.
'''!pip install feather-format 
!pip install import_ipynb
!pip install statsmodels
!pip install pandas
!pip install numpy'''

'!pip install feather-format \n!pip install import_ipynb\n!pip install statsmodels\n!pip install pandas\n!pip install numpy'

In [4]:

def printmd(string):
    """Render ``string`` as Markdown in the notebook output area."""
    rendered = Markdown(string)
    display(rendered)


def get_sample_size(u1,lift,std_dev):
    """Return the rounded per-arm sample size needed to detect a relative lift.

    Parameters
    ----------
    u1 : baseline mean of the metric.
    lift : relative lift to detect (the comparison mean is ``(lift + 1) * u1``).
    std_dev : standard deviation of the metric.

    The standardized effect size is the absolute difference between the two
    means divided by ``std_dev``.  The number of observations in the first
    group is solved with ``tt_ind_solve_power`` using alpha=0.05, power=0.8
    and ratio=1, and the result is rounded to the nearest integer.
    """
    shifted_mean = (lift + 1) * u1
    effect_size = abs((u1 - shifted_mean) / std_dev)

    required_obs = pwr.tt_ind_solve_power(
        effect_size,
        nobs1=None,  # the unknown being solved for
        alpha=.05,
        power=0.8,
        ratio=1,
    )
    return round(required_obs)

def get_splits(max_split):
    """Return the ascending list of candidate traffic splits up to ``max_split``.

    The candidate grid is 0.01, 0.02 and then 0.05 to 0.50 in steps of 0.05.
    Every grid value strictly below ``max_split`` is kept, and ``max_split``
    itself closes the list whenever it lies within the grid range (<= 0.5).
    For ``max_split`` above 0.5 the list simply ends at 0.5.

    Unlike a naive "seed + loop" construction, the small seed values are also
    checked against the cap, so the result never contains duplicates or a
    split larger than ``max_split`` (e.g. ``get_splits(0.01) == [0.01]``).

    ``max_split`` is expected to be a positive fraction.
    """
    grid = [0.01, 0.02] + [round(0.05 * (step + 1), 4) for step in range(10)]

    # The grid is already ascending, so filtering keeps the result sorted.
    splits = [candidate for candidate in grid if candidate < max_split]

    # Close the list with the cap itself once the grid has reached it.
    if max_split <= grid[-1]:
        splits.append(max_split)
    return splits

def generate_df(mean,std_dev,lifts,splits,sessions_per_day):
    """Build the power-analysis grid for every (lift, split) combination.

    Parameters
    ----------
    mean : baseline mean of the metric.
    std_dev : standard deviation of the metric.
    lifts : iterable of relative lifts to evaluate.
    splits : iterable of traffic splits (fraction of daily sessions per arm).
    sessions_per_day : number of sessions available per day.

    Returns
    -------
    pandas.DataFrame with one row per (lift, split) pair, lifts varying
    slowest, and the columns ``lift``, ``split``, ``sample_size_per_arm``,
    ``mean``, ``minimum_detectable_effect`` (computed as ``mean * (1 + lift)``)
    and ``days_to_run`` (sample size divided by the sessions an arm receives
    per day, rounded).
    """
    rows = []
    for lift in lifts:
        # The required sample size depends only on the lift, so it is solved
        # once per lift instead of once per (lift, split) pair.
        size_per_arm = get_sample_size(mean, lift, std_dev)
        rows.extend((lift, split, size_per_arm) for split in splits)

    # reshape keeps the three-column layout even when the grid is empty.
    sample_sizes = np.array(rows).reshape(-1, 3)
    df = pd.DataFrame(sample_sizes, columns=["lift", "split", "sample_size_per_arm"])
    df['mean']=mean
    df['minimum_detectable_effect']=mean*(1+df['lift'])
    df['days_to_run']=round(df['sample_size_per_arm']/(sessions_per_day*(df['split'])))
    return df

def filter_on_day_length(df,mindays,maxdays):
    """Keep the rows whose ``days_to_run`` lies in ``[mindays, maxdays]``.

    Both bounds are inclusive.  The surviving rows are returned as a new
    frame ordered by ``days_to_run`` and then by ``split``; the original
    index labels are retained.
    """
    within_window = df["days_to_run"].between(mindays, maxdays)
    return df.loc[within_window].sort_values(by=['days_to_run', 'split'])


def get_max_split(narms):
    """Return the largest equal traffic share per arm, rounded to 2 decimals.

    The share is ``1 / narms``; for example three arms give 0.33.
    """
    equal_share = 1 / narms
    return round(equal_share, 2)
#######

def _prompt_menu_choice(prompt, valid_choices):
    """Ask a numbered-menu question until the reply is one of ``valid_choices``.

    Non-numeric replies are treated like any other wrong option instead of
    raising ``ValueError``.
    """
    while True:
        printmd('\n ****Question:**** \n')
        try:
            choice = int(input(prompt))
        except ValueError:
            choice = None
        if choice in valid_choices:
            return choice
        print("You Entered Wrong Option. Please enter the right option, Example: 1 ")


def get_user_variables_automated():
    """Collect the experiment inputs, pulling the historical stats from the backend.

    The user is asked for the metric, device, month, number of arms, landing
    page and the minimum/maximum test duration.  The session count, mean,
    standard deviation and number of days are taken from
    ``bagOfVisits.func_call`` (provided by the backend module; its exact
    return contract is not visible here -- the unpacking below assumes an
    11-element result, TODO confirm against the backend).

    Returns
    -------
    dict with the keys ``narms``, ``nsessions``, ``mean``, ``std_dev``,
    ``ndays``, ``mindays``, ``maxdays`` and ``sessions_per_day``.  If the
    maximum duration is not larger than the minimum, the window is reset to
    14-28 days.

    Raises
    ------
    ZeroDivisionError if the backend reports zero days of history; errors
    raised while converting the backend's mean/std/days values propagate
    to the caller instead of being retried endlessly.
    """
    inputs={"narms": "How many ARMS are in the experiment (Control + All treatments)?  ",
            "nsessions": "Please enter the page name where experiment has to run  ",
            "mean": "The MEAN (AVERAGE) of your data:  ",
            "std_dev": "The Standard Deviation of your data:  ",
            "ndays": "Number of DAYS historical data span for:  ",
            "mindays": "What is the MINIMUM number of days that you think your test should run for? (Usually 14)  ",
            "maxdays": "What is the MAXIMUM number of days that you think the test should run for? (Usually 28)  "}

    outputs = dict.fromkeys(inputs)

    metric = _prompt_menu_choice(
        "Please enter the metric for which analysis has to be done? :\n 1 for RPS (Revenue Per Session) \n 2 for AOV (Average Order Value) \n 3 for RPV (Revenue per visitor) \n 4 for UPO (Units per order) \n",
        (1, 2, 3, 4))

    device_inp = _prompt_menu_choice(
        "Select the device:\n 1 for Desktop \n 2 for Mobile App\n 3 for Mobile Web\n 4 for Tablet: ",
        (1, 2, 3, 4))

    rejected_months = ("november", "december")
    supported_months = ("january", "february", "march", "april", "may", "june",
                        "july", "august", "september", "october")
    while True:
        printmd('\n ****Question:**** \n')
        month_name = input("Enter the month of the experiment: ").lower()
        if month_name in rejected_months:
            print("You have entered an invalid month. Please try entering a month from january to October")
        elif month_name in supported_months:
            break
        else:
            print("You entered wrong input. Please try entering a month from january to October, Example: 'march'")

    landing_pages = ("search", "browse", "pdp", "atb", "bag", "homepage")

    # The dict order matters: 'nsessions' must be handled before 'mean',
    # 'std_dev' and 'ndays', which reuse the values fetched from the backend.
    for key, value in inputs.items():
        if key == 'nsessions':
            while True:
                printmd('\n ****Question:**** \n')
                user = input(value)
                landingPage = user.lower().replace(" ", "")
                if landingPage not in landing_pages:
                    # Re-prompt instead of querying the backend with a page
                    # that is already known to be invalid.
                    print("\n You have entered  wrong input. Please enter any one from options below.")
                    printmd("****The possible landing pages are:\n 1. homepage \n 2. search \n 3. browse \n 4. pdp \n 5. atb \n 6. bag ****")
                    continue
                try:
                    (total_session, Avg, std, days,
                     prev_month, current_month, next_month,
                     a, b, c, d) = bagOfVisits.func_call(device_inp, landingPage, month_name, metric)
                    outputs[key] = int(total_session)
                except Exception as e:
                    # Best effort: report the backend failure and ask again.
                    print("Error: {}".format(e))
                    print("Please enter a valid number...", '\n')
                    continue
                printmd("***Total Visits of {}: {}***".format(user, total_session))
                break

        elif key == 'mean':
            mean_float = float(Avg)
            outputs[key] = mean_float
            print("\n")
            printmd('****%s****'% value)
            print(str(mean_float))

        elif key == 'std_dev':
            std_float = float(std)
            outputs[key] = std_float
            print("\n")
            printmd('****%s****'% value)
            print(str(std_float))
            print("\n")

        elif key == 'ndays':
            outputs[key] = int(days)
            print("**********************************************************************")
            printmd('****%s****'% value)
            print(str(days))
            print("Previous Month: {} \t Current Month: {} \t  Next Month: {} \t  \n \n".format(a,b,c))
            print("\n")
            printmd("\n ****Fiscal Previous & Next Month Mapping Table**** \n")
            print(d)
            print("**********************************************************************")

        else:
            # narms, mindays and maxdays: whole numbers greater than one.
            while True:
                printmd('\n ****Question:**** \n')
                user_ = input(value)
                try:
                    user_int = int(user_.replace(',', ''))
                except ValueError as e:
                    print("Error: {}".format(e))
                    print("Please enter a valid number...", '\n')
                    continue
                if user_int <= 1:
                    print("Please enter a number greater than one...", '\n')
                    continue
                print("Accepted: {} ".format(user_int), '\n')
                outputs[key] = user_int
                break

    if outputs.get('maxdays') <= outputs.get('mindays'):
        print("Warning: Max Days {} is <= Min Days {}".format(outputs.get('maxdays'),outputs.get('mindays')))
        print("Will reset to 14-28...")
        outputs['mindays']=14
        outputs['maxdays']=28

    outputs['sessions_per_day']=round(outputs.get('nsessions')/outputs.get('ndays'))
    print("Accepted! Here are you input values: ", '\n')
    for key, value in outputs.items():
        print(key, value)
    print('\n')
    return outputs




########

def get_user_variables():
    """Interactively collect the experiment inputs typed in by the user.

    Seven questions are asked in order.  ``mean`` and ``std_dev`` are read as
    floats; the remaining answers are read as integers (thousands separators
    allowed) and must be greater than one.  The standard deviation must be
    positive and the mean non-zero, because the effect size is computed as
    ``lift * mean / std_dev`` and the power solver cannot work with a zero
    or undefined effect.

    Returns
    -------
    dict with the keys ``narms``, ``nsessions``, ``mean``, ``std_dev``,
    ``ndays``, ``mindays``, ``maxdays`` and ``sessions_per_day``.  If the
    maximum duration is not larger than the minimum, the window is reset to
    14-28 days.
    """
    inputs={"narms": "How many ARMS are in the experiment (Control + All treatments)?  ",
            "nsessions": "How many SESSIONS did your historical data have? (commas ok)  ",
            "mean": "What is the MEAN (AVERAGE) of your data?  ",
            "std_dev": "What is the Standard Deviation of your data?  ",
            "ndays": "How many DAYS did this historical data span for?  ",
            "mindays": "What is the MINIMUM number of days that you think your test should run for? (Usually 14)  ",
            "maxdays": "What is the MAXIMUM number of days that you think the test should run for? (Usually 28)  "}

    outputs = dict.fromkeys(inputs)

    for question_number, (key, value) in enumerate(inputs.items(), start=1):
        print('\n','Question # {}: '.format(question_number),'\n')
        while True:
            # input() stays outside the try block so that EOF or an interrupt
            # ends the session instead of being retried forever.
            user_ = input(value)
            try:
                if key == 'mean' or key == 'std_dev':
                    answer = float(user_)
                else:
                    answer = int(user_.replace(',', ''))
            except ValueError as e:
                print("Error: {}".format(e))
                print("Please enter a valid number...", '\n')
                continue

            if key == 'std_dev':
                # "not > 0" also rejects NaN.
                if not answer > 0:
                    print("Please enter a standard deviation greater than zero...", '\n')
                    continue
            elif key == 'mean':
                if answer == 0:
                    print("Please enter a non-zero mean...", '\n')
                    continue
            else:
                if answer <= 1:
                    print("Please enter a number greater than one...", '\n')
                    continue
                print("Accepted: {} ".format(answer), '\n')

            outputs[key] = answer
            break

    if outputs.get('maxdays') <= outputs.get('mindays'):
        print("Warning: Max Days {} is <= Min Days {}".format(outputs.get('maxdays'),outputs.get('mindays')))
        print("Will reset to 14-28...")
        outputs['mindays']=14
        outputs['maxdays']=28

    outputs['sessions_per_day']=round(outputs.get('nsessions')/outputs.get('ndays'))
    print("Accepted! Here are you input values: ", '\n')
    for key, value in outputs.items():
        print(key, value)
    print('\n')
    return outputs

def get_lifts_splits(max_split):
    """Ask which lift and split values the power analysis should cover.

    For each of ``lift`` and ``split`` the user answers Yes/No (or Y/N, in
    any case).  On "yes" a single positive decimal is requested; a split
    larger than ``max_split`` is refused.  On "no" a default range is used:
    100 lifts from 0.0005 to 0.05 for ``lift`` and ``get_splits(max_split)``
    for ``split``.  Any rejected or unintelligible answer restarts the
    Yes/No question for the same item.

    Returns
    -------
    dict mapping ``"lift"`` and ``"split"`` to lists of floats.
    """
    inputs={"lift": "Is there a specific LIFT value you want to measure? (Yes/No)  ",
            "split": "Is there a specific SPLIT value you want to measure? (Yes/No)  "}
    outputs = dict.fromkeys(inputs)

    for key, question in inputs.items():
        while True:
            try:
                reply = input(question)
                normalized = reply.upper()

                if normalized in ('YES', 'Y'):
                    raw_value = input("Please input a raw decimal value (without % sign):    ")
                    try:
                        print(raw_value)
                        chosen = float(raw_value)
                    except Exception as e:
                        print("Error! Please enter a valid decimal value, ex. 2.5% would be 0.025.")
                        print(e)
                        continue

                    try:
                        too_large = chosen > max_split and key=='split'
                    except Exception as e:
                        print("Error! Please enter a valid decimal value, ex. 2.5% would be 0.025.")
                        print(e)
                        continue

                    if too_large:
                        print("Warning!! Split entered is greater than the max split allowed... please enter a split value less than or equeal to {}   ".format(max_split))
                        continue
                    if chosen <= 0:
                        print("Please enter a value greater than zero!  ")
                        continue

                    outputs[key] = [chosen]
                    print("Accepted: {}".format(chosen), "\n")
                    break

                if normalized in ('NO', 'N'):
                    print("Ok, a range of values will be provided then...   ", "\n")
                    if key == 'lift':
                        default_values = [round(0.005*(i+1)/10,4) for i in range(100)]
                    else:
                        # The only other key is 'split'.
                        default_values = get_splits(max_split)
                    outputs[key] = default_values
                    print(default_values)
                    break

                print("Sorry, user input {} was not understood... let's try that again... ".format(reply), '\n')
            except Exception as e:
                print(e)
                print("Sorry, someting went wrong... let's try that again... ",'\n')
    return outputs


def run_power_analysis(userdata, ls, maxsplit):
    """Run the power analysis, save the results as CSV and return them.

    Parameters
    ----------
    userdata : dict with ``narms``, ``mindays``, ``maxdays``, ``mean``,
        ``std_dev`` and ``sessions_per_day``.
    ls : dict with the lists of candidate values under ``lift`` and ``split``.
    maxsplit : maximum split per group, used for the printed summary only.

    Returns
    -------
    The grid filtered to the ``[mindays, maxdays]`` window when at least one
    row falls inside it (filtered and unfiltered grids are both written to
    CSV); otherwise the full, unfiltered grid (written to a single CSV).

    The experiment name is typed in by the user and is used in the CSV file
    names; path separators in it are replaced by underscores so the files are
    always created in the working directory.
    """
    narms=userdata.get('narms')
    mindays=int(userdata.get('mindays'))
    maxdays=int(userdata.get('maxdays'))
    mean=userdata.get('mean')
    std_dev=userdata.get('std_dev')
    sessions_per_day=userdata.get('sessions_per_day')
    lifts=ls.get('lift')
    splits=ls.get('split')

    name_of_experiment=input("What is the name of your experiment?  ")
    # A '/' or '\' in the name would otherwise be read as a directory.
    file_label=name_of_experiment.replace('/', '_').replace('\\', '_')

    print('\n', 'Generating Report... Please wait...','\n')
    df = generate_df(mean,std_dev,lifts,splits,sessions_per_day)
    df2 = filter_on_day_length(df,mindays,maxdays)

    if len(df2.index)>0:
        print("The experiment: {}, has {} arms.".format(name_of_experiment, narms),'\n')
        print("The maximum split you can have per group is {0:.2%}".format(maxsplit),'\n')
        df2.to_csv("Filtered Power Analysis for {} with {} groups.csv".format(file_label,narms),index=False)
        df.to_csv("Unfiltered Power Analysis for {} with {} groups.csv".format(file_label,narms),index=False)
        print("Filtered and Unfiltered results have been saved to CSV!",'\n')
        print("Snapshot also shown below.",'\n','\n')
        return df2

    print('Warning!! Unable to conduct power analysis within the spcified time frame','\n')
    print('All possible values, if any, will be returned for your consideration.','\n')
    df.to_csv("Power Analysis - All values - for {} with {} groups.csv".format(file_label,narms),index=False)
    return df

def main_power_analysis():
    """Entry point: collect the inputs, run the power analysis and print it.

    The user first chooses between manual entry (1) and the standardized,
    backend-driven flow (2).  The question is repeated until one of the two
    options is entered, so an invalid or non-numeric reply no longer leads to
    an unbound ``data`` variable or a ``ValueError``.
    """
    printmd("\n ****Do you want to run Standardized or Non-Standardized Experiment?****\n")

    collectors = {1: get_user_variables, 2: get_user_variables_automated}
    while True:
        reply = input("Enter 1 for Non-Standardized input (i.e., Manual Entry)     \n"
                      "Enter 2 for Standardized input  ")
        try:
            type_of_inp = int(reply)
        except ValueError:
            type_of_inp = None
        if type_of_inp in collectors:
            break
        print("You have selected wrong option")

    data = collectors[type_of_inp]()
    maxsplit=get_max_split(data.get('narms'))
    lift_splits=get_lifts_splits(maxsplit)
    df2=run_power_analysis(userdata=data,ls=lift_splits,maxsplit=maxsplit)
    print(df2)



# Start the interactive power-analysis workflow only when this code is run
# directly (``__name__`` is '__main__'), not when it is imported.
if __name__ == '__main__':
    main_power_analysis()

    
                             


 ****Do you want to run Standardized or Non-Standardized Experiment?****


Enter 1 for Non-Standardized input (i.e., Manual Entry)     
Enter 2 for Standardized input  2



 ****Question:**** 


Please enter the metric for which analysis has to be done? :
 1 for RPS (Revenue Per Session) 
 2 for AOV (Average Order Value) 
 3 for RPV (Revenue per visitor) 
 4 for UPO (Units per order) 
1



 ****Question:**** 


Select the device:
 1 for Desktop 
 2 for Mobile App
 3 for Mobile Web
 4 for Tablet: 1



 ****Question:**** 


Enter the month of the experiment: September



 ****Question:**** 


How many ARMS are in the experiment (Control + All treatments)?  2
Accepted: 2  




 ****Question:**** 


Please enter the page name where experiment has to run  homepage


***Total Visits of homepage: 38348730***





****The MEAN (AVERAGE) of your data:  ****

140.397677702




****The Standard Deviation of your data:  ****

225.350401853856


**********************************************************************


****Number of DAYS historical data span for:  ****

90
Previous Month: august 	 Current Month: september 	  Next Month: october 	  
 






 ****Fiscal Previous & Next Month Mapping Table**** 


    Current Month Number Current Month Name  Previous Month Number  \
0                      1           february                     12   
1                      2              march                      1   
2                      3              april                      2   
3                      4                may                      3   
4                      5               june                      4   
5                      6               july                      5   
6                      7             august                      6   
7                      8          september                      7   
8                      9            october                      8   
9                     10           november                      9   
10                    11           december                     10   
11                    12            january                      0   

    Next Month Number Previous Month Name     Next Month Name  
0                   2    


 ****Question:**** 


What is the MINIMUM number of days that you think your test should run for? (Usually 14)  2
Accepted: 2  




 ****Question:**** 


What is the MAXIMUM number of days that you think the test should run for? (Usually 28)  28
Accepted: 28  

Accepted! Here are you input values:  

narms 2
nsessions 38348730
mean 140.397677702
std_dev 225.350401853856
ndays 90
mindays 2
maxdays 28
sessions_per_day 426097


Is there a specific LIFT value you want to measure? (Yes/No)  no
Ok, a range of values will be provided then...    

[0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055, 0.006, 0.0065, 0.007, 0.0075, 0.008, 0.0085, 0.009, 0.0095, 0.01, 0.0105, 0.011, 0.0115, 0.012, 0.0125, 0.013, 0.0135, 0.014, 0.0145, 0.015, 0.0155, 0.016, 0.0165, 0.017, 0.0175, 0.018, 0.0185, 0.019, 0.0195, 0.02, 0.0205, 0.021, 0.0215, 0.022, 0.0225, 0.023, 0.0235, 0.024, 0.0245, 0.025, 0.0255, 0.026, 0.0265, 0.027, 0.0275, 0.028, 0.0285, 0.029, 0.0295, 0.03, 0.0305, 0.031, 0.0315, 0.032, 0.0325, 0.033, 0.0335, 0.034, 0.0345, 0.035, 0.0355, 0.036, 0.0365, 0.037, 0.0375, 0.038, 0.0385, 0.039, 0.0395, 0.04, 0.0405, 0.0