In [None]:
import pandas as pd
import numpy as np
import json
import seaborn as sns
import matplotlib.pyplot as plt

def _plot_conclusive_curves(scenarios, title, max_sample=10000, min_sample=200, target_percent=80):
    """Plot, per scenario, the share of simulations concluded by each sample size.

    A simulation counts as concluded at the first row whose ``winner`` is not
    ``'inconclusive'`` and whose ``sample`` is at least ``min_sample``. For
    every scenario one line is printed with the first sample size at which
    ``target_percent`` percent of the simulations in the file have concluded
    (nothing is printed when that share is never reached).

    Args:
        scenarios: iterable of ``(csv_path, legend_label, description)``
            tuples. The CSV must provide ``simulation``, ``sample`` and
            ``winner`` columns; ``description`` is inserted into the printed
            summary line.
        title: title of the figure.
        max_sample: largest sample size shown on the x axis.
        min_sample: conclusions reached on fewer samples are ignored.
        target_percent: share of simulations, in percent, that is reported
            and drawn as the dashed reference line.
    """
    _, ax = plt.subplots()
    sample_axis = np.arange(1, max_sample + 1)

    for csv_path, legend_label, description in scenarios:
        runs = pd.read_csv(csv_path)
        # Denominator of the percentage: every simulation in the file,
        # including the ones that never reach a conclusion. With 100
        # simulations per file the percentage equals the raw count.
        total_simulations = runs['simulation'].nunique()

        conclusive = runs[(runs['winner'] != 'inconclusive') & (runs['sample'] >= min_sample)]
        first_conclusive_sample = conclusive.groupby('simulation')['sample'].min()

        # Number of simulations that have concluded at or before each sample size.
        concluded_so_far = (
            first_conclusive_sample.value_counts()
            .reindex(sample_axis, fill_value=0)
            .cumsum()
            .to_numpy()
        )
        if total_simulations:
            percent_concluded = concluded_so_far * 100.0 / total_simulations
        else:
            percent_concluded = np.zeros(len(sample_axis))

        reached = np.flatnonzero(percent_concluded >= target_percent)
        if reached.size:
            samples_needed = int(sample_axis[reached[0]])
            print(f'For {description}, {target_percent}% of simulations needed {samples_needed} samples to be conclusive')

        ax.plot(sample_axis, percent_concluded, label=legend_label)

    ax.plot(sample_axis, np.full(len(sample_axis), target_percent), label=f'{target_percent}% threshold', linestyle='dashed')
    ax.legend()
    ax.set_xlabel('Sample Size')
    ax.set_ylabel('Proportion of Conclusive Simulations')
    ax.set_title(title)

    plt.show()


def plot_delta_simulations():
    """Compare how fast simulations conclude for different treatment uplifts (δ)."""
    # CSV file suffix -> label of the uplift scenario, in percent.
    uplift_by_suffix = {34: '5', 37: '15', 41: '30'}
    scenarios = [
        (
            f'experiment_simulations_{suffix}.csv',
            f'{uplift}%',
            f'minimum detectable effect of {uplift}%',
        )
        for suffix, uplift in uplift_by_suffix.items()
    ]
    _plot_conclusive_curves(scenarios, title='Conclusive Simulations for varying δ')


def plot_epsilon_simulations():
    """Compare how fast simulations conclude for different loss thresholds (ϵ)."""
    # CSV file suffix -> expected-loss threshold labelled on the curve.
    epsilon_by_suffix = {'low_eps': 0.0005, '37': 0.0015, 'high_eps': 0.003}
    scenarios = [
        (
            f'experiment_simulations_{suffix}.csv',
            f'{epsilon}',
            # Epsilon is a loss threshold, not an effect size and not a percentage.
            f'expected loss threshold (epsilon) of {epsilon}',
        )
        for suffix, epsilon in epsilon_by_suffix.items()
    ]
    _plot_conclusive_curves(scenarios, title='Conclusive Simulations for varying ϵ')


def plot_lambda_simulations():
    """Compare how fast simulations conclude for different baseline conversion rates (λ)."""
    # CSV file suffix -> baseline conversion rate labelled on the curve, in percent.
    baseline_by_suffix = {'low_lambda': 5, '37': 32, 'high_lambda': 60}
    scenarios = [
        (
            f'experiment_simulations_{suffix}.csv',
            f'{baseline}%',
            f'baseline conversion rate of {baseline}%',
        )
        for suffix, baseline in baseline_by_suffix.items()
    ]
    _plot_conclusive_curves(scenarios, title='Conclusive Simulations for varying λ')



if __name__ == "__main__":
    # Draw the three sensitivity figures in a fixed order: δ, then ϵ, then λ.
    # Each function reads experiment_simulations_*.csv files from the working
    # directory, so those files must exist before this cell is run.
    for draw_figure in (plot_delta_simulations, plot_epsilon_simulations, plot_lambda_simulations):
        draw_figure()

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def plot_peeking_example(file_name='experiment_simulations_same_cr.csv', simulation_id=3, threshold=0.003):
    """Plot expected loss and conversion rates of one simulation side by side.

    Args:
        file_name: CSV with ``simulation``, ``sample``, ``treatment_expected_loss``,
            ``control_expected_loss``, ``treatment_cr`` and ``control_cr`` columns.
        simulation_id: value of the ``simulation`` column to plot.
        threshold: expected-loss level drawn as a dashed horizontal line.

    Nothing is drawn when ``simulation_id`` does not occur in the file.
    """
    simulations = pd.read_csv(file_name)
    # NOTE(review): the sample column is shifted by one before plotting;
    # presumably this file stores 0-based sample indices - confirm.
    simulations['sample'] = simulations['sample'] + 1

    selected = simulations[simulations['simulation'] == simulation_id]
    if selected.empty:
        return

    _, (ax1, ax2) = plt.subplots(1, 2)

    ax1.plot(selected['sample'], selected['treatment_expected_loss'], label='treatment')
    ax1.plot(selected['sample'], selected['control_expected_loss'], label='control')
    # Size the threshold line from the data instead of assuming 10000 rows.
    ax1.plot(selected['sample'], np.full(len(selected), threshold), label='threshold', linestyle='dashed')
    ax1.set_xlabel('Sample Size')
    ax1.set_ylabel('Expected Loss')
    ax1.set_title('Expected Loss Simulation')
    ax1.legend()

    ax2.plot(selected['sample'], selected['treatment_cr'], label='treatment')
    ax2.plot(selected['sample'], selected['control_cr'], label='control')
    ax2.set_xlabel('Sample Size')
    ax2.set_ylabel('Conversion Rates')
    ax2.set_title('Conversion Rates Simulation')
    ax2.legend()

    plt.show()

def plot_simulations(file_name, threshold=0.0015):
    """Overlay every simulation in a CSV: expected loss (left) and conversion rates (right).

    The first simulation is drawn with the default line style and provides the
    legend entries; every further simulation is drawn as a thin, translucent
    line in the same two colours.

    Args:
        file_name: CSV with ``simulation``, ``sample``, ``control_expected_loss``,
            ``treatment_expected_loss``, ``control_cr`` and ``treatment_cr`` columns.
        threshold: expected-loss level drawn as a dashed horizontal line.
    """
    simulations = pd.read_csv(file_name)

    _, (ax1, ax2) = plt.subplots(1, 2)

    faint = {'linewidth': 0.25, 'alpha': 0.3}
    control_color = None
    treatment_color = None

    # One pass over the groups instead of re-filtering the whole frame per simulation.
    for _, run in simulations.groupby('simulation'):
        if control_color is None:
            control_color = ax1.plot(run['sample'], run['control_expected_loss'], label='control')[0].get_color()
            treatment_color = ax1.plot(run['sample'], run['treatment_expected_loss'], label='treatment')[0].get_color()

            ax2.plot(run['sample'], run['control_cr'], label='control', color=control_color)
            ax2.plot(run['sample'], run['treatment_cr'], label='treatment', color=treatment_color)
        else:
            ax1.plot(run['sample'], run['control_expected_loss'], color=control_color, **faint)
            ax1.plot(run['sample'], run['treatment_expected_loss'], color=treatment_color, **faint)
            ax2.plot(run['sample'], run['control_cr'], color=control_color, **faint)
            ax2.plot(run['sample'], run['treatment_cr'], color=treatment_color, **faint)

    if control_color is not None:
        # Drawn once, after all traces, so the threshold sits on top of them;
        # its extent comes from the data rather than an assumed 10000 rows.
        sample_span = [simulations['sample'].min(), simulations['sample'].max()]
        ax1.plot(sample_span, [threshold, threshold], label='threshold', linestyle='dashed')
        ax1.legend()
        ax2.legend()

    # Axis decoration does not depend on the data, so it is set exactly once.
    ax1.set_xlabel('Sample Size')
    ax1.set_ylabel('Expected Loss')
    ax1.set_title('Expected Loss Simulation')

    ax2.set_xlabel('Sample Size')
    ax2.set_ylabel('Conversion Rates')
    ax2.set_title('Conversion Rates Simulation')

    plt.show()

if __name__ == "__main__":
    # First the single-simulation figure, then the overlay of every simulation
    # stored in experiment_simulations_37.csv.
    plot_peeking_example()
    plot_simulations(file_name='experiment_simulations_37.csv')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import beta
import matplotlib.pyplot as plt
import json
import decimal
decimal.getcontext().prec = 4


def calculate_expected_loss(control_simulation, treatment_simulation, treatment_won, min_difference_delta=0):
    """Return the expected loss of choosing control and of choosing treatment.

    Args:
        control_simulation: sequence of sampled values for the control variant.
        treatment_simulation: sequence of sampled values for the treatment
            variant, same length as ``control_simulation``.
        treatment_won: per-sample flags (truthy where treatment is counted as
            the winner), same length as the two sample sequences.
        min_difference_delta: amount subtracted from every treatment sample
            before the two variants are compared.

    Returns:
        ``(expected_loss_control, expected_loss_treatment)``. The control loss
        averages ``max(treatment - delta - control, 0)`` over the samples
        flagged as treatment wins (other samples contribute 0); the treatment
        loss averages ``max(control - (treatment - delta), 0)`` over the
        remaining samples.

    Raises:
        ValueError: if the three inputs do not have the same shape.
    """
    control = np.asarray(control_simulation, dtype=float)
    treatment = np.asarray(treatment_simulation, dtype=float)
    won = np.asarray(treatment_won).astype(int)

    # Element-wise pairing only makes sense for equally sized inputs; fail
    # loudly instead of silently dropping the unmatched tail.
    if not (control.shape == treatment.shape == won.shape):
        raise ValueError(
            'control_simulation, treatment_simulation and treatment_won must have the same shape, '
            f'got {control.shape}, {treatment.shape} and {won.shape}'
        )

    shifted_treatment = treatment - min_difference_delta
    loss_control = np.maximum(shifted_treatment - control, 0)
    loss_treatment = np.maximum(control - shifted_treatment, 0)

    expected_loss_control = np.mean(won * loss_control)
    expected_loss_treatment = np.mean((1 - won) * loss_treatment)
    return expected_loss_control, expected_loss_treatment


def run_multiple_experiment_simulations(n, prior_alpha, prior_beta, control_cr, treatment_cr, epsilon, variant_sample_size=10000, min_simulations_per_experiment=0, seed=None):
    """Simulate ``n`` two-variant experiments and record the state after every sample.

    For each simulation, ``variant_sample_size`` Bernoulli outcomes are drawn
    per variant. After every additional sample, 1000 values are drawn from a
    Beta distribution per variant (prior parameters plus the conversions and
    non-conversions seen so far), the expected losses are computed with
    ``calculate_expected_loss`` and a winner is recorded: ``'treatment'`` if
    the treatment loss is at most ``epsilon``, otherwise ``'control'`` if the
    control loss is at most ``epsilon``, otherwise ``'inconclusive'``.

    Args:
        n: number of simulations to run.
        prior_alpha: alpha parameter of the Beta prior, shared by both variants.
        prior_beta: beta parameter of the Beta prior, shared by both variants.
        control_cr: success probability of the control outcomes.
        treatment_cr: success probability of the treatment outcomes.
        epsilon: expected-loss level at or below which a variant is declared winner.
        variant_sample_size: number of samples drawn per variant and simulation.
        min_simulations_per_experiment: simulations whose zero-based index is
            below this value are always recorded as ``'inconclusive'``.
            NOTE(review): this is compared with the simulation index, not the
            sample count - confirm that a minimum sample size was not intended.
        seed: optional seed. When given, all draws come from
            ``np.random.default_rng(seed)`` so the run is reproducible; when
            ``None`` the global ``np.random`` state is used.

    Returns:
        DataFrame with one row per (simulation, sample) and the columns
        ``simulation`` (1-based), ``sample``, ``treatment_cr``, ``control_cr``,
        ``treatment_expected_loss``, ``control_expected_loss`` and ``winner``.
        An empty DataFrame is returned when ``n`` is 0.
    """
    posterior_draws = 1000
    rng = np.random if seed is None else np.random.default_rng(seed)
    frames = []

    for simulation in range(n):
        control_outcomes = rng.binomial(n=1, p=control_cr, size=variant_sample_size)
        treatment_outcomes = rng.binomial(n=1, p=treatment_cr, size=variant_sample_size)

        # Does not change inside a simulation, so evaluate it once.
        may_conclude = simulation >= min_simulations_per_experiment

        records = []
        control_conversions = 0
        treatment_conversions = 0

        for sample_size in range(1, variant_sample_size + 1):
            control_conversions += control_outcomes[sample_size - 1]
            treatment_conversions += treatment_outcomes[sample_size - 1]

            control_posterior = rng.beta(prior_alpha + control_conversions, prior_beta + sample_size - control_conversions, size=posterior_draws)
            treatment_posterior = rng.beta(prior_alpha + treatment_conversions, prior_beta + sample_size - treatment_conversions, size=posterior_draws)
            treatment_higher = control_posterior <= treatment_posterior

            expected_loss_control, expected_loss_treatment = calculate_expected_loss(control_posterior, treatment_posterior, treatment_higher)

            # Treatment is checked first, so it wins when both losses are within epsilon.
            if may_conclude and expected_loss_treatment <= epsilon:
                winner = 'treatment'
            elif may_conclude and expected_loss_control <= epsilon:
                winner = 'control'
            else:
                winner = 'inconclusive'

            records.append({
                'simulation': simulation + 1,
                'sample': sample_size,
                'treatment_cr': treatment_conversions / sample_size,
                'control_cr': control_conversions / sample_size,
                'treatment_expected_loss': expected_loss_treatment,
                'control_expected_loss': expected_loss_control,
                'winner': winner,
            })

        frames.append(pd.DataFrame.from_records(records))

    if not frames:
        return pd.DataFrame()

    # Concatenate once at the end; growing a frame inside the loop copies all
    # earlier rows on every iteration.
    return pd.concat(frames)


if __name__ == "__main__":
    # Reference scenario: 15% uplift on a 0.32 baseline, epsilon 0.0015.
    standard_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=7, prior_beta=15,
        control_cr=0.32, treatment_cr=0.32*(1.15), epsilon=0.0015,
    )
    standard_simulations.to_csv('experiment_simulations_37.csv', index=False)

    # Same baseline and epsilon, smaller (5%) and larger (30%) uplift.
    low_mde_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=7, prior_beta=15,
        control_cr=0.32, treatment_cr=0.32*(1.05), epsilon=0.0015,
    )
    low_mde_simulations.to_csv('experiment_simulations_34.csv', index=False)

    high_mde_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=7, prior_beta=15,
        control_cr=0.32, treatment_cr=0.32*(1.3), epsilon=0.0015,
    )
    high_mde_simulations.to_csv('experiment_simulations_41.csv', index=False)

    # Reference uplift and baseline, lower and higher epsilon.
    low_eps_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=7, prior_beta=15,
        control_cr=0.32, treatment_cr=0.32*(1.15), epsilon=0.0005,
    )
    low_eps_simulations.to_csv('experiment_simulations_low_eps.csv', index=False)

    high_eps_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=7, prior_beta=15,
        control_cr=0.32, treatment_cr=0.32*(1.15), epsilon=0.003,
    )
    high_eps_simulations.to_csv('experiment_simulations_high_eps.csv', index=False)

    # Reference uplift, lower and higher baseline; prior and epsilon change with the baseline.
    low_lambda_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=2, prior_beta=20,
        control_cr=0.05, treatment_cr=0.05*(1.15), epsilon=0.05*(0.005),
    )
    low_lambda_simulations.to_csv('experiment_simulations_low_lambda.csv', index=False)

    high_lambda_simulations = run_multiple_experiment_simulations(
        n=100, prior_alpha=13, prior_beta=9,
        control_cr=0.6, treatment_cr=0.6*(1.15), epsilon=0.6*(0.005),
    )
    high_lambda_simulations.to_csv('experiment_simulations_high_lambda.csv', index=False)

In [None]:
# Scratch check: add one to every element of a list.
b = [1, 3, 4, 6]
a = list(map(lambda value: value + 1, b))
print(a)

In [None]:
# Scratch check: zip pairs two NumPy arrays element by element.
a = np.array([1, 2, 3, 4])
b = np.array([2, 3, 4, 5])
c = zip(a, b)

# Materialising the iterator for display leaves `c` exhausted afterwards.
paired = tuple(c)
print(paired)

In [None]:
# Scratch check: zip pairs two tuples of names position by position.
a = ("John", "Charles", "Mike")
b = ("Jenny", "Christy", "Monica")

x = zip(a, b)

# tuple() turns the zip iterator into something readable when printed;
# `x` is exhausted afterwards.
pairs = tuple(x)
print(pairs)