In [None]:
import os
import warnings; warnings.simplefilter('ignore')
import sys
import pandas as pd
import numpy as np
from simulation import Simulation
from abmethods import Naive, DeltaMethod, MixedModel
from simuldata import UncorBinom, HetBinom

Two main distributions were used for the power analysis.
1. **Uncorrelated Data:** A Binomial Distribution with no correlation within users
2. **Correlated Data:** A Binomial distribution with correlation within users, where each user's use-rate is defined by varying Poisson distributions

In [None]:
# Print the source code of both data-generator classes so the reader can see
# exactly how each dataset is produced.
import inspect

# The second banner is labelled "Correlated Data": HetBinom is the correlated
# generator, UncorBinom the uncorrelated one.
print(f"""
===== Uncorrelated Data =====
{inspect.getsource(UncorBinom)}



===== Correlated Data =====
{inspect.getsource(HetBinom)}
""")

In [None]:
# Methods under comparison. MixedModel is left out for performance reasons.
tests = [Naive(), DeltaMethod()]


### Simulation Data Parameters ###
mu = [0.3, 0.5, 0.8]       # "Click Through Rate"
sigma = [0.05, 0.1, 0.05]  # noise
lamb = [2, 5, 30]          # frequency cohorts
probs = [1/3, 1/2, 1/6]    # cohort sizes
n = 1000                   # sample size
m = 100                    # number of runs

# Simulation data: one uncorrelated generator and one heterogeneous generator.
bindata = UncorBinom(1, 0.6, n)
hetdata = HetBinom(lamb, mu, sigma, n, probs)

# One Simulation per generator; both receive the same `tests` list and run count.
hetsim = Simulation(m, hetdata, tests)
binsim = Simulation(m, bindata, tests)

In [None]:
# Show the first 20 entries of the 'unitlevel' item returned by bindata.data()
# (the UncorBinom generator).
bindata.data()['unitlevel'].head(20)

In [None]:
# Show the first 20 entries of the 'unitlevel' item returned by hetdata.data()
# (the HetBinom generator).
hetdata.data()['unitlevel'].head(20)

In [None]:
# Run the simulation built on the HetBinom data, then display its report
# (the bare last expression is rendered as the cell output).
hetsim.run()
hetsim.report

In [None]:
# Run the simulation built on the UncorBinom data, then display its report
# (the bare last expression is rendered as the cell output).
binsim.run()
binsim.report

## Power Analysis<a name="poweranalysis"></a>

When there is a real effect, how large does the effect have to be for the test to find it?

Basic Steps to Power Analysis

1. Simulate data that closely mimics real-world data, with a known estimate and variance.
2. Increase the effect size of the 'variant' and check whether the test is significant.
3. Repeat steps 1 and 2 many times; the fraction of runs in which the test is significant is the power at that effect size.


In [None]:
def summarize_power(power_df, step=3):
    """Average the 'p' column per (effect_size, test) on a thinned set of effect sizes.

    Parameters
    ----------
    power_df : DataFrame
        Must contain the columns 'test', 'effect_size' and 'p'.
    step : int, default 3
        Keep the effect sizes found at positions 0, step, 2*step, ... in
        order of first appearance; all other effect sizes are dropped.

    Returns
    -------
    DataFrame
        Indexed by (effect_size, test), holding the mean of 'p'.
    """
    kept_sizes = power_df['effect_size'].unique()[::step]
    in_subset = power_df['effect_size'].isin(kept_sizes)
    return (
        power_df.loc[in_subset, ['test', 'effect_size', 'p']]
        .groupby(['effect_size', 'test'])
        .mean()
    )


# Power-test results for the HetBinom simulation.
hetdf = hetsim.power_test()
het_summary = summarize_power(hetdf)

# Power-test results for the UncorBinom simulation.
bindf = binsim.power_test()

# Mirrors the .6 literal passed to UncorBinom(1, .6, n) in the setup cell --
# presumably the baseline success rate; keep the two in sync (TODO confirm).
BIN_BASE_RATE = .6

# effect_size is cast to float before doing arithmetic on it.
bin_effect = bindf['effect_size'].astype(float)

# p * (1 - p) evaluated at p = BIN_BASE_RATE + effect_size; stored on bindf so
# it travels with the frame that is later handed to plot_power.
bindf['truevar'] = (BIN_BASE_RATE + bin_effect) * (1 - BIN_BASE_RATE - bin_effect)

bin_summary = summarize_power(bindf)


In [None]:
# Plot power for the UncorBinom simulation from its power-test results (bindf).
binsim.plot_power(bindf)

In [None]:
# Plot power for the HetBinom simulation from its power-test results (hetdf).
hetsim.plot_power(hetdf)