In [4]:
import numpy
from scipy.stats import uniform
from statsmodels.stats.weightstats import CompareMeans, ttest_ind

In [117]:
checks_test = numpy.loadtxt('checks_test.csv', delimiter=',')
print(checks_test[:20])

[ 15.  40.  10.  15.  30.  65.  10.   0.  25.   5.   5.  10.  10.  40.
  15.   5.   5. 110.   0.   5.]


In [116]:
checks_control = numpy.loadtxt('checks_control.csv', delimiter=',')
print(checks_control[:20])

[ 0.  5.  5. 15.  5. 20.  5. 35.  5. 15.  5.  0. 10. 35. 15. 10. 30. 10.
 20. 10.]


In [119]:
test_to_control = CompareMeans.from_data(
    checks_test, 
    checks_control
)
test_to_control.tconfint_diff(0.10, 'two-sided', 'unequal')

(-0.31240373943457334, 2.582403739434573)

In [121]:
logchecks_test = numpy.log(checks_test)
logchecks_control = numpy.log(checks_control)

In [123]:
test_to_control = CompareMeans.from_data(
    logchecks_test, 
    logchecks_control
)
test_to_control.tconfint_diff(0.10, 'two-sided', 'unequal')

(0.010391539300251745, 0.16908253324879227)

<br><br><br>

In [3]:
from my_montecarlo import run_montecarloAB

In [25]:
def student_larger(A, B):
    
    t_stats, p_values, _ = ttest_ind(
        B + 1, 
        A + 1, 
        alternative='larger', 
        usevar='unequal'
    )
    
    p_values_cleaned = numpy.where(
        numpy.isnan(p_values), 
        0, 
        p_values
    )        
    
    return p_values_cleaned

In [26]:
def log_student_larger(A, B):
    
    t_stats, p_values, _ = ttest_ind(
        numpy.log(B + 1), 
        numpy.log(A + 1), 
        alternative='larger', 
        usevar='unequal'
    )
    
    p_values_cleaned = numpy.where(
        numpy.isnan(p_values), 
        0, 
        p_values
    )        
    
    return p_values_cleaned

In [37]:
n_negatives, n_positives, l, r, p_values = run_montecarloAB(
    student_larger,
    latent_distrA=uniform(0, 8),
    latent_distrB=uniform(3, 2),
    sample_size=20,
    n_tests=10000
)
print(f'FPR is {n_positives / (n_positives + n_negatives):.3f} ± {(r - l) / 2:.3f}')

FPR is 0.049 ± 0.004


In [38]:
n_negatives, n_positives, l, r, p_values = run_montecarloAB(
    log_student_larger,
    latent_distrA=uniform(0, 8),
    latent_distrB=uniform(3, 2),
    sample_size=30,
    n_tests=10000
)
print(f'FPR is {n_positives / (n_positives + n_negatives):.3f} ± {(r - l) / 2:.3f}')

FPR is 0.296 ± 0.009


<br><br><br><br><br>