# Tutorial on common statistical traps

In [3]:
import numpy as np
import matplotlib.pyplot as plt

## Exercise 1: The trouble with p-values

### Part I: p-values and their frequentist sense

In [4]:
# Seed NumPy's legacy global generator so the draws below are reproducible.
np.random.seed(17)

# Mean, standard deviation and number of samples of the Gaussian to draw from.
mu = 0
sigma = 1
N = int(1e6)

gaussian = np.random.normal(mu, sigma, N)

# Count the samples above 3 with a single vectorized comparison instead of a
# Python-level loop over all N values; int() keeps the count a plain Python int.
samples_greater_than_three = int(np.count_nonzero(gaussian > 3))

# Empirical fraction of the N draws that are greater than 3.
fraction_of_samples = samples_greater_than_three/N

In [5]:
# Display the fraction of samples greater than 3 computed in the previous cell.
fraction_of_samples

0.001327

### Part II: p-values in hypothesis testing

In [6]:
# Pick n_tries random positions in `gaussian` (with replacement) and count how
# many of the selected values are greater than 3.
# int() replaces np.int, a deprecated alias of the builtin that was removed in
# NumPy 1.24 (calling it raises AttributeError on current NumPy).
n_tries = int(1e6)

# One vectorized draw of n_tries uniforms: the legacy global generator fills the
# array in the same order as n_tries successive np.random.rand() calls, and
# astype(int) truncates exactly like int() for these non-negative values.
indices = (np.random.rand(n_tries) * N).astype(int)

# int() keeps the counter a plain Python int, as it was with the manual loop.
num_rejects = int(np.count_nonzero(gaussian[indices] > 3))

In [7]:
# Display the number of sampled values that were greater than 3.
# Author's note on observed counts: about 2 for 1e3 and about 1300 for 1e6
# (presumably these are values of n_tries -- TODO confirm).
num_rejects

1289

### Part III: p-values and how they accumulate our own mistakes

In [8]:
# Repeat the experiment with samples drawn from a Gaussian of mean 0 and
# standard deviation 1.5, keeping 3 as the threshold.
# int() replaces np.int, a deprecated alias of the builtin that was removed in
# NumPy 1.24 (calling it raises AttributeError on current NumPy).
N = int(1e6)
gaussian = np.random.normal(0,1.5,N)

# Vectorized count of the samples above 3; int() keeps it a plain Python int so
# the printed fraction is formatted as before.
samples_greater_than_three = int(np.count_nonzero(gaussian > 3))

fraction_of_samples = samples_greater_than_three/N
print(fraction_of_samples)

# Pick n_tries random positions in `gaussian` (with replacement) and count how
# many of the selected values are greater than 3.
n_tries = int(1e6)

# One vectorized draw of n_tries uniforms: the legacy global generator fills the
# array in the same order as n_tries successive np.random.rand() calls, and
# astype(int) truncates exactly like int() for these non-negative values.
indices = (np.random.rand(n_tries) * N).astype(int)
num_rejects = int(np.count_nonzero(gaussian[indices] > 3))
print(num_rejects)

0.022793
22668


In [9]:
# Repeat the experiment with samples drawn from a Gaussian of mean -0.5 and
# standard deviation 1, keeping 3 as the threshold.
# int() replaces np.int, a deprecated alias of the builtin that was removed in
# NumPy 1.24 (calling it raises AttributeError on current NumPy).
N = int(1e6)
gaussian = np.random.normal(-0.5,1,N)

# Vectorized count of the samples above 3; int() keeps it a plain Python int so
# the printed fraction is formatted as before.
samples_greater_than_three = int(np.count_nonzero(gaussian > 3))

fraction_of_samples = samples_greater_than_three/N
print(fraction_of_samples)

# Pick n_tries random positions in `gaussian` (with replacement) and count how
# many of the selected values are greater than 3.
n_tries = int(1e6)

# One vectorized draw of n_tries uniforms: the legacy global generator fills the
# array in the same order as n_tries successive np.random.rand() calls, and
# astype(int) truncates exactly like int() for these non-negative values.
indices = (np.random.rand(n_tries) * N).astype(int)
num_rejects = int(np.count_nonzero(gaussian[indices] > 3))
print(num_rejects)

0.000252
233
