# Six roulette wheel spins

In [2]:
from random import *
from statistics import *
from collections import *

# One list entry per pocket on the wheel: 18 red, 18 black and 2 green (38 in total).
pocket_counts = {'red': 18, 'black': 18, 'green': 2}
population = [colour for colour, pockets in pocket_counts.items() for _ in range(pockets)]
choice(population)    # a single spin

'red'

In [3]:
# Six independent spins, one choice() call per spin.
[choice(population) for _ in range(6)]

['black', 'black', 'green', 'red', 'red', 'black']

In [4]:
# Tally the colours seen in six spins.
Counter(choice(population) for _ in range(6))

Counter({'black': 3, 'red': 3})

In [5]:
# Same tally, drawing all six spins with a single choices() call.
six_spins = choices(population, k=6)
Counter(six_spins)

Counter({'black': 3, 'red': 3})

In [6]:
# Same wheel described by relative weights instead of one list entry per pocket.
Counter(choices(['red', 'black', 'green'], weights=[18, 18, 2], k=6))

Counter({'black': 2, 'red': 4})

# Playing cards

In [7]:
# 52 cards in two categories: 16 labelled 'tens' and 36 labelled 'low'.
deck = list(Counter(tens=16, low=36).elements())
deal = sample(deck, k=len(deck))    # every card, in random order
remainder = deal[20:]               # what is left once the first 20 cards are gone
Counter(remainder)

Counter({'low': 24, 'tens': 8})

# 5 or more heads from 7 spins of a biased coin

In [8]:
# empirical result
def trial():
    """Spin the biased coin 7 times and report whether at least 5 spins were heads."""
    spins = choices(['heads', 'tails'], cum_weights=[0.60, 1.00], k=7)
    return spins.count('heads') >= 5

n = 100000    # number of simulated 7-spin experiments
sum(trial() for _ in range(n)) / n

0.41835

In [9]:
# Compare to the analytic approach
# theoretical result
from math import factorial as fact
def comb(n, r):
    """Return the number of ways to choose r items from n, ignoring order.

    Computed exactly with integer arithmetic as n! / (r! * (n - r)!).

    Returns 0 when r > n >= 0, because there is no way to choose more items
    than exist; without this guard the factorial of the negative number
    n - r would raise ValueError. A negative n or r still raises ValueError
    (from the factorial call).
    """
    if 0 <= n < r:
        return 0
    return fact(n) // fact(r) // fact(n - r)
# Sanity-check the helper against a known value: C(10, 3) = 120.
assert comb(10, 3) == 120

ph = 0.6
# P(5 or more heads in 7 spins) = P(exactly 5) + P(exactly 6) + P(exactly 7)
ph ** 5 * (1 - ph) ** 2 * comb(7, 5) + \
ph ** 6 * (1 - ph) ** 1 * comb(7, 6) + \
ph ** 7 * (1 - ph) ** 0 * comb(7, 7)


0.419904

# Probability that the median of 5 samples falls in a middle quartile

In [10]:
# n is set here so the cell does not depend on whichever cell last assigned it.
# It is used both as the population size (range(n)) and as the number of trials.
n = 100000

def trial():
    """Draw 5 distinct values from range(n) and report whether their median
    lies between n // 4 and 3 * n // 4 (inclusive), i.e. in the middle half."""
    return n // 4 <= median(sample(range(n), 5)) <= 3 * n // 4

sum(trial() for i in range(n)) / n

0.7924

# Confidence intervals

In [11]:
timings = [7.8, 8.9, 9.1, 6.9, 10.1, 15.6, 12.6, 9.1, 8.6, 6.8, 7.9, 8.1, 9.6]

def bootstrap(data):
    """Return a resample of data: len(data) values drawn from it with replacement."""
    return choices(data, k=len(data))

n = 10000
# Sorted means of n bootstrap resamples. Cutting 5% off each end leaves the
# middle 90%; n should be at least 20 so that the 5% tail is not empty.
means = sorted(mean(bootstrap(timings)) for i in range(n))
tail = n // 20    # 500 when n = 10000
print(f'The observed mean of {mean(timings)}')
print(f'Falls in 90% confidence interval from {means[tail] : .1f} to {means[-tail] : .1f}')

The observed mean of 9.315384615384616
Falls in 90% confidence interval from  8.4 to  10.4


# Statistical difference

In [12]:
# Measurements for the two groups. The spelling `placedo` is kept because the
# permutation-test cell further down reads this name.
drug = [7.8, 8.9, 9.1, 6.9, 10.1, 15.6, 12.6, 9.1, 8.6, 6.8]
placedo = [7.8, 8.1, 9.1, 6.9, 3.2, 10.6, 10.6, 8.1, 8.6, 6.8]

# Observed effect: the drug group's mean minus the other group's mean.
drug_mean, placedo_mean = mean(drug), mean(placedo)
obs_diff = drug_mean - placedo_mean
print(obs_diff)

1.5700000000000012


# Null hypothesis assumes 2 groups are equivalent

In [23]:
# Pool both groups: under the null hypothesis the group labels are interchangeable.
# Distinct names are used so this cell does not overwrite the comb() helper or
# the trial-count variable n used by other cells.
group_size = len(drug)
pooled = drug + placedo

def trail():
    """Shuffle the pooled measurements, split them into two groups of the
    original sizes, and report whether the relabelled difference in means is
    at least as large as the observed difference obs_diff."""
    shuffle(pooled)
    shuffled_drug = pooled[:group_size]
    shuffled_placebo = pooled[group_size:]
    new_diff = mean(shuffled_drug) - mean(shuffled_placebo)
    return new_diff >= obs_diff

count = 100000
# p-value: the fraction of shuffles whose difference is at least the observed one.
# If the p-value is <= 0.05, the difference is considered statistically significant.
sum(trail() for i in range(count)) / count

0.08806

# Toss coins

In [21]:
# Toss a coin 30 times and see 22 heads. Is it a fair coin?
# Assume the skeptic is correct: even a fair coin could show 22 heads in 30 tosses. It might be just chance.
# Test the null hypothesis: what is the probability of a fair coin showing 22 or more heads simply by chance?
# The code below estimates that probability by simulation.
n = 10000    # number of simulated 30-toss experiments
m = 0        # experiments that produced at least 22 heads
for i in range(n):
    heads = sum(randint(0, 1) for _ in range(30))
    m += heads >= 22
pvalue = m / n
print(pvalue)
# pvalue is around 0.008, so reject the fair-coin hypothesis at p < 0.05: the coin is not fair, it is biased.

0.0081


# If the p-value is below 0.05, the difference is statistically significant and we reject the null hypothesis. If it is above 0.05, the difference is not statistically significant and we do not reject the null hypothesis.