In [None]:
!pip install pingouin

In [39]:
import pandas as pd
import numpy as np
import pingouin as pg
from pingouin import ttest
import seaborn as sns
sns.set()


# Null Hypothesis Significance Testing (NHST)

## Hypothesis 1: People with an "O" in their name are usually younger than 25 
## Null hypothesis: People with an "O" in their name are not usually younger than 25

### Statistical model: One-sample t test

$$ t = \frac{\overline{x} - \mu_0}{s/\sqrt{n}}$$

$\overline{x}$ = sample mean

$\mu_0 = 25$

$s$ = sample standard deviation

$\sqrt{n}$ = square root of sample size


In [54]:
# One-sample t-test of four sample values (presumably ages) against the
# reference mean of 25. No `alternative` is passed, so the test is two-sided,
# as the `alternative` column of the result shows.
ttest(x=[20, 24, 22, 23], y=25)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-3.22047,3,two-sided,0.048567,"[19.53, 24.97]",1.610235,2.429,0.586596


# Bayesian Hypothesis Testing

# $$P(A|B) = \frac{P(B|A)P(A)}{P(B)} = \frac{P(B|A)P(A)}{P(B|A)P(A)+P(B|\neg{A})P(\neg{A})}$$

$P(A|B)$: The posterior probability (the probability of hypothesis A being true given data B)

$P(A)$: The prior probability (the background probability of A being true without new evidence)

$P(B|A)$: The likelihood (how likely is data B in the event that A is true?)

$P(B)$: The marginal probability (the probability of observing data B across all cases, i.e. $P(B|A)P(A) + P(B|\neg{A})P(\neg{A})$). Because it is a probability, it can never be greater than 1.

## Hypotheses

$H_0$: The average age of this class is less than 20

$H_1$: The average age of this class is 20

$H_2$: The average age of this class is 21

$H_3$: The average age of this class is 22

$H_4$: The average age of this class is 23

$H_5$: The average age of this class is 24

$H_6$: The average age of this class is 25

$H_7$: The average age of this class is greater than 25

## Bayes calculation for a specific hypothesis, $H_x$:
<br>

$$P(H_x|B) = \frac{P(B|H_x)P(H_x)}{\sum_{i=0}^{7} P(B|H_i)P(H_i)}$$


## Assign our priors

https://forms.office.com/Pages/ResponsePage.aspx?id=sA0np77paEabo0fDROUm_3eXhjWdjlZIqcqwAIeKbuVUMVI5VEMxVzVNVDNTWDRIVkFHR0hBSjdIUS4u

## Collect some data

Would people with a "T" in their name please stand up?



In [None]:
# Placeholder for the observations collected in class.
# TODO: replace the empty list with the recorded values (presumably the ages of
# the people who stood up) before running any cell that uses `sample`.
sample = []

In [44]:
# Read the survey export and record the mean of the respondents' stated ages.
survey = pd.read_excel('/The Matter of Thought_ Bayesian Hypothesis Testing(1-2).xlsx')
real_average = survey['What age are you?'].mean()

# One survey column per hypothesis (H0..H7). Going by the column titles, each
# holds the probability a respondent assigned to that hypothesis.
prior_columns = [
    'The probability that the average age of this class is less than 20 is:',
    'The probability that the average age of this class is 20 is:',
    'The probability that the average age of this class is 21 is:',
    'The probability that the average age of this class is 22 is:',
    'The probability that the average age of this class is 23 is:',
    'The probability that the average age of this class is 24 is:',
    'The probability that the average age of this class is 25 is:',
    'The probability that the average age of this class is greater than 25 is:',
]

# Average every selected column over all respondents; `data` ends up as a
# Series indexed by the question text.
data = survey[prior_columns].mean()


In [49]:
# Example inputs for the Bayes calculation.
prior, likelihood = 0.5, 0.4

# One term per hypothesis H0..H7, each set to 1; `marginal` is their total.
# NOTE(review): the 1s look like placeholders to be filled in — confirm before
# relying on the resulting value.
marginal = sum({'H0': 1, 'H1': 1, 'H2': 1, 'H3': 1, 'H4': 1, 'H5': 1, 'H6': 1, 'H7': 1}.values())



In [52]:
def bayes(prior, marginal, likelihood):
    """Return the posterior probability given by Bayes' rule.

    Note the argument order: the marginal comes second, before the likelihood.

    Args:
        prior: P(H), the probability of the hypothesis before seeing the data.
        marginal: P(B), the overall probability of the data; used as the divisor.
        likelihood: P(B|H), the probability of the data if the hypothesis holds.

    Returns:
        ``likelihood * prior / marginal``.

    Raises:
        ZeroDivisionError: if ``marginal`` is a plain Python zero.
    """
    return likelihood * prior / marginal

In [46]:
# Show the column means computed above: the average probability the survey
# respondents gave to each hypothesis.
# NOTE(review): in the recorded output these means add up to 1.45 rather than 1,
# so presumably they need normalising before being used as priors — confirm.
data

The probability that the average age of this class is less than 20 is:       0.55
The probability that the average age of this class is 20 is:                 0.10
The probability that the average age of this class is 21 is:                 0.10
The probability that the average age of this class is 22 is:                 0.10
The probability that the average age of this class is 23 is:                 0.10
The probability that the average age of this class is 24 is:                 0.10
The probability that the average age of this class is 25 is:                 0.10
The probability that the average age of this class is greater than 25 is:    0.30
dtype: float64

In [47]:
# Posterior for a prior of 0.3, a marginal of 0.1 and a likelihood of 0.01.
# Keywords make the (prior, marginal, likelihood) argument order explicit.
bayes(prior=0.3, marginal=0.1, likelihood=0.01)

0.03