# Chapter 2 Practice

In [None]:
import altair as alt
import numpy as np
import pandas as pd
from scipy import stats

### 2M1

In [None]:
# Three recorded toss sequences, one list of single-letter outcomes per sequence.
# "W" is a water measurement; "L" is presumably a land measurement.
observations = [list(tosses) for tosses in ("WWW", "WWWL", "LWWLWWW")]

In [None]:
# Grid approximation of the posterior over the water proportion `p`:
# a flat prior on a 100-point grid, updated with a binomial likelihood
# separately for each observation sequence in `observations`.
p_grid = np.linspace(0, 1, num=100)
prior = np.ones_like(p_grid)
prior = prior / prior.sum()  # standardize so the prior sums to 1 over the grid

df = pd.DataFrame({"p_grid": p_grid, "prior": prior})

for i, obs in enumerate(observations):
    n = len(obs)  # number of observations
    k = obs.count("W")  # number of water measurements

    # Likelihood of seeing k waters in n tosses at every candidate p.
    prob_data = stats.binom.pmf(k=k, n=n, p=p_grid)
    unstd_posterior = prior * prob_data

    # Standardize so each posterior sums to 1 over the grid; without this the
    # curves are not probability distributions and cannot be compared with
    # each other or with the prior.
    posterior = unstd_posterior / unstd_posterior.sum()

    df[f"posterior_{i+1}"] = posterior

df

In [None]:
# Convert the wide frame to tall form, keeping `p_grid` as the identifier column.
# The result goes into a NEW name so `df` stays wide: reassigning `df` made this
# cell fail on a second run, because `value_name="density"` would then collide
# with the already-melted `density` column.
df_tall = df.melt("p_grid", value_name="density")

alt.Chart(df_tall, title="probabilities").mark_line().encode(
    alt.X("p_grid", title="water ratio"),
    y="density",
    color="variable",
)

### 2M2

In [None]:
# Grid approximation as in 2M1, but with a step prior that puts zero weight
# on every grid value below 0.5.
p_grid = np.linspace(0, 1, num=100)
prior = np.ones_like(p_grid)
prior[p_grid < 0.5] = 0
prior = prior / prior.sum()  # standardize so the prior sums to 1 over the grid

df = pd.DataFrame({"p_grid": p_grid, "prior": prior})

for i, obs in enumerate(observations):
    n = len(obs)  # number of observations
    k = obs.count("W")  # number of water measurements

    # Likelihood of seeing k waters in n tosses at every candidate p.
    prob_data = stats.binom.pmf(k=k, n=n, p=p_grid)
    unstd_posterior = prior * prob_data

    # Standardize so each posterior sums to 1 over the grid; without this the
    # curves are not probability distributions and cannot be compared with
    # each other or with the prior.
    posterior = unstd_posterior / unstd_posterior.sum()

    df[f"posterior_{i+1}"] = posterior

In [None]:
# Convert the wide frame to tall form, keeping `p_grid` as the identifier column.
# The result goes into a NEW name so `df` stays wide: reassigning `df` made this
# cell fail on a second run, because `value_name="density"` would then collide
# with the already-melted `density` column.
df_tall = df.melt("p_grid", value_name="density")

alt.Chart(df_tall, title="probabilities").mark_line().encode(
    alt.X("p_grid", title="water ratio"),
    y="density",
    color="variable",
)

### 2M3

$$
  P(Earth|Land) = \frac{P(Land|Earth)*P(Earth)}{P(Land)}
$$

In [None]:
# Bayes' rule for P(Earth|Land) with an equal prior of 0.5 on each planet.
prior_planet = 0.5
land_given_earth = 1 - 0.7  # complement of the 0.7 water share on Earth
land_given_mars = 1.0  # land is certain on Mars

joint_land_earth = land_given_earth * prior_planet
joint_land_mars = land_given_mars * prior_planet

p = joint_land_earth / (joint_land_earth + joint_land_mars)
p

In [None]:
# Joint probability table P(surface, planet): each cell is
# P(surface | planet) * P(planet), with a prior of 0.5 on each planet.
p_land_earth = (1 - 0.7) * 0.5
p_land_mars = 1 * 0.5
p_water_earth = 0.7 * 0.5
p_water_mars = 0
df = pd.DataFrame(
    {"land": [p_land_earth, p_land_mars], "water": [p_water_earth, p_water_mars]},
    index=["earth", "mars"],
)

# The four joint probabilities must sum to 1. Compare with a tolerance:
# exact `==` on floats depends on rounding and on the order of summation.
assert np.isclose(df.values.sum(), 1.0)
df

In [None]:
# P(Earth|Land): earth's share of the total probability in the `land` column
land_column = df["land"]
land_column.at["earth"] / land_column.sum()

### 2M4

```
cards: BB, BW, WW
```

$$
P(BB|B) = \frac{P(B|BB)*P(BB)}{P(B)}
$$

In [None]:
# P(BB|B) via Bayes' rule for the three cards BB, BW, WW.
likelihood_b_given_bb = 1  # P(B|BB)
prior_bb = 1 / 3  # P(BB): one of the three cards
evidence_b = 3 / 6  # P(B)

p = likelihood_b_given_bb * prior_bb / evidence_b
p

### 2M5

In [None]:
# Same Bayes' rule shape as 2M4, with different counts.
# NOTE(review): presumably P(BB) = 2/4 and P(B) = 5/8 come from adding a second
# BB card to the deck - confirm against the exercise text.
likelihood_b_given_bb = 1
prior_bb = 2 / 4
evidence_b = 5 / 8

p = likelihood_b_given_bb * prior_bb / evidence_b
p

### 2M6

```
 P(BB) = 1 / 6
 P(BW) = 2 / 6
 P(WW) = 3 / 6
```

In [None]:
# P(BB|B) with the unequal card priors listed above.
card_prior = {"BB": 1 / 6, "BW": 2 / 6, "WW": 3 / 6}
# Probability of a black side facing up for each card.
black_given_card = {"BB": 1, "BW": 1 / 2, "WW": 0}

# P(B): total probability of a black side over all three cards.
evidence_b = sum(card_prior[card] * black_given_card[card] for card in card_prior)

p = card_prior["BB"] * black_given_card["BB"] / evidence_b
p

### 2M7

```
  First draw: BB           BB           BW           WB           WW           WW
 Second draw: BW WB WW WW  BW WB WW WW  BB BB WW WW  BB BB WW WW  BB BB WB BW  BB BB WB BW
     Matches:    y  y  y      y  y  y         n  n
```

In [None]:
# The enumeration above counts 8 (first draw, second draw) paths in total;
# 6 of them are marked `y`, i.e. the first card is BB.
6 / 8

### 2H1

In [None]:
# Joint probability table P(birth type, species): each cell is
# P(birth type | species) * P(species), with a prior of 0.5 on each species.
p_a_twins = 0.1 * 0.5
p_a_single = (1 - 0.1) * 0.5
p_b_twins = 0.2 * 0.5
p_b_single = (1 - 0.2) * 0.5

df = pd.DataFrame(
    {"a_species": [p_a_twins, p_a_single], "b_species": [p_b_twins, p_b_single]},
    index=["twins", "single"],
)

# The four joint probabilities must sum to 1. Compare with a tolerance:
# exact `==` on floats depends on rounding and on the order of summation.
assert np.isclose(df.values.sum(), 1.0)
df

```
 draw species: A 0.5                         |  B 0.5
    1st birth: T 0.1           S 0.9         |  T 0.2          S 0.8      
    2nd birth: T 0.1   S 0.9   T 0.1   S 0.9 |  T 0.2  S 0.8   T 0.2  S 0.8
               x                             |  x
 P(T|[A,B],T): 0.1                           |  0.2

     P(T|T) = P(T|A,T) * P(A|T) + P(T|B,T) * P(B|T) = 0.1 * (0.05 / 0.15) + 0.2 * (0.1 / 0.15) = 0.167
```

### 2H2

```
P(A|T) = 0.05 / (0.05 + 0.1) = 0.333
```

### 2H3

```
P(A|T,S) = P(T,S|A) / [P(T,S|A) + P(T,S|B)] =  0.1 * 0.9 / (0.1 * 0.9 + 0.2 * 0.8) = 0.36
```

### 2H4

```
draw species: A 0.5                           B 0.5
        Test: P 0.8           N 0.2           P 0.65          N 0.35
 Observation: A               B               B               A

    P(A|Pos) = 0.8 * 0.5 / (0.8 * 0.5 + 0.35 * 0.5)
             = 0.696

  P(A|Pos,T) = P(Pos,T|A) * P(A) / (P(Pos,T|A) * P(A) + P(Pos,T|B) * P(B))
             = 0.8 * 0.1 * 0.5 / (0.8 * 0.1 * 0.5 + 0.35 * 0.2 * 0.5)
             = 0.533

             # same using Bayesian update with P(A|Pos) as prior
             = 0.1 * 0.696 / (0.1 * 0.696 + 0.2 * (1 - 0.696))
             = 0.533
             

      let D = Pos,T,S - all evidence we have
      P(A|D) = P(D|A) * P(A) / (P(D|A) * P(A) + P(D|B) * P(B))
             = 0.8 * 0.1 * 0.9 * 0.5 / (0.8 * 0.1 * 0.9 * 0.5 + 0.35 * 0.2 * 0.8 * 0.5)
             = 0.562
             
             # same using Bayesian update with P(A|Pos,T) as prior
             = 0.9 * 0.533 / (0.9 * 0.533 + 0.8 * (1 - 0.533))
             = 0.562
```