# Probability

In [None]:
import wget
import os
# Download the GSS dataset once; later runs reuse the local copy.
url_csv = "https://github.com/AllenDowney/BiteSizeBayes/raw/master/gss_bayes.csv"
csv_path = "files/gss_bayes.csv"
# The target folder must exist before the file can be written into it.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
if not os.path.exists(csv_path):
    wget.download(url_csv, csv_path)

In [None]:
import pandas as pd
# Read the CSV from the location the download cell saves it to
# ("files/gss_bayes.csv"); the first column is used as the row index.
gss = pd.read_csv('files/gss_bayes.csv', index_col=0)
gss.head()

In [None]:
# Boolean Series: True on every row whose `indus10` code equals 6870, False elsewhere
banker = gss['indus10'].eq(6870)
banker

In [None]:
banker.sum() # number of True entries: summing treats True as 1 and False as 0

In [None]:
# Note: banker.size == len(banker), i.e. the number of rows in the Series
banker.mean() # fraction of True values = banker.sum() / banker.size = 728/49290, i.e. the probability

In [None]:
# Probability that a respondent is female (rows where `sex` equals 2)
gss['sex'].eq(2).mean()

In [None]:
# Probability that a respondent is liberal (rows where `polviews` is at most 3)
gss["polviews"].le(3).mean()

In [None]:
# Probability that a respondent is a Democrat (rows where `partyid` is at most 1)
gss["partyid"].le(1).mean()

In [None]:
# probability function
def prob(A):
    """Return the probability of proposition `A`.

    `A` is expected to be a boolean pandas Series (one True/False value per
    row); its mean is the fraction of True values.
    """
    fraction_true = A.mean()
    return fraction_true

## Conjunction (AND)

In [None]:
banker = gss['indus10'].eq(6870)
democrat = gss["partyid"].le(1)
# P(banker and democrat): `&` combines the two boolean Series element-wise
prob(banker & democrat) # same value as (banker & democrat).mean()

## Conditional Probability (A|B)

In [None]:
# Calculate: P(democrat/liberal), the fraction of liberals who are Democrats

liberal = gss["polviews"] <= 3
democrat = gss["partyid"] <= 1

# Indexing `democrat` with `liberal` keeps only the rows where `liberal` is True,
# so `ans` says, for each liberal respondent, whether they are a Democrat.
ans = democrat[liberal]

# Sizes, as recorded in the original notes:
#   liberal.size  -> 49290 (all rows, True + False)
#   liberal.sum() -> 13493 (rows where liberal is True)
#   ans.size      -> 13493 (== liberal.sum())

# The selection is conditioned on `liberal`, so the label reads democrat/liberal.
print("P(democrat/liberal):\t", ans.mean())

In [None]:
# Calculate: P(female/banker), the fraction of bankers who are female

female = gss['sex'].eq(2)
banker = gss['indus10'].eq(6870)

# Keep the `female` values only for rows where `banker` is True
ans = female[banker]

print("P(female/banker):\t", ans.mean())

In [None]:
def conditional(proposition, given):
    """Return the probability of `proposition` among rows where `given` is True.

    Indexing `proposition` with the boolean Series `given` keeps only the
    rows where `given` holds; `prob` then returns the fraction of those rows
    where `proposition` is True.
    """
    restricted = proposition[given]
    return prob(restricted)

# Examples: each conditional probability is printed right after it is computed.
ans_1 = conditional(liberal & female, given=banker)
print("Ej1. P((liberal & female)/banker):\t", ans_1)

ans_2 = conditional(banker, given=liberal & female)
print("Ej2. P(banker/(liberal & female)):\t", ans_2)

ans_3 = conditional(female, given=liberal & democrat)
print("Ej3. P(female/(liberal & democrat)):\t", ans_3)

## Bayes's Theorem
**Theorem 1** gives us a way to compute a conditional probability using a conjunction:

$$P(A|B) = \frac{P(A~\mathrm{and}~B)}{P(B)}$$

**Theorem 2** gives us a way to compute a conjunction using a conditional probability:

$$P(A~\mathrm{and}~B) = P(B) P(A|B)$$

**Theorem 3**, also known as Bayes's Theorem, gives us a way to get from $P(A|B)$ to $P(B|A)$, or the other way around:

$$P(A|B) = \frac{P(A) P(B|A)}{P(B)}$$

## Exercises

**Exercise 1** Compute the following probabilities:
* The probability that Linda is a female banker,
* The probability that Linda is a liberal female banker, and
* The probability that Linda is a liberal female banker and a Democrat.

In [None]:
# Each conjunction adds one more condition to the previous one.
print(prob(female & banker))
print(prob(female & banker & liberal))
print(prob(female & banker & liberal & democrat))

**Exercise 2**
* What is the probability that a respondent is liberal, given that they are a Democrat?
* What is the probability that a respondent is a Democrat, given that they are liberal?

In [None]:
# P(liberal | democrat), then P(democrat | liberal)
print(conditional(liberal, given=democrat))
print(conditional(democrat, given=liberal))

**Exercise 3**
* What is the probability that a randomly chosen respondent is a young liberal?
* What is the probability that a young person is liberal?
* What fraction of respondents are old conservatives?
* What fraction of conservatives are old?

_Note:_ 
```python
young = gss["age"] < 30
old = gss["age"] >= 65
conservative = gss["polviews"] >= 5
```

In [None]:
young = gss["age"].lt(30)
old = gss["age"].ge(65)
conservative = gss["polviews"].ge(5)

# solution
print(prob(liberal & young))                 # P(young and liberal)
print(conditional(liberal, given=young))     # P(liberal | young)
print(prob(conservative & old))              # P(old and conservative)
print(conditional(old, given=conservative))  # P(old | conservative)