In [3]:
import pandas as pd

In [4]:
# Read gss_bayes.csv directly from the BiteSizeBayes GitHub repository
# (this cell needs network access to run).
filename = 'https://github.com/AllenDowney/BiteSizeBayes/raw/master/gss_bayes.csv'
df = pd.read_csv(filename)
# Preview the first rows to check the columns used below
# (sex, polviews, partyid, indus10).
df.head()

Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


In [5]:
# Proportion of bankers in the sample.
# 6870 is the indus10 (industry) code this notebook uses to identify bankers.
BANKING_INDUSTRY_CODE = 6870
banker = df["indus10"] == BANKING_INDUSTRY_CODE
banker.mean()

0.014769730168391155

In [6]:
# Probability function 
def prob(A):
    """Return the fraction of entries in `A` that are True.

    `A` is a boolean Series/array (any object exposing `.mean()`). Because
    True is averaged as 1 and False as 0, the mean is the proportion of the
    population matching the proposition.
    """
    proportion = A.mean()
    return proportion

In [7]:
# Same proportion as banker.mean(), now computed through the prob() helper.
prob(banker)

0.014769730168391155

In [8]:
# Boolean flag per respondent: True where the sex column holds code 2,
# which this notebook treats as "woman".
women = df["sex"] == 2
prob(women)

0.5378575776019476

In [9]:
# Flag respondents whose polviews value is 3 or lower as liberal
# (presumably the low end of the polviews scale is the liberal side;
# confirm against the survey codebook).
liberal = df["polviews"] <= 3
prob(liberal)

0.27374721038750255

In [13]:
# Flag respondents whose partyid value is 1 or lower as Democrats
# (presumably codes 0 and 1 are the Democratic identifications;
# confirm against the survey codebook).
democrat = df['partyid'] <= 1
prob(democrat)

0.3662609048488537

## Conjunction
- Conjunction == logical AND: the probability that all of the given propositions are true at the same time

In [10]:
# Fraction of respondents who are bankers AND women AND liberal:
# `&` combines the boolean Series element-wise before averaging.
prob(banker & women & liberal)

0.002556299452221546

In [11]:
# Conjunction is commutative because the element-wise AND is commutative:
# swapping the operands gives the same boolean Series, hence the same proportion.
p_banker_and_woman = prob(banker & women)
p_woman_and_banker = prob(women & banker)
p_banker_and_woman == p_woman_and_banker

True

## Conditional Probability
- What is the probability that a respondent is a democrat, given that they are liberal?
- What is the probability that a respondent is a woman, given that they are a banker?
- What is the probability that a respondent is a banker, given that they are a woman?

Let’s start with the first one, which we can interpret like this: “Of all the respondents who are liberal, what fraction are Democrats?”

In [14]:
# P(democrat | liberal): keep the democrat flags only for liberal respondents,
# then take the share of that subset that is True.
selected = democrat.loc[liberal]
selected.mean()

0.5206403320240125

In [18]:
# P(woman | banker): share of women among the respondents flagged as bankers.
women_among_bankers = women[banker]
women_among_bankers.mean()

0.7706043956043956

In [22]:
def conditional(proposition, given):
    """Return the probability of `proposition` among rows where `given` is True.

    Both arguments are boolean Series over the same respondents. `given` is
    applied as a mask to `proposition`, and the share of True values in the
    remaining rows is computed with `prob`.
    """
    restricted = proposition[given]
    return prob(restricted)

In [31]:
# Probability of someone being a woman given that they're a banker,
# read off the sex breakdown of the banker rows: code 2 (women) is about 77%.
# Note: value_counts() drops NaN by default, so the shares are relative to
# bankers with a recorded sex.
df.loc[banker, "sex"].value_counts(normalize=True)

2    0.770604
1    0.229396
Name: sex, dtype: float64

In [24]:
# P(woman | banker): probability of someone being a woman given that they're a banker.
# Of the respondents who are bankers, about 77% are women.
conditional(women, given=banker)

0.7706043956043956

In [23]:
# P(banker | woman): probability of someone being a banker given that they're a woman.
# Of all women respondents, about 2% are bankers.
conditional(banker, given=women)

0.02116102749801969

In [29]:
# P(banker | woman), cross-checked without the helper: take the industry
# breakdown of the women rows and look up the share for code 6870 (banking).
# Note: value_counts() drops NaN by default, so this equals
# conditional(banker, given=women) only when indus10 has no missing values
# among women.
industry_shares_among_women = df.loc[women, "indus10"].value_counts(normalize=True)
industry_shares_among_women[6870]

0.02116102749801969

In [27]:
# Conditional probabilities are NOT commutative:
# P(woman | banker) and P(banker | woman) are different quantities.
p_woman_given_banker = conditional(women, given=banker)
p_banker_given_woman = conditional(banker, given=women)
p_woman_given_banker == p_banker_given_woman

False