In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the data file

from os.path import basename, exists

def download(url):
    """Fetch `url` into the working directory unless it is already there.

    The local file name is the last path component of `url`. If a file with
    that name already exists, nothing is fetched and nothing is printed.
    Otherwise the file is retrieved and a 'Downloaded <name>' line is printed.
    """
    target = basename(url)
    if exists(target):
        return

    # Imported lazily: only needed when a fetch actually happens.
    from urllib.request import urlretrieve
    saved_path, _headers = urlretrieve(url, target)
    print('Downloaded ' + saved_path)
    
# Fetch gss_bayes.csv, the file read into `gss` below; download() skips the fetch if it is already on disk.
download('https://github.com/AllenDowney/ThinkBayes2/raw/master/data/gss_bayes.csv')

Downloaded gss_bayes.csv


In [3]:
# Load the downloaded CSV into a pandas DataFrame and preview the first rows.
gss = pd.read_csv('gss_bayes.csv')
gss.head()

Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


In [13]:
# Probability of being a banker: rows whose `indus10` code is 6870 are treated as bankers.
# First from the basic definition: (number of True values) / (number of rows).
banker = gss['indus10'] == 6870
Pr_banker = banker.sum()/len(banker)
print(f' Probability of banker using basis def : {Pr_banker:.4f}')

# Equivalently, the mean of a Boolean Series is the fraction of True values.
# Print Pr_banker_2 here (not Pr_banker) so the two methods are actually compared.
Pr_banker_2 = banker.mean()
print(f' Probability of banker using mean : {Pr_banker_2:.4f}')

 Probability of banker using basis def : 0.0148
 Probability of banker using mean : 0.0148


In [16]:
# Probability of a proposition, represented as a Boolean Series.
def prob(A):
    """Return the probability of proposition `A`.

    A: Boolean Series (or any object with a `.mean()` method) with one entry
       per respondent, True where the proposition holds.

    returns: the mean of `A`, i.e. the fraction of entries that are True.
    """
    fraction_true = A.mean()
    return fraction_true

# Sanity check: prob() applied to the banker mask.
print(f'Prob of banker = {prob(banker):.4f}')

Prob of banker = 0.0148


In [28]:
# Probability of being female.
# The mask marks respondents whose `sex` code is 2; the distinct codes stored
# in the column are printed first so the coding can be inspected.
female = gss['sex'].eq(2)
print(f"Number of unique sex values : {gss['sex'].unique()}")
print(f"Prob of female:{prob(female):.4f}")

Number of unique sex values : [1 2]
Prob of female:0.5379


In [32]:
# Probability of being a Democrat.
# NOTE(review): presumably partyid codes 0 and 1 are the Democrat categories - confirm against the GSS codebook.
democrat = gss['partyid'].le(1)
print(f'probaility of being a democrat: {prob(democrat):.4f}')

probaility of being a democrat: 0.3663


In [46]:
# Probability of being liberal.
# NOTE(review): assumes polviews codes 1-3 are the liberal end of the scale - confirm against the GSS codebook.
liberal = gss['polviews'] <= 3
# The label must name the proposition being measured (liberal, not democrat).
print(f'probability of being liberal: {prob(liberal):.4f}')

probaility of being a democrat: 0.2737


In [33]:
# Conjunction Pr(A and B): `&` combines the two Boolean Series element-wise,
# so the result is True only where both propositions hold.
print(f'prob of being banker and democrat = {prob(banker & democrat):.4f}')

prob of being banker and democrat = 0.0047


In [38]:
# Conditional probability via Boolean indexing: `x[mask]` keeps only the
# entries of `x` where `mask` is True, so prob() of the result is P(x | mask).

# P(female | banker): restrict `female` to the rows where `banker` is True.
selected = female[banker]
print(f'Prob of being female given banker:{prob(selected):.4f}')

# P(banker | female): the roles are swapped, which is a different quantity.
selected = banker[female]
print(f'Prob of being banker given female:{prob(selected):.4f}')

Prob of being female given banker:0.7706
Prob of being banker given female:0.0212


In [39]:
# Conditional probability P(proposition | given).

def conditional(proposition,given):
    """Return the probability of `proposition` conditioned on `given`.

    proposition: Boolean Series for the event of interest.
    given: Boolean Series used as a mask; only entries where it is True
           are kept before the probability is computed.

    returns: prob() of the selected entries of `proposition`.
    """
    restricted = proposition[given]
    return prob(restricted)

# conditional() is not symmetric in its arguments: swapping them answers a different question.
print(f'Prob of being female given banker:{conditional(female,banker):.4f}')
print(f'Prob of being banker given female:{conditional(banker,female):.4f}')

Prob of being female given banker:0.7706
Prob of being banker given female:0.0212


### Laws of probability

* $P(A|B) = \frac{P(A \text{ and } B)}{P(B)}$, provided $P(B) > 0$

* $P(A \text{ and } B) = P(A|B) \, P(B)$

* $P(A|B) = \frac{P(B|A) \, P(A)}{P(B)}$, which is Bayes's theorem

In [45]:
# Three ways to compute P(female | banker): Boolean indexing, the
# conditional() helper, and the conjunction formula P(A and B) / P(B).

print(f'Prob of being female given banker using brakcet operator = {(female[banker]).mean():.4f}\n')
print(f'Prob of being female given banker using conditional prob. func = {conditional(female,banker):.4f}\n')
print(f'Prob of being female given banker using conjuncton = {prob(female & banker)/prob(banker):.4f}')


Prob of being female given banker using brakcet operator = 0.7706

Prob of being female given banker using conditional prob. func = 0.7706

Prob of being female given banker using conjuncton = 0.7706


In [48]:
# Use a conditional probability to get a conjunction:
# P(A and B) = P(A|B) * P(B), compared with the direct computation.

print(f'Prob of being liberal and democrat = { prob(liberal & democrat):.4f}\n')
print(f'Prob of being liberal and democrat using conditional = { conditional(liberal,democrat) * prob(democrat):.4f}\n')

Prob of being liberal and democrat = 0.1425

Prob of being liberal and democrat using conditional = 0.1425



In [51]:
# Check Bayes's theorem: P(A|B) computed directly versus P(B|A) * P(A) / P(B),
# with A = liberal and B = banker.

print(f'Prob of being liberal given banker : {conditional(liberal,banker):.4f}\n')
print(f'Prob of being liberal given banker using bayes: {conditional(banker,liberal) * prob(liberal)/prob(banker):.4f}')

Prob of being liberal given banker : 0.2239

Prob of being liberal given banker using bayes: 0.2239


### Law of total probability 

* $P(A) = \sum_i P(A|B_i) P(B_i)$, where the $B_i$ must be mutually exclusive and collectively exhaustive (exactly one $B_i$ is true for every respondent)


In [55]:
# Mask for respondents whose `sex` code is 1, the complement of `female`
# when the column holds only the codes 1 and 2.
male = gss['sex'].eq(1)
print(f'Prob of being a banker:{prob(banker):.4f}')
print(f'Prob of being a female:{prob(female):.4f}')
print(f'Prob of being a male:{prob(male):.4f}')

Prob of being a banker:0.0148
Prob of being a female:0.5379
Prob of being a male:0.4621


In [58]:
# Total probability as a sum of conjunctions: P(banker and male) + P(banker and female).
print(f'Prob of being banker using total prob : {prob(banker & male) + prob(banker & female) :.4f}\n')
# The same sum written with conditionals: P(banker|male) P(male) + P(banker|female) P(female).
print(f'Prob of being banker using total prob : {conditional(banker,male) * prob(male) + conditional(banker,female) * prob(female) :.4f}\n')

Prob of being banker using total prob : 0.0148

Prob of being banker using total prob : 0.0148



In [62]:
# Check that `male` and `female` partition the sample:
# the conjunction should have probability 0 (mutually exclusive) ...
print(f'Mutual exclusiveness between male and female:{prob(male & female)}\n')
# ... and the two probabilities should add up to 1 (collectively exhaustive).
print(f'Prob of either male or female:{prob(male)+prob(female):.4f}')

Mutual exclusiveness between male and female:0.0

Prob of either male or female:1.0000


### Exercises

**Exercise 01:** Let's use the tools in this chapter to solve a variation of the Linda problem.

> Linda is 31 years old, single, outspoken, and very bright. She majored in philosophy. As a student, she was deeply concerned with issues of discrimination and social justice, and also participated in anti-nuclear demonstrations.  Which is more probable?
> 1. Linda is a banker.
> 2. Linda is a banker and considers herself a liberal Democrat.

To answer this question, compute 

* The probability that Linda is a banker, given that she is female,

* The probability that Linda is a banker and a liberal Democrat, given that she is female.

In [72]:
# Every value below is conditioned on `female`, so the labels say "given female"
# rather than describing a joint probability with being female.

# P(banker | female)
print(f'Prob of Linda being a banker, given female : {conditional(banker,female):.5f}\n')
# P(banker and liberal | female)
print(f'Prob of Linda being a liberal banker, given female : {conditional(banker & liberal, female):.5f}\n')
# P(banker and liberal and Democrat | female): adding a condition to the
# conjunction can never make it more probable than "banker" alone.
print(f'Prob of Linda being a liberal Democrat banker, given female : {conditional(banker & liberal & democrat,female):.5f}\n')

Prob of Linda being female banker : 0.02116

Prob of Linda being liberal female banker : 0.00475

Prob of Linda being liberal female banker and democrat: 0.00230



**Exercise 02:** There's a [famous quote](https://quoteinvestigator.com/2014/02/24/heart-head/) about young people, old people, liberals, and conservatives that goes something like:

> If you are not a liberal at 25, you have no heart. If you are not a conservative at 35, you have no brain.

Whether you agree with this proposition or not, it suggests some probabilities we can compute as an exercise.
Rather than use the specific ages 25 and 35, let's define `young` as under 30 and `old` as 65 or over:

In [74]:
# Age groups: `young` is strictly under 30, `old` is 65 or above (inclusive).
young = gss['age'].lt(30)
old = gss['age'].ge(65)

print(f'Prob of being young: {prob(young):.5f}\n')
print(f'Prob of being old: {prob(old):.5f}\n')

Prob of being young: 0.19436

Prob of being old: 0.17328



In [75]:
# NOTE(review): assumes polviews codes 5 and above are the conservative end of the scale - confirm against the GSS codebook.
conservative = gss['polviews'].ge(5)
print(f'Prob of being conservative:{prob(conservative):.5f}')

Prob of being conservative:0.34194


In [80]:
# Joint probability P(young and liberal) over the whole sample.
print(f'Prob of being young liberal:{prob(young & liberal):.5f}')
# Conditional probability P(liberal | young): restricted to young respondents.
print(f'Prob that young person is liberal:{conditional(liberal,young):.5f}')
# Joint probability P(old and conservative) over the whole sample.
print(f'fraction of old conservatives:{prob(old & conservative):.5f}')
# Conditional probability P(old | conservative): restricted to conservatives.
print(f'fraction of conservatives are old:{conditional(old,conservative):.5f}')

Prob of being young liberal:0.06579
Prob that young person is liberal:0.33852
fraction of old conservatives:0.06701
fraction of conservatives are old:0.19598
