In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy as sp

In [2]:
# Load the GSS extract from ./data relative to the current working directory
# and preview the first rows.
gss = pd.read_csv(Path.cwd().joinpath('data', 'gss_bayes.csv'))
gss.head()

Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


In [3]:
# One row per column of `gss`: its dtype and its number of distinct values.
# `keys` labels the two concatenated Series as the output columns.
pd.concat([gss.dtypes, gss.nunique()], axis=1, keys=['dtype', 'nunique'])

Unnamed: 0,dtype,nunique
caseid,int64,4376
year,int64,29
age,float64,72
sex,int64,2
polviews,float64,7
partyid,float64,8
indus10,float64,270


In [4]:
# Boolean Series (one entry per row of `gss`): True where the industry code
# `indus10` equals 6870.
# NOTE(review): 6870 is presumably the banking code in the GSS codebook — confirm.
banker = gss['indus10'].eq(6870)
banker.head()

0    False
1    False
2    False
3     True
4    False
Name: indus10, dtype: bool

In [5]:
def prob(ser: pd.Series) -> float:
    """Return the mean of `ser`, used here as an empirical probability.

    Args:
        ser: Series to average. The callers in this notebook pass boolean
            Series, for which the mean is the fraction of True entries.

    Returns:
        The value of `ser.mean()`.
    """
    return ser.mean()

In [6]:
# P(banker): fraction of rows where `banker` is True.
prob(banker)

0.014769730168391155

In [7]:
# Boolean Series: True where `sex` equals 2.
# NOTE(review): assumes code 2 denotes female in this dataset — confirm against the codebook.
female = gss['sex'].eq(2)
female.head()

0    False
1    False
2     True
3    False
4    False
Name: sex, dtype: bool

In [8]:
# P(female): fraction of rows where `female` is True.
prob(female)

0.5378575776019476

In [9]:
# Boolean Series: True where `polviews` is at most 3.
# NOTE(review): assumes the low end of the polviews scale is the liberal side — confirm.
liberal = gss['polviews'].le(3)
liberal.head()

0    False
1    False
2    False
3    False
4    False
Name: polviews, dtype: bool

In [10]:
# P(liberal): fraction of rows where `liberal` is True.
prob(liberal)

0.27374721038750255

In [11]:
# Boolean Series: True where `partyid` is 0 or 1.
# NOTE(review): assumes codes 0 and 1 are the Democrat categories — confirm against the codebook.
democrat = gss['partyid'].isin([0, 1])
democrat.head()

0    False
1     True
2     True
3    False
4    False
Name: partyid, dtype: bool

In [12]:
# P(democrat): fraction of rows where `democrat` is True.
prob(democrat)

0.3662609048488537

In [13]:
# Conjunction: `&` combines the two boolean Series element-wise, so this is
# the fraction of rows that are both banker and democrat.
prob(banker & democrat)

0.004686548995739501

In [14]:
# Restrict `democrat` to the rows where `liberal` is True (boolean-mask
# indexing); the surviving index labels are those of the liberal rows.
selected = democrat[liberal]
selected.head()

8     False
17     True
18    False
24     True
28    False
Name: partyid, dtype: bool

In [15]:
# P(democrat | liberal): fraction of True values among the liberal rows only.
prob(selected)

0.5206403320240125

In [16]:
# P(female | banker): mean of `female` restricted to rows where `banker` is True.
prob(female[banker])

0.7706043956043956

In [17]:
def conditional(proposition: pd.Series, given: pd.Series) -> float:
    """Return the probability of `proposition` among rows selected by `given`.

    Args:
        proposition: Series to average (boolean Series in this notebook).
        given: Mask used to index `proposition`; the callers pass boolean
            Series built from the same DataFrame.

    Returns:
        `prob(proposition[given])`, i.e. the mean of `proposition` over the
        rows where `given` is True.
    """
    return prob(proposition[given])

In [18]:
# P(female | liberal and democrat): the condition is itself a conjunction.
conditional(female, liberal & democrat)

0.576085409252669

In [19]:
# P(female and liberal | banker): here the proposition is the conjunction.
conditional(female & liberal, banker)

0.17307692307692307

In [20]:
# Conditional probability from its definition: P(A | B) = P(A and B) / P(B),
# here with A = female and B = banker.
prob(female & banker) / prob(banker)

0.7706043956043956

In [23]:
# Product rule check: P(A and B) == P(A) * P(B | A) with A = democrat, B = liberal.
# The two sides are computed through different floating-point operations, so
# they are compared with a tolerance (np.isclose) rather than exact `==`,
# which can report False when the results differ only by round-off.
np.isclose(prob(democrat) * conditional(liberal, democrat), prob(democrat & liberal))

True

In [24]:
# P(liberal | banker), computed directly by restricting to banker rows.
conditional(liberal, banker)

0.2239010989010989

In [26]:
# Bayes's theorem: P(A | B) = P(A) * P(B | A) / P(B), with A = liberal and
# B = banker.
prob(liberal) * conditional(banker, liberal) / prob(banker)

0.2239010989010989

In [27]:
# Boolean Series: True where `sex` equals 1.
# NOTE(review): assumes code 1 denotes male in this dataset — confirm against the codebook.
male = gss['sex'].eq(1)
male.head()

0     True
1     True
2    False
3     True
4     True
Name: sex, dtype: bool

In [28]:
# Partition check: `male` and `female` are tested as a split of the rows, so
# the two banker sub-probabilities should add up to P(banker).
# The left side is a sum of two separately rounded floats, so it is compared
# with a tolerance (np.isclose) instead of exact `==`, which is sensitive to
# last-digit round-off.
np.isclose(prob(male & banker) + prob(female & banker), prob(banker))

True

In [30]:
# Law of total probability over the male/female split:
# P(banker) = P(male) * P(banker | male) + P(female) * P(banker | female).
(prob(male) * conditional(banker, male)) + (prob(female) * conditional(banker, female))

0.014769730168391153

In [31]:
# `B` is the raw polviews column; the line below counts the rows holding each
# distinct value, ordered by value.
B = gss['polviews']
B.value_counts().sort_index()

1.0     1442
2.0     5808
3.0     6243
4.0    18943
5.0     7940
6.0     7319
7.0     1595
Name: polviews, dtype: int64

In [35]:
# One term of the total-probability sum: P(B == i) * P(banker | B == i) for a
# single polviews value.
i = 4
prob(B == i) * conditional(banker, B == i)

0.005822682085615744

In [37]:
# Law of total probability over polviews: add P(B == i) * P(banker | B == i)
# for i = 1..7 (range(1, 8) excludes 8).
# NOTE(review): this assumes polviews only takes the values 1 through 7 — verify with B.value_counts().
sum(prob(B == i) * conditional(banker, B == i) for i in range(1, 8))

0.014769730168391157