In [1]:
from os.path import basename, exists

In [2]:
def download(url):
    """Download `url` into the current directory unless it is already there.

    The local filename is the last component of the URL's path. Any query
    string or fragment (e.g. ``?raw=true``) is ignored when choosing the
    name, so the cached copy is found again on the next call.

    Prints the local path when a download actually happens; returns None.
    """
    from urllib.parse import urlparse

    # Use only the path part of the URL: basename(url) alone would keep
    # "?query" / "#fragment" suffixes as part of the filename.
    filename = basename(urlparse(url).path)
    if not exists(filename):
        # Only needed when a download actually happens.
        from urllib.request import urlretrieve
        local, _ = urlretrieve(url, filename)
        print(local)

download('https://github.com/AllenDowney/ThinkBayes2/raw/master/data/gss_bayes.csv')

gss_bayes.csv


In [3]:
import pandas as pd

# Load the CSV fetched by download() above and preview the first rows.
gss = pd.read_csv('gss_bayes.csv')
gss.head()

Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


In [4]:
# Boolean Series marking respondents whose industry code (indus10) equals 6870,
# the code this notebook uses to identify bankers.
banker = gss['indus10'].eq(6870)

In [6]:
banker.head()

0    False
1    False
2    False
3     True
4    False
Name: indus10, dtype: bool

In [7]:
banker.sum()

np.int64(728)

In [8]:
banker.mean()

np.float64(0.014769730168391155)

In [9]:
def probability(A):
    """Return the fraction of entries in `A` that are True.

    In this notebook `A` is a Boolean Series (one entry per respondent),
    so its mean is the share of respondents for which the proposition holds.
    """
    fraction_true = A.mean()
    return fraction_true

In [10]:
probability(banker)

np.float64(0.014769730168391155)

In [11]:
# True for respondents whose `sex` code is 2; preview the first rows.
isFemale = gss['sex'].eq(2)
isFemale.head()

0    False
1    False
2     True
3    False
4    False
Name: sex, dtype: bool

In [12]:
isFemale.sum()

np.int64(26511)

In [13]:
isFemale.mean()

np.float64(0.5378575776019476)

In [14]:
probability(isFemale)

np.float64(0.5378575776019476)

In [15]:
# True for respondents whose `sex` code is 1; preview the first rows.
male = gss['sex'].eq(1)
male.head()

0     True
1     True
2    False
3     True
4     True
Name: sex, dtype: bool

In [16]:
male.sum()

np.int64(22779)

In [17]:
male.mean()

np.float64(0.46214242239805237)

In [18]:
# True where `polviews` is 3 or lower, which this notebook counts as liberal.
isLiberal = gss['polviews'].le(3)
isLiberal.head()

0    False
1    False
2    False
3    False
4    False
Name: polviews, dtype: bool

In [19]:
isLiberal.head()

0    False
1    False
2    False
3    False
4    False
Name: polviews, dtype: bool

In [20]:
isLiberal.sum()

np.int64(13493)

In [21]:
probability(isLiberal)

np.float64(0.27374721038750255)

0	Strong democrat
1	Not strong democrat
2	Independent, near democrat
3	Independent
4	Independent, near republican
5	Not strong republican
6	Strong republican
7	Other party

In [24]:
# True where `partyid` is 1 or lower, i.e. codes 0 (Strong democrat) and
# 1 (Not strong democrat) in the code table above.
democrat = gss['partyid'].le(1)
democrat.head()

0    False
1     True
2     True
3    False
4    False
Name: partyid, dtype: bool

In [25]:
probability(democrat)

np.float64(0.3662609048488537)

In [26]:
# Now we will demonstrate the conjunction rule: for any A and B, prob(A) >= prob(A and B)

In [27]:
probability(banker)

np.float64(0.014769730168391155)

In [28]:
probability(isFemale)

np.float64(0.5378575776019476)

In [29]:
probability(banker & isFemale)

np.float64(0.011381618989653074)

In [30]:
# Conditional probability

In [33]:
# Conditional probability by hand: P(democrat | liberal) is the probability of
# being both a democrat and liberal, divided by the probability of being liberal.

numerator = probability(democrat & isLiberal)
denominator = probability(isLiberal)

numerator/denominator

np.float64(0.5206403320240124)

In [34]:
def conditional(proposition, given):
    """Return the conditional probability of `proposition` given `given`.

    Both arguments are Boolean Series over the same respondents. The result
    is probability(proposition & given) divided by probability(given).
    """
    joint = probability(proposition & given)
    return joint / probability(given)

In [35]:
conditional(democrat, isLiberal)

np.float64(0.5206403320240124)

In [36]:
conditional(isFemale, banker)

np.float64(0.7706043956043956)

In [37]:
conditional(banker, isFemale)

np.float64(0.021161027498019694)

In [38]:
# Most bankers are female, but most females aren't bankers

In [40]:
# Law of total probability

# P(A) = P(B1 and A) + P(B2 and A)
# This holds when B1 and B2 are mutually exclusive and collectively exhaustive

In [41]:
probability(banker)

np.float64(0.014769730168391155)

In [43]:
# Law of total probability check: isFemale and male together cover all
# respondents (their probabilities above sum to 1), so these two joint
# probabilities should add up to probability(banker).
prob_again = probability(isFemale & banker) + probability(male & banker)
prob_again

np.float64(0.014769730168391155)

In [45]:
# Exercises:
# Probability that a respondent is liberal, given that they are a Democrat.

prob_lib_dem = conditional(isLiberal, democrat)
prob_lib_dem

np.float64(0.38913200022156985)

In [46]:
# Linda is 31 years old, single, outspoken, and very bright. She majored in philosophy. As a student, she was deeply concerned with issues of discrimination and social justice, and also participated in anti-nuclear demonstrations. Which is more probable?

# Linda is a banker.

# Linda is a banker and considers herself a liberal Democrat.

In [47]:
# Linda is female, so we condition on that: the probability that Linda is a
# banker is the probability of being a banker given that the respondent is female.

linda_banker = conditional(banker, isFemale)
linda_banker

np.float64(0.021161027498019694)

In [48]:
# For the second statement Linda is still female, so we again condition on that:
# the probability of being a banker, a liberal and a Democrat, given female.

linda_second = conditional(banker & isLiberal & democrat, isFemale)
linda_second

np.float64(0.002300931688732979)

In [49]:
# If you are not a liberal at 25, you have no heart. If you are not a conservative at 35, you have no brain.
# Now we compute the probabilities relevant to this quote

gss.head()

Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


In [50]:
# Respondents aged 25 or younger, and their share of the sample.
young_people = gss['age'].le(25)

probability(young_people)

np.float64(0.10846013390139987)

In [51]:
# Respondents aged 35 or older, and their share of the sample.
old_people = gss['age'].ge(35)
probability(old_people)

np.float64(0.6944410630959627)

In [None]:
# Probability the young people have no heart:

no_heart_young = conditional(