# 01. Probability


In [1]:
import pandas as pd
# Load gss_bayes.csv; index_col=0 makes the first CSV column the row index.
gss = pd.read_csv("gss_bayes.csv", index_col=0)
# Preview the first rows.
gss.head()

Unnamed: 0_level_0,year,age,sex,polviews,partyid,indus10
caseid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1974,21.0,1,4.0,2.0,4970.0
2,1974,41.0,1,5.0,0.0,9160.0
5,1974,58.0,2,6.0,1.0,2670.0
6,1974,30.0,1,5.0,4.0,6870.0
7,1974,48.0,1,5.0,4.0,7860.0


In [2]:
# Boolean Series marking bankers: True where the industry code (indus10) is 6870
banker = (gss["indus10"] == 6870)

In [4]:
# Number of bankers: summing a boolean Series counts its True values
banker.sum()

728

In [9]:
# Fraction of bankers: number of True values divided by the number of rows.
# (This expression is not the last line of the cell, so its value is not displayed.)
banker.sum() / gss.shape[0]
# Equivalent: the mean of a boolean Series is the fraction of True values,
# because each True counts as 1 and each False as 0.
banker.mean()

0.014769730168391155

In [11]:
# define a function to compute this prob
def prob(A):
    """Return the probability of the proposition A.

    A is expected to be a boolean Series; its mean is the fraction of
    entries that are True, which is used as the probability.
    """
    fraction_true = A.mean()
    return fraction_true

In [14]:
# Boolean Series marking female respondents (sex code 2 is treated as female here)
female = (gss["sex"] == 2)
# Fraction of respondents who are female
prob(female)

0.5378575776019476

In [16]:
# Distinct codes that appear in the polviews column
gss["polviews"].unique()
# Meaning of each polviews code:
# 1 Extremely liberal
# 2 Liberal
# 3 Slightly liberal
# 4 Moderate
# 5 Slightly conservative
# 6 Conservative
# 7 Extremely conservative


array([4., 5., 6., 2., 7., 3., 1.])

In [19]:
# liberal: polviews codes 1-3 (extremely liberal through slightly liberal)
liberal = (gss["polviews"] <= 3)

In [20]:
# Fraction of respondents who are liberal
prob(liberal)

0.27374721038750255

In [21]:
# democrat: partyid codes <= 1 -- presumably "strong" and "not strong" Democrat;
# verify against the GSS codebook
democrat = (gss["partyid"] <= 1)

In [22]:
# Fraction of respondents who are Democrats
prob(democrat)

0.3662609048488537

## Conjunction
For boolean `Series`, we can use the element-wise `&` operator to compute the conjunction (logical AND) of two propositions.

In [23]:
# P(female and banker): fraction of respondents for whom both propositions are True
prob(female & banker)

0.011381618989653074

In [24]:
# banker[female] keeps only the rows where female is True, so this is the
# fraction of female respondents who are bankers: P(banker | female)
prob(banker[female])

0.02116102749801969

In [25]:
# female[banker] keeps only the rows where banker is True, so this is the
# fraction of bankers who are female: P(female | banker)
prob(female[banker])

0.7706043956043956

## Conditional Probability

In [26]:
def conditional(proposition, given):
    """Return the probability of `proposition` conditioned on `given`.

    `given` is used as a boolean mask to select entries of `proposition`;
    the probability is then computed over that subset with `prob`.
    """
    subset = proposition[given]
    return prob(subset)

In [27]:
# P(liberal | female): fraction of female respondents who are liberal
conditional(liberal, given = female)

0.27581004111500884

In [28]:
# P(female | liberal): fraction of liberal respondents who are female
conditional(female, given=liberal)

0.5419106203216483

In [29]:
# Boolean Series marking male respondents (sex code 1 is treated as male here)
male = (gss["sex"] == 1)

In [31]:
# Total probability of banker: assuming every respondent is coded either
# male (1) or female (2), the two conjunctions add up to prob(banker)
prob(male & banker) + prob(female & banker)

0.014769730168391155

In [33]:
# or, equivalently, weight each conditional probability by the probability of its condition:
(
    prob(male) * conditional(banker, given=male)
    + prob(female) * conditional(banker, given=female)
)

0.014769730168391153

In [34]:
# Use the "polviews" column as the conditioning variable
B = gss["polviews"]

In [35]:
# Number of respondents for each polviews code, ordered by code
B.value_counts().sort_index()

1.0     1442
2.0     5808
3.0     6243
4.0    18943
5.0     7940
6.0     7319
7.0     1595
Name: polviews, dtype: int64

In [36]:
# Total probability of banker, summing over the polviews codes 1 through 7
sum(
    prob(B == code) * conditional(banker, given=(B == code))
    for code in range(1, 8)
)

0.014769730168391157

# Exercises

## Ex 1-1

Let’s use the tools in this chapter to solve a variation of the Linda problem.
Linda is 31 years old, single, outspoken, and very bright. She majored in philosophy.
As a student, she was deeply concerned with issues of discrimination and social justice,
and also participated in anti-nuclear demonstrations. Which is more probable?
1. Linda is a banker.
2. Linda is a banker and considers herself a liberal Democrat.

To answer this question, compute the following three probabilities:


In [45]:
# The probability that Linda is a female banker: P(female and banker)
p1 = prob(female & banker)
print(p1)

0.011381618989653074


In [46]:
# The probability that Linda is a liberal female banker:
# P(liberal and female and banker)
p2 = prob(liberal & female & banker)
print(p2)

0.002556299452221546


In [47]:
# The probability that Linda is a liberal female banker and a Democrat:
# P(liberal and female and banker and democrat)
p3 = prob(liberal & female & banker & democrat)
print(p3)

0.0012375735443294787


## Ex 1-2

Use `conditional` to compute the following probabilities:

• What is the probability that a respondent is liberal, given that they are a
Democrat?

In [48]:
# P(liberal | democrat): fraction of Democrats who are liberal
conditional(liberal, given=democrat)

0.3891320002215698

• What is the probability that a respondent is a Democrat, given that they are
liberal?

In [49]:
# P(democrat | liberal): fraction of liberals who are Democrats
conditional(democrat, given=liberal)

0.5206403320240125

## Ex 1-3

In [50]:
# young: respondents whose age is strictly less than 30
young = (gss["age"] < 30)
# Fraction of respondents who are young
prob(young)

0.19435991073240008

In [51]:
# old: respondents whose age is 65 or greater
old = (gss["age"]>= 65)
# Fraction of respondents who are old
prob(old)

0.17328058429701765

In [53]:
# conservative: polviews codes 5-7 (slightly conservative through extremely conservative)
conservative = (gss["polviews"] >= 5)
# Fraction of respondents who are conservative
prob(conservative)

0.3419354838709677

In [54]:
# What is the probability that a randomly chosen respondent is a young liberal?
# -> the conjunction P(young and liberal)
prob(young & liberal)

0.06579427875836884

In [55]:
# What is the probability that a young person is liberal?
# -> the conditional probability P(liberal | young)
conditional(liberal, given=young)

0.338517745302714

In [56]:
# What fraction of respondents are old conservatives?
# -> the conjunction P(old and conservative)
prob(old & conservative)

0.06701156421180766

In [57]:
# What fraction of conservatives are old?
# -> the conditional probability P(old | conservative)
conditional(old, given=conservative)

0.19597721609113564