# Chapter 1 - Probability

## Linda the Banker

In [1]:
import pandas as pd

# Load gss_bayes.csv; index_col=0 makes the first column (caseid) the row index.
gss = pd.read_csv(
    'https://raw.githubusercontent.com/AllenDowney/BiteSizeBayes/master/gss_bayes.csv',
    index_col=0,
)
gss.head()

Unnamed: 0_level_0,year,age,sex,polviews,partyid,indus10
caseid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1974,21.0,1,4.0,2.0,4970.0
2,1974,41.0,1,5.0,0.0,9160.0
5,1974,58.0,2,6.0,1.0,2670.0
6,1974,30.0,1,5.0,4.0,6870.0
7,1974,48.0,1,5.0,4.0,7860.0


## Fraction of Bankers

In the `indus10` column, the code for "Banking and related activities" is 6870.

In [2]:
# Boolean Series: True for cases whose `indus10` code is 6870 (banking).
banker = gss['indus10'] == 6870
banker.head()

caseid
1    False
2    False
5    False
6     True
7    False
Name: indus10, dtype: bool

In [3]:
# Summing a boolean Series counts its True values: the number of bankers.
banker.sum()

728

In [4]:
# The mean of a boolean Series is the fraction of its values that are True.
banker.mean()

0.014769730168391155

In [5]:
# The same fraction spelled out: number of bankers divided by number of rows.
banker.sum()/len(gss)

0.014769730168391155

## The Probability Function

In [6]:
def prob(A):
    """Return the probability of proposition A.

    A is expected to be a boolean Series, as in the cells of this notebook;
    its mean is the fraction of entries that are True.
    """
    fraction_true = A.mean()
    return fraction_true

In [7]:
# P(banker): fraction of cases that are bankers.
prob(banker)

0.014769730168391155

Note: `sex` is encoded as:
* male = 1
* female = 2

In [8]:
# Boolean Series: True for cases coded female (sex == 2).
female = gss['sex'] == 2

In [9]:
# P(female): fraction of cases coded female.
prob(female)

0.5378575776019476

## Political Views and Parties

`polviews` is coded on a 7-point scale:
* 1    Extremely liberal
* 2    Liberal
* 3    Slightly liberal
* 4    Moderate
* 5    Slightly conservative
* 6    Conservative
* 7    Extremely conservative

In [10]:
# Boolean Series: True for polviews codes 1-3
# (extremely liberal, liberal, slightly liberal).
liberal = gss['polviews'] <= 3

In [11]:
# P(liberal): fraction of cases with polviews of 3 or lower.
prob(liberal)

0.27374721038750255

`partyid` is encoded as:
* 0    Strong democrat
* 1    Not strong democrat
* 2    Independent, near democrat
* 3    Independent
* 4    Independent, near republican
* 5    Not strong republican
* 6    Strong republican
* 7    Other party

In [12]:
# Boolean Series: True for partyid codes 0-1 (strong / not strong democrat).
# Code 2 ("Independent, near democrat") is not counted as democrat here.
democrat = gss['partyid'] <= 1

In [13]:
# P(democrat): fraction of cases with partyid of 1 or lower.
prob(democrat)

0.3662609048488537

## Conjunction

“Conjunction” is another name for the logical `and` operation. If you have two propositions, A and B, the conjunction A `and` B is True if both A `and` B are True, and False otherwise. For boolean Series, the `&` operator computes the conjunction element by element.

In [14]:
# P(banker and democrat): `&` is True only where both Series are True.
prob(banker & democrat)

0.004686548995739501

## Conditional Probability

In [15]:
# Boolean indexing: keep the `democrat` values only where `liberal` is True.
selected = democrat[liberal]

In [16]:
# Fraction of the selected (liberal) cases that are democrats: P(democrat | liberal).
prob(selected)

0.5206403320240125

In [17]:
# Restrict `female` to the banker cases, then take the fraction: P(female | banker).
# Note: this rebinds `selected`, which previously held democrat[liberal].
selected = female[banker]
prob(selected)

0.7706043956043956

In [18]:
def conditional(proposition, given):
    """Return the probability of `proposition` among the cases where `given` holds.

    `given` is used as a boolean mask to select entries of `proposition`;
    the probability is then computed on that selection with `prob`.
    """
    selection = proposition[given]
    return prob(selection)

In [19]:
# P(liberal | female): fraction of the female cases that are liberal.
conditional(liberal, given=female)

0.27581004111500884

## Conditional Probability is not Commutative

In [20]:
# P(female | banker): fraction of the banker cases that are female.
conditional(female, given=banker)

0.7706043956043956

In [21]:
# P(banker | female): fraction of the female cases that are bankers.
# Swapping the arguments gives a different quantity from P(female | banker).
conditional(banker, given=female)

0.02116102749801969

## Condition and Conjunction

In [22]:
# The condition can itself be a conjunction: P(female | liberal and democrat).
conditional(female, given=liberal & democrat)

0.576085409252669

In [23]:
# The proposition can be a conjunction too: P(liberal and female | banker).
conditional(liberal & female, given=banker)

0.17307692307692307

## Laws of Probability

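### Theorem 1

A conditional probability can be computed from a conjunction: $P(A|B) = \frac{P(A~\mathrm{and}~B)}{P(B)}$. The cells below compute $P(\mathrm{female}|\mathrm{banker})$ three ways and get the same value.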

In [24]:
# Direct computation: select the banker cases, then take the fraction that are female.
female[banker].mean()

0.7706043956043956

In [25]:
# The same quantity, P(female | banker), through the helper.
conditional(female, given=banker)

0.7706043956043956

In [26]:
# The same quantity as a ratio: P(female and banker) / P(banker).
prob(female & banker) / prob(banker)

0.7706043956043956

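### Theorem 2

A conjunction can be computed from a conditional probability: $P(A~\mathrm{and}~B) = P(B)\,P(A|B)$. The two cells below compute each side for A = liberal and B = democrat.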

In [27]:
# Left-hand side: P(liberal and democrat).
prob(liberal & democrat)

0.1425238385067965

In [28]:
# Right-hand side: P(democrat) * P(liberal | democrat).
prob(democrat) * conditional(liberal, given=democrat)

0.1425238385067965

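### Theorem 3

Bayes's theorem: $P(A|B) = \frac{P(A)\,P(B|A)}{P(B)}$. The two cells below compute each side for A = liberal and B = banker.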

In [29]:
# Left-hand side: P(liberal | banker).
conditional(liberal, given=banker)

0.2239010989010989

In [30]:
# Right-hand side: P(liberal) * P(banker | liberal) / P(banker).
prob(liberal) * conditional(banker, given=liberal) / prob(banker)

0.2239010989010989

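### The Law of Total Probability

If $B_1, \ldots, B_n$ are mutually exclusive and collectively exhaustive, then $P(A) = \sum_i P(B_i~\mathrm{and}~A) = \sum_i P(B_i)\,P(A|B_i)$. The cells below check this for A = banker, first splitting by `sex` and then by `polviews`.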

In [31]:
# Target value: P(banker), to be reproduced by the sums below.
prob(banker)

0.014769730168391155

In [32]:
# Boolean Series: True for cases coded male (sex == 1).
male = gss['sex'] == 1

In [33]:
# P(male and banker) + P(female and banker): the two sex codes are disjoint,
# so the two conjunctions add up to P(banker).
prob(male & banker) + prob(female & banker)

0.014769730168391155

In [34]:
# Law of total probability over sex:
# P(male) * P(banker | male) + P(female) * P(banker | female).
(
    prob(male) * conditional(banker, given=male)
    + prob(female) * conditional(banker, given=female)
)

0.014769730168391153

In [35]:
# Alias the `polviews` column, then count the cases at each value;
# sort_index() orders the counts by polviews value rather than by frequency.
B = gss['polviews']
B.value_counts().sort_index()

1.0     1442
2.0     5808
3.0     6243
4.0    18943
5.0     7940
6.0     7319
7.0     1595
Name: polviews, dtype: int64

In [36]:
# One term of the total-probability sum, for polviews == 4 (moderate):
# P(B == i) * P(banker | B == i).
i = 4
prob(B == i) * conditional(banker, given=(B == i))

0.005822682085615744

In [37]:
# Sum the per-value terms over all seven polviews codes (1 through 7).
sum(
    prob(B == i) * conditional(banker, given=(B == i))
    for i in range(1, 8)
)

0.014769730168391157

## Exercises

### A.

In [53]:
# P(female and banker).
female_banker = prob(female & banker)
female_banker

0.011381618989653074

In [54]:
# P(liberal and female and banker).
lib_female_banker = prob(liberal & female & banker)
lib_female_banker

0.002556299452221546

In [57]:
# P(liberal and female and banker and democrat).
dem_lib_fem_bank = prob(liberal & female & banker & democrat)
dem_lib_fem_bank

0.0012375735443294787

### B.

In [43]:
# P(liberal | democrat): fraction of the democrat cases that are liberal.
liberal_democrat = conditional(liberal, given=democrat)
liberal_democrat

0.3891320002215698

In [45]:
# P(democrat | liberal): fraction of the liberal cases that are democrats.
democrat_liberal = conditional(democrat, given=liberal)
democrat_liberal

0.5206403320240125

### C.

In [46]:
# Boolean Series: True for cases with age under 30; then P(young).
young = gss['age'] < 30
prob(young)

0.19435991073240008

In [47]:
# Boolean Series: True for cases with age 65 or over; then P(old).
old = gss['age'] >= 65
prob(old)

0.17328058429701765

In [48]:
# Boolean Series: True for polviews codes 5-7
# (slightly conservative, conservative, extremely conservative); then P(conservative).
conservative = gss['polviews'] >= 5
prob(conservative)

0.3419354838709677

In [49]:
# P(young and liberal): fraction of all cases that are both young and liberal.
young_liberal = prob(young & liberal)
young_liberal

0.06579427875836884

In [50]:
# P(liberal | young): fraction of the young cases that are liberal.
liberal_youth = conditional(liberal, given=young)
liberal_youth

0.338517745302714

In [51]:
# P(old and conservative): fraction of all cases that are both old and conservative.
old_conservative = prob(old & conservative)
old_conservative

0.06701156421180766

In [52]:
# P(old | conservative): fraction of the conservative cases that are old.
# NOTE(review): the direction is the reverse of `liberal_youth`, which is
# P(liberal | young); this is NOT P(conservative | old) despite the parallel name.
conservative_elder = conditional(old, given=conservative)
conservative_elder

0.19597721609113564