In [2]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import my_utils 


In [2]:
url = 'https://github.com/AllenDowney/BiteSizeBayes/raw/master/gss_bayes.csv'
my_utils.file_download(url)
gss = pd.read_csv('./gss_bayes.csv')
gss.head()


file already exists inside the path


Unnamed: 0,caseid,year,age,sex,polviews,partyid,indus10
0,1,1974,21.0,1,4.0,2.0,4970.0
1,2,1974,41.0,1,5.0,0.0,9160.0
2,5,1974,58.0,2,6.0,1.0,2670.0
3,6,1974,30.0,1,5.0,4.0,6870.0
4,7,1974,48.0,1,5.0,4.0,7860.0


####  <span style="color:yellow">Finding the probability of the banker in gss dataset </span> 

In [7]:
def prob(sample):
    return sample.mean()

banker  = (gss['indus10'] == 6870)
female = (gss['sex'] == 2)
liberal = (gss['polviews'] <= 3)
democrat = (gss['partyid'] <= 1)
print('percentage of bankers in the population is {}'.format(prob(banker)*100))
print('percentage of females in the population is {}'.format(prob(female)*100))
print('percentage of liberals in the population is {}'.format(prob(liberal)*100))
print('percentage of democrats in the population is {}'.format(prob(democrat)*100))

percentage of bankers in the population is 1.4769730168391155
percentage of females in the population is 53.78575776019476
percentage of liberals in the population is 27.374721038750256
percentage of democrats in the population is 36.626090484885374


#### <span style="color:yellow">Finding the conjunction or intersection 

In [8]:
print('joint probability of banker and female is given by {}'.format(prob(banker & female)))
print('joint probability of banker and democrat is given by {}'.format(prob(banker & democrat)))
    

joint probability of banker and female is given by 0.011381618989653074
joint probability of banker and democrat is given by 0.004686548995739501


#### <span style ='color:yellow'>Conditional Probability 
$P(\frac{A}{B}) = \frac{P(A \cap B)}{P(B)}$ 

In [9]:
# democrats given liberals 
print('conditional probability of democrats given liberals is {}'.format(prob(democrat[liberal])))
print('conditional probability of female bankers is {}'.format(prob(female[banker])))

conditional probability of democrats given liberals is 0.5206403320240125
conditional probability of female bankers is 0.7706043956043956


In [13]:
print('joint probability of femle and banker is {}'.format(prob(female & banker)))
print('conditional probability of banker given female is {}'.format(prob(banker[female])))
print('joint probability of female and banker using conditional probability is {}'.format(prob(female)*prob(banker[female])))
print('joint probability of female given banker is {}'.format(prob(female[banker])))
print('joint probability of banker and female using conditional probability is {}'.format(prob(banker)*prob(female[banker])))

joint probability of femle and banker is 0.011381618989653074
conditional probability of banker given female is 0.02116102749801969
joint probability of female and banker using conditional probability is 0.011381618989653073
joint probability of female given banker is 0.7706043956043956
joint probability of banker and female using conditional probability is 0.011381618989653074


In [22]:
def conditional(event_A,event_B):
    return prob(event_A[event_B])   

print("conditional probability of banker given female is {0:.3f}".format(conditional(banker, female)))
print('conditional probability of liberal and democrat given female is {0:.3f}'.format(conditional(liberal & democrat, female))) 
print('conditional probability of liberal and female given banker is {0:.3f}'.format(conditional(liberal & female, banker)))

conditional probability of banker given female is 0.021
conditional probability of liberal and democrat given female is 0.153
conditional probability of liberal and female given banker is 0.173


#### <span style='color:yellow'>The Cookie Problem 

1. Bowl 1 contains 30 vanilla and 10 chcocolate cookies
2. Bowl 2 contains 20 vanilla and 20 chocolate cookies 

 We choose one bowl at random and choose a cookie at random, if the cookie is vanilla , what is the probability that it came from Bowl 1? 


In [25]:
# using bayes table 
cookie_table = pd.DataFrame(index= ['Bowl1', 'Bowl2'])
# choosing bowl at random
cookie_table['prior'] = [1/2,1/2]
# updating the table with likelihood  of vanilla under both hypothesis, Bowl 1 and Bowl 2
cookie_table['likelihood'] = [3/4,1/2]
cookie_table.head() 

Unnamed: 0,prior,likelihood
Bowl1,0.5,0.75
Bowl2,0.5,0.5


In [27]:
# using bayes theorm, prioir * likelihood will give joint probability of vanilla and bowls 
cookie_table['joint'] = cookie_table['prior']*cookie_table['likelihood']
cookie_table.head()

Unnamed: 0,prior,likelihood,joint
Bowl1,0.5,0.75,0.375
Bowl2,0.5,0.5,0.25


In [31]:
cookie_table['posterior'] = cookie_table['joint'] /cookie_table['joint'].sum()
cookie_table.head()

Unnamed: 0,prior,likelihood,joint,posterior
Bowl1,0.5,0.75,0.375,0.6
Bowl2,0.5,0.5,0.25,0.4


#### <span style = 'color:yellow'>The Dice Problem 
* box of 6-sided die, 8-sided die and 12-sided die.
* choose one at random , roll it and reported the outcome as 1
* what is the probability that the die is a six sided one ?

In [34]:
dice_table = pd.DataFrame(index = ['6','8','12'])
dice_table['prior'] = [1/3,1/3,1/3]
dice_table['likelihood'] = [1/6,1/8,1/12]
dice_table['joint'] = dice_table['prior']*dice_table['likelihood']
dice_table['posterior'] = dice_table['joint'] / dice_table['joint'].sum()
dice_table.head()

Unnamed: 0,prior,likelihood,joint,posterior
6,0.333333,0.166667,0.055556,0.444444
8,0.333333,0.125,0.041667,0.333333
12,0.333333,0.083333,0.027778,0.222222


#### <span style= 'color:yellow'>The Monty Hall Problem

* The host of the game show, Monty Hall, shows you 3 closed doors- numbered 1,2,3 and tells that prize is behind each door 
* One prize is valuable , a car and other two are less valuable, goats 
* the object of the game is to guess which door has the car 

Suppose you pick Door 1. Befor opening the door you choose, Montey opens Door 3 and revelas a goat. Then Monty offers you the option to stick with the original choice or switch to the remaining unopened door.

To maximize the chance of winning the car, whether we should stick to Door 1 or switch to Door 2 ?

Answer : Let us solve this using Bayesian inference. Following are the assumptions
1. The host will not open the door selected by the player
2. The host will always open the door with no prize

We will start with the hypothesis = the prize is equally likely under each door . The probability is 1/3

In [4]:
# prior 
prize_table = pd.DataFrame(index = ['Door 1','Door 2','Door 3'])
prize_table['prior'] = [1/3,1/3,1/3]
prize_table.head()

Unnamed: 0,prior
Door 1,0.333333
Door 2,0.333333
Door 3,0.333333


The observed data : Host opens the door 3 and revealed the goat. The probability of the data under initial hypothesis 

1. Prize at door 1 : Probability of Host opens door 3 given prize at door 1 and player selected the Door 1 = the host will equally likely chooses Door 2 and Door 3 = 1/2
2. Prize at door 2 : Probability of Host opens door 3 given prize at door 2 and player selected the Door 1 = the host will opens 3 = 1
3. Prize at door 3 : Probability of Host opens door 3 given prize at door 3 and player selected the Door 1 = the host will never opens door 3 = 0

In [5]:
prize_table['likelihood'] = [1/2, 1 , 0]
prize_table['joint'] = prize_table['prior']*prize_table['likelihood']
prize_table['posterior'] = prize_table['joint']/ prize_table['joint'].sum()
prize_table.head()

Unnamed: 0,prior,likelihood,joint,posterior
Door 1,0.333333,0.5,0.166667,0.333333
Door 2,0.333333,1.0,0.333333,0.666667
Door 3,0.333333,0.0,0.0,0.0


As per the above posterior probability, its better to switch to Door 2

#### <span style='color:yellow'>Coins Problem 

Two coins in a box

1. One is normal with heads on one side and tails on the other and one is a trick coin with heads on both sides. 
2. choose a coin at random and see that one of the sides is heads.

What is the probability that you chose the trick coin 

In [6]:
coin_table = pd.DataFrame(index = ['Normal','Trick'])
coin_table['prior'] = [1/2,1/2]
coin_table['likelihood'] = [1/2, 1]
coin_table['joint'] = coin_table['likelihood']*coin_table['prior']
coin_table['posterior'] = coin_table['joint']/coin_table['joint'].sum()
coin_table.head()


Unnamed: 0,prior,likelihood,joint,posterior
Normal,0.5,0.5,0.25,0.333333
Trick,0.5,1.0,0.5,0.666667


#### <span style='color:yellow'>Children Problem
A person has two children and one of them is a girl. What is the probability that both children are girls 

In [8]:
children_table = pd.DataFrame(index = ['BB','GG','BG','GB'])
children_table['prior'] = [1/4,1/4,1/4,1/4]
children_table['likelihood'] = [0,1,1,1]
children_table['joint'] = children_table['prior']*children_table['likelihood']
children_table['posterior'] = children_table['joint']/children_table['joint'].sum()
children_table.head()


Unnamed: 0,prior,likelihood,joint,posterior
BB,0.25,0,0.0,0.0
GG,0.25,1,0.25,0.333333
BG,0.25,1,0.25,0.333333
GB,0.25,1,0.25,0.333333


#### <span style='color:yellow'> M&M's Color Problem
1. In 1994, the color mix in a bag of plain M & M's was 30% Brown, 20% Yellow, 20% Red, 10%Green, 10% Orange, 10% Tan
2. In 1996, it was 24% Blue, 20% Green, 16% Orange, 14% Yellow, 13% Red, 13% Brown
   
Suppose a friend of you have two bags of M & M's and he tells you that one is from 1994 and one from 1996. He won't tell you which is which, but he gives you one M &M from each bag. One is yellow and one is green. What is the probability that one came from the 1994 bag? 

* Hypothesis : the selection of the bags 
* Data : the friend selected one Yellow & one Green from the two bags 
* Posterior : Given one green and one yellow, what is the probability of yellow coming from 1994 bag? 

In [5]:
bayes_table = pd.DataFrame(index = ['1994','1996'])
bayes_table['prior'] = [1/2,1/2]
bayes_table['likelihood'] = [0.2*0.2, 0.14*0.1]
bayes_table['joint'] = bayes_table['prior']*bayes_table['likelihood']
bayes_table['posterior'] = bayes_table['joint']/bayes_table['joint'].sum()
bayes_table.head()


Unnamed: 0,prior,likelihood,joint,posterior
1994,0.5,0.04,0.02,0.740741
1996,0.5,0.014,0.007,0.259259


#### <span style= 'color:yellow'> Bolt Problem

In a manufaturing factory, machines A,B,C produce 25%, 35% and 40% bolts respectively. Out of the total, 5%, 4% and 2% are defective bolts. A bolt is drawn random from the product. If bolt drawn is found to be defective, What is the probability, it is manufactured by B 

In [9]:
bolt_table = pd.DataFrame(index = ['A','B','C'])
bolt_table['prior'] = [0.25,0.35,0.4]
bolt_table['likelihood'] = [0.05,0.04,0.02]
bolt_table['joint'] = bolt_table['prior']*bolt_table['likelihood']
bolt_table['posterior'] = bolt_table['joint']/bolt_table['joint'].sum()
bolt_table.head() 

Unnamed: 0,prior,likelihood,joint,posterior
A,0.25,0.05,0.0125,0.362319
B,0.35,0.04,0.014,0.405797
C,0.4,0.02,0.008,0.231884


#### <span style='color:yellow'>Insurance Problem
An insurance company insured 2000 scooter drivers, 4000 car drivers, 6000 truck drivers. Probability of accident involving scooter, car and truck is 0.01, 0.03, 0.15 respectively. One of the insured person met with an accident and what is the probability that he is a scooter driver

In [10]:
accident_table = pd.DataFrame(index = ['scooter','car','truck'])
accident_table['prior' ] = [0.2, 0.4, 0.6]
accident_table['likeihood'] = [0.01,0.03,0.15]
accident_table['unorm'] = accident_table['prior']*accident_table['likeihood']
accident_table['posterior'] = accident_table['unorm']/accident_table['unorm'].sum()
accident_table.head()


Unnamed: 0,prior,likeihood,unorm,posterior
scooter,0.2,0.01,0.002,0.019231
car,0.4,0.03,0.012,0.115385
truck,0.6,0.15,0.09,0.865385


#### <span style = 'color:yellow'> Truth Problem

A man speaks truth 3 out of 4 times. He threw a dice and told it is a 'six'. What is the probability that it is actually 6 ?

In [11]:
truth_table = pd.DataFrame(index = ['Truth','False'])
truth_table['prior'] = [1/6, 5/6]
truth_table['likelihood'] = [0.75, 0.25]
truth_table['unorm'] = truth_table['prior']*truth_table['likelihood']
truth_table['posterior'] = truth_table['unorm']/truth_table['unorm'].sum()
truth_table.head()

Unnamed: 0,prior,likelihood,unorm,posterior
Truth,0.166667,0.75,0.125,0.375
False,0.833333,0.25,0.208333,0.625


#### <span style = 'color:yellow'> Heroin Addiction Problem 
The probability of being addicted to heroin is 0.01% for a person randomly picked from a population (base rate). If a randomly picked person from this population is addicted to heroin, the probability is 100% that he or she will have fresh needle pricks (sensitivity). If a randomly picked person from this population is not addicted to heroin, the probability is 0.19% that he or she will still have fresh needle pricks (false alarm rate). What is the probability that a randomly picked person from this population who has fresh needle pricks is addicted to heroin (posterior probability)?

### <span style='color:orange'> Distributions

ModuleNotFoundError: No module named 'empericaldist'