In [1]:
from fractions import Fraction

import pandas as pd



    Suppose there are two bowls of cookies.

        Bowl 1 contains 30 vanilla cookies and 10 chocolate cookies.

        Bowl 2 contains 20 vanilla cookies and 20 chocolate cookies.

    Now suppose you choose one of the bowls at random and, without looking, choose a cookie at random. If the cookie is vanilla, what is the probability that it came from Bowl 1?


In [3]:
# Bayes table for the cookie problem: one row per hypothesis (the bowl the cookie came from).
table = pd.DataFrame(index=pd.Index(['Bowl 1', 'Bowl 2']))

In [None]:
# Prior P(H_i): each bowl is equally likely to be the one chosen.
table['prior'] = [1/2, 1/2]
table

Unnamed: 0,prior
Bowl 1,0.5
Bowl 2,0.5


In [None]:
# Likelihood P(vanilla | bowl_i): 30 of 40 cookies in Bowl 1, 20 of 40 in Bowl 2.
table['likelihood'] = [3/4, 1/2]
table

Unnamed: 0,prior,likelihood
Bowl 1,0.5,0.75
Bowl 2,0.5,0.5


In [None]:
# This part is important.
# 'unnorm' is the numerator of Bayes' theorem, P(H_i) * P(D|H_i). It is called
# "unnormalized" because it has not yet been divided by P(D).
# Note: P(D) = P(H_1) * P(D|H_1) + P(H_2) * P(D|H_2), which is just the sum of these numerators.
table['unnorm'] = table['prior'] * table['likelihood']
table

Unnamed: 0,prior,likelihood,unnorm
Bowl 1,0.5,0.75,0.375
Bowl 2,0.5,0.5,0.25


In [None]:
# P(D): the total probability of the data, i.e. the sum of the unnormalized column.
p_of_D = table['unnorm'].sum()
p_of_D  # 5/8 (0.625), the same as working it out by hand.

np.float64(0.625)

In [None]:
# Notice that the likelihoods do not sum to 1. This is expected: they are conditional
# probabilities, each conditioned on a different hypothesis, so nothing forces them to sum to 1.
# Notice that the posteriors do sum to 1, because we divided by P(D).
# This is why P(D) is called the normalizing constant.
table['posterior'] = table['unnorm'] / p_of_D
table

Unnamed: 0,prior,likelihood,unnorm,posterior
Bowl 1,0.5,0.75,0.375,0.6
Bowl 2,0.5,0.5,0.25,0.4



# Dice Problem
    Suppose I have a box with a 6-sided die, an 8-sided die, and a 12-sided die. I choose one of the dice at random, roll it, and report that the outcome is a 1. What is the probability that I chose the 6-sided die?


In [9]:
# Bayes table for the dice problem: one row per hypothesis (number of sides on the chosen die).
table2 = pd.DataFrame(index=pd.Index([6, 8, 12]))

In [11]:
# Exact fractions (Fraction is imported in the notebook's top import cell) keep the
# table readable as 1/3, 1/6, ... instead of rounded floats.

# Prior: each of the three dice is equally likely to be picked.
table2['prior'] = Fraction(1, 3)
# Likelihood of rolling a 1 on an n-sided die is 1/n.
table2['likelihood'] = [Fraction(1, 6), Fraction(1, 8), Fraction(1, 12)]
table2

Unnamed: 0,prior,likelihood
6,1/3,1/6
8,1/3,1/8
12,1/3,1/12


In [14]:
def update(table):
    """Run a Bayesian update on a Bayes table, modifying it in place.

    Reads the 'prior' and 'likelihood' columns of `table` and writes two
    columns back to it:
      - 'unnorm': prior * likelihood (the numerator of Bayes' theorem)
      - 'posterior': 'unnorm' divided by the sum of 'unnorm'

    Returns the sum of the 'unnorm' column, i.e. the total probability of
    the data (the normalizing constant).
    """
    numerators = table['prior'] * table['likelihood']
    table['unnorm'] = numerators

    # The normalizing constant is the total of the unnormalized values.
    total = numerators.sum()

    table['posterior'] = numerators / total
    return total

In [15]:
# Run the update on the dice table: adds 'unnorm' and 'posterior' columns in place
# and returns P(D), the total probability of the data (rolling a 1).
p_of_D = update(table2)

In [16]:
# Posterior after observing a 1: the 6-sided die is the most likely (4/9).
table2

Unnamed: 0,prior,likelihood,unnorm,posterior
6,1/3,1/6,1/18,4/9
8,1/3,1/8,1/24,1/3
12,1/3,1/12,1/36,2/9


2.5. The Monty Hall Problem

In [17]:
# Bayes table for Monty Hall: one row per hypothesis (the door hiding the car).
table3 = pd.DataFrame(index=pd.Index(['Door 1', 'Door 2', 'Door 3']))
# Prior: the car is equally likely to be behind any of the three doors.
table3['prior'] = Fraction(1, 3)
table3

Unnamed: 0,prior
Door 1,1/3
Door 2,1/3
Door 3,1/3


In [None]:
# Deviating from the book's explanation and following the Wikipedia treatment.
# Setup: I choose Door 1, and Monty then opens Door 3. That observation is the data D.
# Hypothesis H_i: the car is behind Door i. Each has prior P(H_i) = 1/3.
# Likelihood P(D | H_i): the probability that Monty opens Door 3 given the car is behind Door i.
# Monty must open a door, so we condition on Door 3 being the one he opens:
#   - car behind Door 1: Monty opens Door 2 or Door 3 at random -> 1/2
#   - car behind Door 2: Monty must open Door 3                 -> 1
#   - car behind Door 3: Monty cannot open Door 3               -> 0
#
# When Monty opens a door, he provides information we can use to update our belief
# about the location of the car.
# Part of the information is obvious. If he opens Door 3, we know the car is not behind
# Door 3. But part of the information is more subtle.
# Opening Door 3 is more likely if the car is behind Door 2, and less likely if it is
# behind Door 1. So the data is evidence in favor of Door 2.
# We will come back to this notion of evidence in future chapters.
table3['likelihood'] = [Fraction(1, 2), Fraction(1, 1), 0]

# Keep the table as the last expression so the cell displays it
# (a trailing bare string would be displayed instead).
table3

Unnamed: 0,prior,likelihood
Door 1,1/3,1/2
Door 2,1/3,1
Door 3,1/3,0


In [19]:
# Update on "Monty opened Door 3": the posterior favors Door 2 (2/3) over Door 1 (1/3),
# so switching doors is the better strategy.
p_of_D = update(table3)
table3

Unnamed: 0,prior,likelihood,unnorm,posterior
Door 1,1/3,1/2,1/6,1/3
Door 2,1/3,1,1/3,2/3
Door 3,1/3,0,0,0
