In [46]:
import numpy as np
import pandas as pd
from empiricaldist import Pmf
from scipy.stats import binom

from utils import make_binomial

Exercise: Let’s use Bayes’s Rule to solve the Elvis problem from the Distributions chapter:

    Elvis Presley had a twin brother who died at birth. What is the probability that Elvis was an identical twin?

In 1935, about 2/3 of twins were fraternal and 1/3 were identical. The question contains two pieces of information we can use to update this prior.

    First, Elvis’s twin was also male, which is more likely if they were identical twins, with a likelihood ratio of 2.

    Also, Elvis’s twin died at birth, which is more likely if they were identical twins, with a likelihood ratio of 1.25.

If you are curious about where those numbers come from, I wrote a blog post about it: https://www.allendowney.com/blog/2020/01/28/the-elvis-problem-revisited/

Bayes’s Rule in odds form, where B is the alternative hypothesis to A: $\mathrm{odds}(A|D) = \mathrm{odds}(A) \cdot \dfrac{P(D|A)}{P(D|B)}$

In [3]:
def odds(p):
    """Convert a probability into odds in favor.

    p: probability of the event; p == 1 raises ZeroDivisionError for plain
       Python numbers because the denominator 1 - p is zero.

    returns: the ratio p / (1 - p)
    """
    against = 1 - p
    return p / against

def prob(o):
    """Convert odds in favor into a probability.

    o: odds in favor of the event; o == -1 raises ZeroDivisionError for
       plain Python numbers because the denominator o + 1 is zero.

    returns: the ratio o / (o + 1)
    """
    total = o + 1
    return o / total

In [5]:
# A: Elvis was an identical twin; B: Elvis was a fraternal twin.
# D: the twin was male, and the twin died at birth.

prior_odds = odds(1/3)  # 1/3 of twins were identical

# The problem statement gives likelihood ratios P(D|A) / P(D|B) directly,
# so each one multiplies the odds as-is; they are not odds or probabilities
# and must not be passed through prob().
likelihood_ratio_1 = 2     # twin was also male
likelihood_ratio_2 = 1.25  # twin died at birth

posterior_odds = prior_odds * likelihood_ratio_1 * likelihood_ratio_2
prob(posterior_odds)


0.5555555555555555

Exercise: The following is an interview question that appeared on glassdoor.com, attributed to Facebook:

    You’re about to get on a plane to Seattle. You want to know if you should bring an umbrella. You call 3 random friends of yours who live there and ask each independently if it’s raining. Each of your friends has a 2/3 chance of telling you the truth and a 1/3 chance of messing with you by lying. All 3 friends tell you that “Yes” it is raining. What is the probability that it’s actually raining in Seattle?

Use Bayes’s Rule to solve this problem. As a prior you can assume that it rains in Seattle about 10% of the time.

This question causes some confusion about the differences between Bayesian and frequentist interpretations of probability. (The sentence originally continued with a pointer to further reading on this point; that reference is missing from this copy.)

In [None]:
# Goal: P(rain | all three friends say "yes"), using Bayes's Rule in odds form:
#   odds(rain | YYY) = odds(rain) * P(YYY | rain) / P(YYY | no rain)

prior_odds = odds(0.1)  # it rains in Seattle about 10% of the time

# One friend's "yes": P(yes | rain) = 2/3 (telling the truth) and
# P(yes | no rain) = 1/3 (lying), so each answer carries a likelihood ratio
# of (2/3) / (1/3) = 2. The friends answer independently, so the three
# ratios multiply.
#
# Pitfall: P(YYY) is not (2/3)**3 + (1/3)**3 -- that sum ignores the prior
# weights on rain / no rain. The odds form avoids computing P(YYY) at all.
likelihood_ratio = 2**3

posterior_odds = prior_odds * likelihood_ratio
prob(posterior_odds)

0.4705882352941177


Exercise: According to the CDC, people who smoke are about 25 times more likely to develop lung cancer than nonsmokers.

Also according to the CDC, about 14% of adults in the U.S. are smokers. If you learn that someone has lung cancer, what is the probability they are a smoker?

In [None]:
# Evidence: P(cancer | smoker) / P(cancer | nonsmoker), taken as 25 from the
# "about 25 times more likely" statement.
likelihood_ratio = 25
prior_odds = odds(0.14)  # about 14% of U.S. adults are smokers
posterior_odds = likelihood_ratio * prior_odds
prob(posterior_odds)

0.8027522935779816

Exercise: In Dungeons & Dragons, the amount of damage a goblin can withstand is the sum of two six-sided dice. The amount of damage you inflict with a short sword is determined by rolling one six-sided die. A goblin is defeated if the total damage you inflict is greater than or equal to the amount it can withstand.

Suppose you are fighting a goblin and you have already inflicted 3 points of damage. What is your probability of defeating the goblin with your next successful attack?

Hint: You can use Pmf.sub_dist to subtract a constant amount, like 3, from a Pmf.

In [None]:
# Fair six-sided die: each face 1..6 gets probability 1/6.
faces = list(range(1, 7))
die = Pmf(1/6, faces)

person = die.copy()         # short-sword damage: one die
goblin = die.add_dist(die)  # goblin hit points: sum of two dice


In [19]:
# Subtracting the 3 points already inflicted shifts the hit-point
# distribution down by 3: 12 becomes 9, 2 becomes -1.
wounded_goblin = Pmf.sub_dist(goblin, 3)

# First attempt -- not quite right: -1 and 0 would mean the goblin is
# already dead, so those states should be ignored.
posterior = wounded_goblin.sub_dist(person)
defeated = posterior.qs <= 0
posterior[defeated].sum()

np.float64(0.5)

In [21]:
# Remaining hit points of -1 or 0 would mean the goblin was already
# defeated; zero out those states and renormalize over the rest.
impossible_hp = [-1, 0]
wounded_goblin.loc[impossible_hp] = 0
wounded_goblin.normalize()
wounded_goblin

Unnamed: 0,probs
-1,0.0
0,0.0
1,0.090909
2,0.121212
3,0.151515
4,0.181818
5,0.151515
6,0.121212
7,0.090909
8,0.060606


In [23]:
# Hit points left after the next attack; the goblin is defeated when this
# is zero or negative. Compute the probability two ways as a cross-check.
posterior = wounded_goblin.sub_dist(person)
by_mask = posterior[posterior.qs <= 0].sum()
by_method = posterior.prob_le(0)
print(by_mask, by_method)

0.4545454545454545 0.4545454545454545


Exercise: Suppose I have a box with a 6-sided die, an 8-sided die, and a 12-sided die. I choose one of the dice at random, roll it twice, multiply the outcomes, and report that the product is 12. What is the probability that I chose the 8-sided die?

Hint: Pmf provides a function called mul_dist that takes two Pmf objects and returns a Pmf that represents the distribution of the product.

In [37]:
# Each of the three dice is equally likely to have been chosen.
prior = Pmf(1/3, [6, 8, 12])

# Outcome distribution of a single roll for each die (faces 1..sides).
dice6, dice8, dice12 = (
    Pmf.from_seq(list(range(1, sides + 1))) for sides in (6, 8, 12)
)

# Distribution of the product of two rolls of the same die.
dice66, dice88, dice1212 = (
    single.mul_dist(single) for single in (dice6, dice8, dice12)
)

In [40]:
# Likelihood of the reported product (12) under each die, in the same
# order as the prior's hypotheses: 6-, 8-, 12-sided.
likelihood = [product_pmf[12] for product_pmf in (dice66, dice88, dice1212)]
posterior = prior * likelihood
posterior.normalize()
posterior[8]

np.float64(0.2903225806451613)

In [41]:
# Display the posterior for every die, not just the 8-sided one.
posterior

Unnamed: 0,probs
6,0.516129
8,0.290323
12,0.193548


Exercise: Betrayal at House on the Hill is a strategy game in which characters with different attributes explore a haunted house. Depending on their attributes, the characters roll different numbers of dice. For example, if attempting a task that depends on knowledge, Professor Longfellow rolls 5 dice, Madame Zostra rolls 4, and Ox Bellows rolls 3. Each die yields 0, 1, or 2 with equal probability.

If a randomly chosen character attempts a task three times and rolls a total of 3 on the first attempt, 4 on the second, and 5 on the third, which character do you think it was?

In [44]:
hypos = ['longfellow', 'zostra', 'ox']
nr_dice = {'longfellow': 5, 'zostra': 4, 'ox': 3}
sides = [0, 1, 2]

prior = Pmf(1, hypos)  # unnormalized uniform prior over the characters

# One die, shared by every character: faces 0, 1, 2 equally likely.
# Built once, outside the loop; add_dist returns a new Pmf, so no defensive
# copies are needed.
betrayal_die = Pmf.from_seq(sides)

likelihoods = []
# The loop variable is `character` rather than `person` so that this cell
# does not overwrite the `person` Pmf used by the goblin exercise.
for character in hypos:
    # Distribution of the total over nr_dice[character] dice.
    total = betrayal_die
    for _ in range(nr_dice[character] - 1):
        total = Pmf.add_dist(total, betrayal_die)

    # Probability of the three observed totals (3, then 4, then 5),
    # treating the attempts as independent.
    likelihoods.append(total[3] * total[4] * total[5])
likelihoods

[np.float64(0.004798274879055247),
 np.float64(0.00915247412224499),
 np.float64(0.006401463191586647)]

In [45]:
# Bayesian update: weight each character's prior by the likelihood of the
# observed rolls (likelihoods is in the same order as hypos), then normalize.
posterior = prior * likelihoods
posterior.normalize()
posterior

Unnamed: 0,probs
longfellow,0.235762
zostra,0.449704
ox,0.314534


Exercise: There are 538 members of the United States Congress. Suppose we audit their investment portfolios and find that 312 of them out-perform the market. Let’s assume that an honest member of Congress has only a 50% chance of out-performing the market, but a dishonest member who trades on inside information has a 90% chance. How many members of Congress are honest?

In [80]:
# Likelihood table for the Congress audit.
# Column n_honest holds the distribution of the total number of members who
# out-perform the market when n_honest members are honest (50% chance each)
# and the other n - n_honest are dishonest (90% chance each). The row label
# is that total, so row 312 gives the likelihood of the observed count under
# every hypothesis.
# NOTE(review): make_binomial comes from the local utils module; presumably
# make_binomial(k, p) returns a Pmf of a binomial(k, p) count -- confirm.

n = 538
ns = range(0, n+1)
table = pd.DataFrame(index=ns, columns=ns, dtype=float)

for n_honest in ns:
    n_dishonest = n - n_honest

    dist_honest = make_binomial(n_honest, 0.5)
    dist_dishonest = make_binomial(n_dishonest, 0.9)
    # total out-performers = honest out-performers + dishonest out-performers
    dist_total = Pmf.add_dist(dist_honest, dist_dishonest)
    table[n_honest] = dist_total

In [77]:
# Row for an observed total of 312 out-performers: one value per column,
# i.e. per hypothesized number of honest members.
table.loc[312]

0      1.666278e-83
1      4.036647e-83
2      9.764057e-83
3      2.358164e-82
4      5.686556e-82
           ...     
534    5.631374e-05
535    5.006628e-05
536    4.447099e-05
537    3.946482e-05
538    3.499024e-05
Name: 312, Length: 539, dtype: float64

In [78]:
# One hypothesis per possible number of honest members: 0, 1, ..., n.
n_hypos = n + 1
hypos = np.arange(n_hypos)
prior = Pmf(1, hypos)  # every hypothesis gets the same (unnormalized) weight
len(prior)

539

In [81]:
# Bayesian update: multiply the prior over the number of honest members by
# the likelihood of observing 312 out-performers under each hypothesis,
# normalize, and report the posterior mean.
posterior = prior * table.loc[312]
posterior.normalize()
posterior.mean()

np.float64(431.4882114501996)

In [82]:
# Hypothesis (number of honest members) with the highest posterior probability.
posterior.max_prob()

np.int64(430)

In [73]:
# 90% credible interval for the number of honest members.
posterior.credible_interval(0.9)

array([388., 477.])