In [42]:
from ThinkBayes.code.thinkbayes import Pmf
from __future__ import division

In [12]:
# Start with an empty probability mass function; outcomes are added with Set().
pmf = Pmf()

In [18]:
# Fair six-sided die: every face from 1 to 6 gets probability 1/6.
for face in range(1, 7):
    pmf.Set(face, 1 / 6.)

In [19]:
# Show every (value, probability) pair stored in the Pmf.
pmf.Items()

[(1, 0.16666666666666666),
 (2, 0.16666666666666666),
 (3, 0.16666666666666666),
 (4, 0.16666666666666666),
 (5, 0.16666666666666666),
 (6, 0.16666666666666666)]

In [27]:
# Sample text for the word-frequency example. Several sentences are fused
# without a space after the period (e.g. "society.The", "business.Valeant").
paragraph = "William Ackman has long looked up to Warren Buffett. It took a massive, money-losing position—and an obsessive aversion to sugar—to cause some friction. The hedge-fund manager took a shot at Mr. Buffett’s longtime investment in Coca-Cola Co. on Wednesday, saying the soda company has caused enormous damage to society.The broadside came at a New York conference celebrating the Oracle of Omaha’s half-century running Berkshire Hathaway Inc. And it reflected growing frustration with repeated criticism from Mr. Buffett’s longtime sidekick against one of Mr. Ackman’s most prominent investments, Valeant Pharmaceuticals International Inc.That criticism by Berkshire Vice Chairman Charles Munger against Valeant included comments in an interview with The Wall Street Journal last week: It’s just a company that was too aggressive in ignoring moral considerations in the way it did business.Valeant has defended its ethics and says it is trying to do a better job of listening to its critics. Its stock, however, has tumbled amid questions about its relationship with specialty pharmacies that distribute its drugs and its growth prospects, producing losses on paper of more than $2 billion for Mr. Ackman’s Pershing Square Capital Management LP."

In [31]:
# Split on single spaces only: punctuation stays attached to the words, and
# sentences fused without a space (e.g. "business.Valeant") remain one token.
word_list = paragraph.split(' ')

In [32]:
# Tally how many times each token occurs; the counts become probabilities
# once the Pmf is normalized.
pmf = Pmf()
for token in word_list:
    pmf.Incr(token, 1)

In [34]:
# Turn the raw counts into probabilities. The value displayed is the total
# before normalizing, i.e. the number of tokens counted.
pmf.Normalize()

187

In [35]:
# Probability of the exact token "Valeant"; tokens that carry extra characters
# (such as "business.Valeant") are separate entries and are not included.
pmf.Prob("Valeant")

0.0106951871657754

# The Cookie Problem

In [37]:
# Prior distribution: before any cookie is drawn, both bowls are equally likely.
pmf = Pmf()
for bowl in ("Bowl 1", "Bowl 2"):
    pmf.Set(bowl, 0.5)

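Suppose we draw one cookie and it is vanilla. The likelihood of drawing a vanilla cookie from Bowl 1 is 3/4; the likelihood for Bowl 2 is 1/2. Multiplying each prior by its likelihood gives the unnormalized posterior.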

In [38]:
# Scale each prior by the likelihood of drawing a vanilla cookie from that bowl.
for bowl, vanilla_likelihood in (("Bowl 1", 0.75), ("Bowl 2", 0.5)):
    pmf.Mult(bowl, vanilla_likelihood)

In [39]:
# Unnormalized posterior: the two values no longer sum to 1.
pmf.Items()

[('Bowl 2', 0.25), ('Bowl 1', 0.375)]

In [40]:
# Rescale so the probabilities sum to 1; the value displayed is the
# normalizing constant (the sum before rescaling).
pmf.Normalize()

0.625

In [46]:
# Posterior probability of each bowl, given that the drawn cookie was vanilla.
pmf.Items()

[('Bowl 2', 0.4), ('Bowl 1', 0.6000000000000001)]

# The Bayesian Framework

In [62]:
class Cookie(Pmf):
    """Pmf over bowls that is updated from the flavor of each cookie drawn."""

    # For each bowl, the probability of drawing each flavor from it.
    mixes = {
        'Bowl 1': {'vanilla': 0.75, 'chocolate': 0.25},
        'Bowl 2': {'vanilla': 0.5, 'chocolate': 0.5},
    }

    def __init__(self, hypos):
        """Give every hypothesis in `hypos` the same prior probability.

        hypos: iterable of hypothesis names (keys of `mixes`)
        """
        Pmf.__init__(self)
        for bowl in hypos:
            # Equal weights; Normalize() below turns them into probabilities.
            self.Set(bowl, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """Return the probability of drawing flavor `data` from bowl `hypo`.

        data: flavor name, 'vanilla' or 'chocolate'
        hypo: bowl name, a key of `mixes`
        """
        return self.mixes[hypo][data]

    def Update(self, data):
        """Multiply each hypothesis by the likelihood of `data`, then renormalize.

        data: flavor of the cookie that was drawn
        """
        for bowl in self.Values():
            self.Mult(bowl, self.Likelihood(data, bowl))
        self.Normalize()

In [63]:
# The two hypotheses: the cookie came from Bowl 1 or from Bowl 2.
hypos = ['Bowl 1', 'Bowl 2']
pmf = Cookie(hypos)

In [64]:
# Observe a vanilla cookie; Update() rescales the stored probabilities in place.
pmf.Update('vanilla')

In [67]:
# Posterior probability of each bowl after seeing one vanilla cookie.
for hypo, prob in pmf.Items():
    print('%s %s' % (hypo, prob))

Bowl 2 0.4
Bowl 1 0.6


# The Monty Hall Problem

Monty shows you three closed doors and tells you that there is a prize behind each door: one prize is a car, the other two are less valuable prizes like peanut butter and fake finger nails. The prizes are arranged at random.

The object of the game is to guess which door has the car. If you guess right, you get to keep the car.

You pick a door, which we will call Door A. We’ll call the other doors B and C.

Before opening the door you chose, Monty increases the suspense by opening either Door B or C, whichever does not have the car. (If the car is actually behind Door A, Monty can safely open B or C, so he chooses one at random.)

Then Monty offers you the option to stick with your original choice or switch to the one remaining unopened door.

The question is: should you “stick” or “switch”, or does it make no difference?

In [70]:
class Monty(Pmf):
    """Pmf over which door hides the car, assuming the contestant picked Door A."""

    def __init__(self, hypos):
        """Start from a uniform prior over the doors in `hypos`.

        hypos: iterable of door labels
        """
        Pmf.__init__(self)
        for door in hypos:
            # Equal weights; Normalize() below turns them into probabilities.
            self.Set(door, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """Return the probability that Monty opens door `data` if the car is behind `hypo`.

        data: label of the door Monty opened
        hypo: label of the door hypothesized to hide the car
        """
        if hypo == data:
            # Monty never opens the door that hides the car.
            return 0
        if hypo == 'A':
            # The car is behind the contestant's door, so Monty picks B or C at random.
            return 0.5
        # The car is behind the other unopened door, so Monty had no choice.
        return 1

    def Update(self, data):
        """Multiply each hypothesis by the likelihood of `data`, then renormalize.

        data: label of the door Monty opened
        """
        for door in self.Values():
            self.Mult(door, self.Likelihood(data, door))
        self.Normalize()

In [71]:
# One hypothesis per door that might hide the car (iterating the string
# yields 'A', 'B' and 'C').
hypos = 'ABC'
pmf = Monty(hypos)
# Data: Monty opened Door B.
data = 'B'
pmf.Update(data)

In [72]:
# Posterior probability that the car is behind each door.
for hypo, prob in pmf.Items():
    print('%s %s' % (hypo, prob))

A 0.333333333333
C 0.666666666667
B 0.0


# Encapsulating the Framework

In [None]:
class Suite(Pmf):
    """Represents a suite of hypotheses and their probabilities.

    Interface sketch only: every method body is just a docstring. The working
    implementation is the Suite class imported from thinkbayes.
    """

    def __init__(self, hypo=tuple()):
        """Initializes the distribution.

        hypo: sequence of hypotheses (empty by default)
        """

    def Update(self, data):
        """Updates each hypothesis based on the data.

        data: the observation to update on; call sites pass it positionally,
              e.g. suite.Update('B')
        """

    def Print(self):
        """Prints the hypotheses and their probabilities."""

In [73]:
from ThinkBayes.code.thinkbayes import Suite

In [74]:
class Monty(Suite):
    """Suite over which door hides the car, assuming the contestant picked Door A."""

    def Likelihood(self, data, hypo):
        """Return the probability that Monty opens door `data` if the car is behind `hypo`.

        data: label of the door Monty opened
        hypo: label of the door hypothesized to hide the car
        """
        if hypo == data:
            # Monty never reveals the car.
            likelihood = 0
        elif hypo == 'A':
            # Car behind the contestant's own door: Monty chooses B or C at random.
            likelihood = 0.5
        else:
            # Car behind the remaining door: Monty was forced to open `data`.
            likelihood = 1
        return likelihood

In [75]:
# One hypothesis per door: 'A', 'B' and 'C'.
suite = Monty('ABC')

In [76]:
# Monty opens Door B; the value displayed is the normalizing constant.
suite.Update('B')

0.5

In [77]:
# Show the posterior probability of each door.
suite.Print()

A 0.333333333333
B 0.0
C 0.666666666667


# The M&M Problem

In 1995, Mars, Inc. (the maker of M&M’s) introduced blue M&M’s. Before then, the color mix in a bag of plain M&M’s was 30% Brown, 20% Yellow, 20% Red, 10% Green, 10% Orange, 10% Tan. Afterward it was 24% Blue, 20% Green, 16% Orange, 14% Yellow, 13% Red, 13% Brown.

Suppose a friend of mine has two bags of M&M’s, and he tells me that one is from 1994 and one from 1996. He won’t tell me which is which, but he gives me one M&M from each bag. One is yellow and one is green. What is the probability that the yellow one came from the 1994 bag?

In [80]:
# Color mix of a bag of plain M&M's, in percent, before blue was introduced (1994).
mix94 = {
    'brown': 30,
    'yellow': 20,
    'red': 20,
    'green': 10,
    'orange': 10,
    'tan': 10,
}
# Color mix, in percent, after blue was introduced (1996).
mix96 = {
    'blue': 24,
    'green': 20,
    'orange': 16,
    'yellow': 14,
    'red': 13,
    'brown': 13,
}

In [81]:
# Hypothesis A: bag1 is the 1994 bag and bag2 is the 1996 bag.
hypoA = {'bag1': mix94, 'bag2': mix96}
# Hypothesis B: the other way round.
hypoB = {'bag1': mix96, 'bag2': mix94}

In [82]:
# Map each hypothesis label to its bag -> color-mix assignment.
hypotheses = {'A': hypoA, 'B': hypoB}

In [84]:
class M_and_M(Suite):
    """Suite for the M&M problem: which bag is from 1994 and which from 1996.

    Hypothesis 'A': bag1 is the 1994 bag and bag2 is the 1996 bag.
    Hypothesis 'B': bag1 is the 1996 bag and bag2 is the 1994 bag.
    """

    # Color mixes in percent. They are used as relative likelihoods, so they
    # do not need to be rescaled to probabilities.
    mix94 = dict(brown=30, yellow=20, red=20, green=10, orange=10, tan=10)
    mix96 = dict(blue=24, green=20, orange=16, yellow=14, red=13, brown=13)

    # Bag -> color-mix assignment under each hypothesis.
    hypoA = dict(bag1=mix94, bag2=mix96)
    hypoB = dict(bag1=mix96, bag2=mix94)
    hypotheses = dict(A=hypoA, B=hypoB)

    def Likelihood(self, data, hypo):
        """Return the likelihood of drawing `color` from `bag` under `hypo`.

        data: (bag, color) pair, e.g. ('bag1', 'yellow')
        hypo: hypothesis label, 'A' or 'B'

        Returns the percentage of `color` in the mix that `hypo` assigns to
        `bag`, or 0 when that year's mix does not contain the color (for
        example blue in 1994 or tan in 1996).

        Raises KeyError if `color` appears in neither mix, or if `hypo` or
        `bag` is unknown.
        """
        bag, color = data
        mix = self.hypotheses[hypo][bag]
        if color not in self.mix94 and color not in self.mix96:
            # A color unknown to both years is almost certainly a typo; keep
            # failing loudly instead of zeroing out every hypothesis.
            raise KeyError(color)
        # A real color missing from this year's mix is impossible under this
        # hypothesis, so its likelihood is 0 rather than an error.
        return mix.get(color, 0)

In [85]:
# Two hypotheses, 'A' and 'B', about which bag comes from which year.
suite = M_and_M('AB')

In [86]:
# Observation 1: a yellow M&M drawn from bag1.
suite.Update(('bag1', 'yellow'))
# Observation 2: a green M&M drawn from bag2. Only this last call's return
# value (its normalizing constant) is displayed.
suite.Update(('bag2', 'green'))

15.882352941176471

In [87]:
# Posterior probability of each hypothesis after both observations.
suite.Print()

A 0.740740740741
B 0.259259259259
