In [1]:
from thinkbayes import Pmf
import thinkbayes as tb

In [2]:
# Model a fair six-sided die: every face 1..6 gets the same probability.
pmf = Pmf()
for face in range(1, 7):
    pmf.Set(face, 1/6)
    

In [3]:
# Display the value -> probability mapping stored in the die Pmf.
pmf.GetDict()


{1: 0.16666666666666666,
 2: 0.16666666666666666,
 3: 0.16666666666666666,
 4: 0.16666666666666666,
 5: 0.16666666666666666,
 6: 0.16666666666666666}

In [4]:
# Build a word-frequency distribution: count each lower-cased word once per
# occurrence, then normalize the counts into probabilities.
pmf = Pmf()
text = "This is black and this is blue but this is neither black nur blue"
word_list = list(map(str.lower, text.split()))
for token in word_list:
    pmf.Incr(token, 1)
pmf.Normalize()
# Relative frequency of the word "black" in the text.
print(pmf.Prob("black"))

0.14285714285714285


# The cookie problem
Suppose a vanilla cookie is drawn from one of two bowls, each containing a known number of vanilla and chocolate cookies. Bowl 1 contains 30 vanilla and 10 chocolate cookies; bowl 2 contains 20 of each.

Now we want to determine the probability that the cookie was taken from bowl 1 or from bowl 2, respectively.

The hypotheses are:
* Bowl 1 - the cookie was taken from bowl 1.
* Bowl 2 - the cookie was taken from bowl 2.

Because we initially have two equally probable hypotheses, the prior probability is 1/2 for each.
The distribution is called *prior distribution*.

In [5]:
# Prior distribution: both bowls are equally likely before seeing any data.
pmf = Pmf()
for bowl in ('Bowl 1', 'Bowl 2'):
    pmf.Set(bowl, 0.5)

To update the distribution based on new data (the vanilla cookie), we multiply each prior by the corresponding likelihood. The likelihood of drawing a vanilla cookie from bowl 1 is 3/4. The likelihood for bowl 2 is 1/2.

In [6]:
# Multiply each prior by the likelihood of drawing a vanilla cookie from
# that bowl (3/4 for bowl 1, 1/2 for bowl 2).
for bowl, vanilla_likelihood in (('Bowl 1', 0.75), ('Bowl 2', 0.5)):
    pmf.Mult(bowl, vanilla_likelihood)

After this update we need to renormalize the distribution. This works because our hypotheses are mutually exclusive and collectively exhaustive.

Now the distribution contains the posterior probability for each hypothesis, which is called the *posterior distribution*.

In [8]:
# Renormalize the distribution, then report the posterior probability
# of bowl 1.
pmf.Normalize()
posterior_bowl_1 = pmf.Prob('Bowl 1')
print("The posterior probability for bowl 1 is: {}".format(posterior_bowl_1))

The posterior probability for bowl 1 is: 0.6000000000000001


# The Bayesian Framework
We simply encapsulate the code of our last problem into a class.

In [20]:
class Cookie(Pmf):
    """Distribution over bowl hypotheses for the cookie problem.

    Each hypothesis is the name of a bowl; the data passed to ``Update``
    is the flavour of a drawn cookie.
    """

    # For every bowl, the probability of drawing each flavour from it.
    mixes = {
        'Bowl 1': {'vanilla': 0.75, 'chocolate': 0.25},
        'Bowl 2': {'vanilla': 0.5, 'chocolate': 0.5},
    }

    def __init__(self, hypos):
        """Create a uniform prior over the given hypotheses.

        hypos: iterable of hypothesis names (keys of ``mixes``).
        """
        super().__init__()
        # Give every hypothesis the same weight; the exact value does not
        # matter because the distribution is normalized right afterwards.
        for name in hypos:
            self.Set(name, 1)
        self.Normalize()

    def Update(self, data):
        """Update the distribution in place with one observation.

        data: the observed cookie flavour (a key of the per-bowl mix).
        """
        # Scale each hypothesis by the likelihood of the data under it ...
        for name in self.Values():
            self.Mult(name, self.Likelihood(data, name))
        # ... then renormalize.
        self.Normalize()

    def Likelihood(self, data, hypo):
        """Return the probability of drawing flavour ``data`` from bowl ``hypo``.

        Raises KeyError if the bowl or the flavour is not present in ``mixes``.
        """
        return self.mixes[hypo][data]

# Start from a uniform prior over both bowls, observe one vanilla cookie
# and print the resulting probability of each bowl.
hypos = ['Bowl 1', 'Bowl 2']
pmf = Cookie(hypos)
pmf.Update('vanilla')
for bowl, posterior in pmf.Items():
    print("{} {}".format(bowl, posterior))


Bowl 2 0.4
Bowl 1 0.6000000000000001


This code is not much of an improvement. But at least it generalizes a bit: we can draw multiple cookies (with replacement).

In [23]:
# Feed several draws into the distribution, one observation at a time.
# NOTE: this keeps updating the existing `pmf` object, so the evidence is
# applied on top of whatever updates were made before; running this cell
# again applies the same draws once more.
dataset = ['vanilla', 'chocolate', 'vanilla']
for flavour in dataset:
    pmf.Update(flavour)
for bowl, posterior in pmf.Items():
    print("{} {}".format(bowl, posterior))

Bowl 2 0.3189037682964808
Bowl 1 0.6810962317035191


# Encapsulating the Framework
Class Cookie can be generalized to solve other problems. thinkbayes.Suite is such a generalization.

To apply Suite to a problem, derive a class from it and override the Likelihood method.

Let us apply Suite to the Monty Hall problem.

In [26]:
from thinkbayes import Suite

class Monty(Suite):
    """Suite of door hypotheses for the Monty Hall problem."""

    def Likelihood(self, data, hypo):
        """Return the likelihood of Monty opening door ``data`` under ``hypo``.

        data: the door Monty opened.
        hypo: the door assumed by this hypothesis.
        """
        # The opened door is ruled out as a hypothesis.
        if hypo == data:
            return 0
        # 'A' is the first door chosen by the player.
        if hypo == 'A':
            return 0.5
        # Any other hypothesis.
        return 1

# Solve the Monty Hall problem for the case where Monty opens door B.
suite = Monty('ABC')  # one hypothesis per door: 'A', 'B' and 'C'
suite.Update('B')     # data: the door Monty opened
suite.Print()         # print each hypothesis with its resulting probability


A 0.3333333333333333
B 0.0
C 0.6666666666666666
