In [1]:
# refer to https://stackoverflow.com/questions/4383571/importing-files-from-different-folder, to avoid explicit path
import sys
sys.path.append('../library')

In [2]:
from thinkbayes2 import Pmf
# Dice example: a fair six-sided die, every face equally likely.
pmf = Pmf()
for x in (1, 2, 3, 4, 5, 6):
    pmf.Set(x, 1 / 6.0)

In [3]:
# # Word counting: no need to download the text, the nltk corpus is used instead
# import requests
# url = 'http://www.fullbooks.com/The-Holy-Bible1.html'
# r = requests.get(url)
# txt = r.text

In [4]:
# http://www.cnblogs.com/baiboy/p/nltk4.html
# https://stackoverflow.com/questions/42120146/read-in-gutenberg-text-from-nltk
# https://www.bbsmax.com/A/E35poNLJvX/
# https://stackoverflow.com/questions/3788870/how-to-check-if-a-word-is-an-english-word-with-python
from nltk.corpus import gutenberg
from nltk.corpus import wordnet
# Build a word-frequency Pmf over the King James Bible, counting only the
# tokens for which WordNet returns at least one synset.
pmf = Pmf()
words = gutenberg.words('bible-kjv.txt')
# A WordNet lookup is far more expensive than a dict lookup and the same
# tokens recur throughout the text, so remember the verdict for every
# distinct lower-cased token instead of querying WordNet once per token.
known = {}
for word in words:
    lowered = word.lower()
    if lowered not in known:
        known[lowered] = bool(wordnet.synsets(lowered))
    if known[lowered]:
        pmf.Incr(lowered, 1)
pmf.Normalize()
word="bible"
print("word `{}` probablity is {}".format(word, pmf.Prob(word)))

word `bible` probablity is 5.810372677303523e-06


In [4]:
# cookies using Pmf
from thinkbayes2 import Pmf
# Equal priors for the two bowls, then multiply in the likelihood of
# drawing a vanilla cookie from each bowl and renormalise.
pmf = Pmf()
for bowl in ('Bow1', 'Bow2'):
    pmf.Set(bowl, 0.5)
for bowl, vanilla_like in (('Bow1', 0.75), ('Bow2', 0.5)):
    pmf.Mult(bowl, vanilla_like)
pmf.Normalize()
posterior_bow1 = pmf.Prob('Bow1')
print(posterior_bow1)

0.6000000000000001


In [3]:
# cookies using Pmf subclass
class Cookie(Pmf):
    """Cookie problem as a Pmf subclass: which bowl did a cookie come from?

    Every hypothesis is a bowl name. ``self.mixes`` maps each bowl name to
    the probability of drawing each flavour from that bowl.
    """

    def __init__(self, hypos):
        """Give every hypothesis in ``hypos`` the same prior probability.

        Args:
            hypos: iterable of bowl names; each must be a key of ``self.mixes``
                for ``Update`` to work.
        """
        # Start from an empty Pmf: there is no existing distribution to copy,
        # so nothing (in particular not ``self``) is passed to the base class.
        super().__init__()
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()
        # Flavour probabilities per bowl, used as the likelihood.
        self.mixes = {'Bow1': {'vanilla':0.75, 'chocolate':0.25},
                      'Bow2': {'vanilla':0.5, 'chocolate':0.5},
                      }

    def Update(self, data):
        """Update the distribution in place after observing the flavour ``data``."""
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """Return the probability of drawing flavour ``data`` from bowl ``hypo``.

        Raises:
            KeyError: if ``hypo`` or ``data`` is not present in ``self.mixes``.
        """
        mix = self.mixes[hypo]
        like = mix[data]
        return like

In [4]:
# Posterior after a single vanilla cookie.
hypos = ['Bow1', 'Bow2']
pmf = Cookie(hypos)
pmf.Update('vanilla')
for hypo, prob in pmf.Items():
    print(hypo, prob, sep=": ")

# Posterior after three further draws on top of the first one.
dataset = ['vanilla', 'chocolate', 'vanilla']
for data in dataset:
    pmf.Update(data)
for hypo, prob in pmf.Items():
    print(hypo, prob, sep=": ")

Bow1: 0.6000000000000001
Bow2: 0.4
Bow1: 0.627906976744186
Bow2: 0.37209302325581395


In [5]:
# cookies using Suite
from thinkbayes2 import Suite
class Cookie2(Suite):
    """Cookie problem built on Suite; only the likelihood is defined here,
    the update step is inherited from the base class."""

    def __init__(self, obj):
        """Initialise the suite from ``obj`` and attach each bowl's flavour mix."""
        super().__init__(obj)
        # Probability of each flavour, per bowl.
        bow1 = {'vanilla': 0.75, 'chocolate': 0.25}
        bow2 = {'vanilla': 0.5, 'chocolate': 0.5}
        self.mixes = {'Bow1': bow1, 'Bow2': bow2}

    def Likelihood(self, data, hypo):
        """Return the probability of drawing flavour ``data`` from bowl ``hypo``."""
        return self.mixes[hypo][data]

In [6]:
hypos = ['Bow1', 'Bow2']
dataset = ['vanilla', 'chocolate', 'vanilla']

suite = Cookie2(hypos)
# Priors.
suite.Print()

# Posterior after one vanilla cookie.
suite.Update('vanilla')
suite.Print()

# Posterior after three further draws.
for data in dataset:
    suite.Update(data)
suite.Print()

Bow1 0.5
Bow2 0.5
Bow1 0.6000000000000001
Bow2 0.4
Bow1 0.627906976744186
Bow2 0.37209302325581395


In [7]:
# Use the Suite class (used here as an abstract base class: the subclass supplies Likelihood) to solve the Monty Hall problem
from thinkbayes2 import Suite
class Monty(Suite):
    """Monty Hall problem: each hypothesis is the door hiding the car."""

    def Likelihood(self, data, hypo):
        """Return the likelihood that door ``data`` is opened and shows no car,
        given that the car is behind door ``hypo``.

        Door 'A' is hard-coded as the special case below.
        See pages 7-8 of "Think Bayes".
        """
        # The opened door never hides the car, so this hypothesis is ruled out.
        if hypo == data:
            return 0
        # Car behind 'A': either of the two other doors could have been
        # opened, so the one observed has probability 1/2.
        if hypo == 'A':
            return 0.5
        # Car behind the remaining door: the observed door was the only one
        # that could be opened.
        return 1

In [8]:
# Three equally likely doors; then door B is opened.
suite = Monty(['A', 'B', 'C'])
suite.Update('B')
suite.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


In [9]:
# M&M problem, refer to https://github.com/AllenDowney/ThinkBayes2/blob/master/code/m_and_m.py
from thinkbayes2 import Suite
class M_and_M(Suite):
    """M&M problem: decide which of two bags holds which colour mix.

    Hypothesis 'A' says bag1 has the ``mix94`` colours and bag2 the ``mix96``
    colours; hypothesis 'B' says the opposite.
    """

    def __init__(self, obj):
        """Initialise the suite from ``obj`` and build the hypothesis table."""
        super().__init__(obj)

        # Colour counts per mix; each mix adds up to 100.
        mix94 = {
            'brown': 30,
            'yellow': 20,
            'red': 20,
            'green': 10,
            'orange': 10,
            'tan': 10,
            'blue': 0,
        }
        mix96 = {
            'blue': 24,
            'green': 20,
            'orange': 16,
            'yellow': 14,
            'red': 13,
            'brown': 13,
            'tan': 0,
        }

        # hypothesis -> bag -> colour mix
        self.hypotheses = {
            'A': {'bag1': mix94, 'bag2': mix96},
            'B': {'bag1': mix96, 'bag2': mix94},
        }

    def Likelihood(self, data, hypo):
        """Return the count of ``color`` in ``bag`` under hypothesis ``hypo``.

        ``data`` is a ``(bag, color)`` tuple. The value returned is the raw
        entry from the mix table, not a probability in [0, 1].
        """
        bag, color = data
        return self.hypotheses[hypo][bag][color]

In [10]:
# One yellow M&M from bag1 and one green M&M from bag2.
suite = M_and_M('AB')
for observation in (('bag1', 'yellow'), ('bag2', 'green')):
    suite.Update(observation)
suite.Print()

A 0.7407407407407407
B 0.2592592592592592


In [18]:
# Exercise 2.1 (chapter 2): cookies are not put back, i.e. they are drawn without replacement
from thinkbayes2 import Suite
class Cookie3(Suite):
    """Cookie problem without replacement: every drawn cookie is removed.

    ``mixes`` maps each bowl name to the number of cookies of each flavour,
    for example::

        {
            'Bow1': {'chocolate': 10, 'vanilla': 30},
            'Bow2': {'chocolate': 20, 'vanilla': 20}
        }
    """

    def __init__(self, mixes):
        """Create one hypothesis per bowl in ``mixes``.

        The counts are copied, so the dict passed in by the caller is left
        untouched as cookies are consumed.
        """
        super().__init__(mixes.keys())
        # Private per-bowl copy of the remaining cookie counts.
        self.mixes = {hypo: dict(mix) for hypo, mix in mixes.items()}

    def Likelihood(self, data, hypo):
        """Return the probability of drawing flavour ``data`` from bowl ``hypo``
        and remove that cookie from the bowl.

        NOTE: this method has a side effect. Each call that returns a
        non-zero likelihood takes one ``data`` cookie out of bowl ``hypo``,
        so it is meant to be called exactly once per observation and
        hypothesis, as the update step does.

        Returns 0 when the bowl is empty or holds no cookie of that flavour;
        in that case the counts are left unchanged.

        Raises:
            KeyError: if ``hypo`` is not a known bowl or ``data`` is not a
                flavour listed for that bowl.
        """
        mix = self.mixes[hypo]
        count = mix[data]
        total = sum(mix.values())
        # An empty bowl cannot produce any cookie (and would divide by zero).
        if total <= 0:
            return 0
        like = count / total
        # Only eat a cookie that actually exists, so counts never go negative.
        if count > 0:
            mix[data] = count - 1
        return like

In [24]:
# also refer to https://github.com/AllenDowney/ThinkBayes2/blob/master/code/cookie3.py
mixes = {
    'Bow1': {'chocolate': 10, 'vanilla': 30},
    'Bow2': {'chocolate': 20, 'vanilla': 20}
}
suite = Cookie3(mixes)
# Priors.
suite.Print()

# Posterior after one vanilla cookie.
suite.Update('vanilla')
suite.Print()

# Three further batches of draws, printing the posterior after each batch.
for dataset in (
    ['vanilla', 'chocolate', 'vanilla'],
    ['vanilla', 'chocolate', 'vanilla', 'chocolate'],
    ['vanilla', 'chocolate', 'vanilla', 'chocolate', 'vanilla', 'chocolate'],
):
    for data in dataset:
        suite.Update(data)
    suite.Print()

Bow1 0.5
Bow2 0.5
Bow1 0.6000000000000001
Bow2 0.4
Bow1 0.6403785488958991
Bow2 0.3596214511041009
Bow1 0.49174931157994994
Bow2 0.50825068842005
Bow1 0.20110803005923983
Bow2 0.7988919699407601


In [26]:
# Compared with the official answer for the same sequence of draws.
# Result: correct.
mixes = {
    'Bow1': {'chocolate': 10, 'vanilla': 30},
    'Bow2': {'chocolate': 20, 'vanilla': 20}
}
suite = Cookie3(mixes)

# (heading to print, flavour drawn) for each successive step.
steps = [
    ('After 1 vanilla', 'vanilla'),
    ('\nAfter 1 vanilla, 1 chocolate', 'chocolate'),
    ('\nAfter 1 vanilla, 1 chocolate, 1 vanilla', 'vanilla'),
]
for heading, flavour in steps:
    print(heading)
    suite.Update(flavour)
    suite.Print()

After 1 vanilla
Bow1 0.6000000000000001
Bow2 0.4

After 1 vanilla, 1 chocolate
Bow1 0.4285714285714286
Bow2 0.5714285714285714

After 1 vanilla, 1 chocolate, 1 vanilla
Bow1 0.5337423312883436
Bow2 0.4662576687116564
