# Machine Learning - Exercise 2
# Bayesian Learning

See the description of this example in Russell & Norvig, *Artificial Intelligence: A Modern Approach*, Chap. 20.

In [1]:
import numpy as np

## Prior knowledge

In [None]:
# Configure array printing before the first print so that this cell's output is
# formatted with three decimals even on a fresh kernel (Restart & Run All).
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

# Prior over the hypotheses h1..h5: we have 5 kinds of bags of candies.
PH = np.array([0.1, 0.2, 0.4, 0.2, 0.1])
# A prior must be a proper distribution; fail early if it is edited incorrectly.
assert np.isclose(PH.sum(), 1.0), 'the prior P(H) must sum to 1'

# Likelihood of a single candy given each hypothesis, keyed by candy symbol.
PdH = {}
PdH['l'] = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) # 'l' -> lime
PdH['c'] = 1 - PdH['l'] # 'c' -> cherry (complement of lime)

print('P(H) = %s' %(str(PH)))
print('P(l|H) = %s' %(str(PdH['l']))) # likelihood for lime candy
print('P(c|H) = %s' %(str(PdH['c']))) # likelihood for cherry candy

# Probability of extracting a lime candy, marginalising over the hypotheses:
# P(l) = sum_h P(l|h) P(h)
cP = PdH['l'] * PH
Pl = np.sum(cP)
print('P(l) = sum %s = %.3f' %(str(cP),Pl))

P(H) = [ 0.100  0.200  0.400  0.200  0.100]
P(l|H) = [ 0.000  0.250  0.500  0.750  1.000]
P(c|H) = [ 1.000  0.750  0.500  0.250  0.000]
P(l) = sum [ 0.000  0.050  0.200  0.150  0.100] = 0.500


## Dataset

In [None]:
# Observed data: five lime candies drawn in a row.
D = ['l'] * 5

## Bayesian Learning

In [None]:
# Print every float with three decimals (the blank in the format reserves the sign slot).
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

P = PH   # running posterior over the hypotheses; starts out as the prior
db = ''  # observations consumed so far, used only to label the printed lines
print('P(H)      \t= %s' % (PH,))
for d in D:
    # Bayes update for one observation: weight each hypothesis by the
    # likelihood of the observed candy ...
    P = P * PdH[d]
    # ... then rescale so the posterior sums to 1. Dividing by the total is the
    # normalising constant alpha from Bayes' rule.
    P /= P.sum()
    db += d
    print('P(H|%s)  \t= %s' % (db, P))

P(H)      	= [ 0.100  0.200  0.400  0.200  0.100]
P(H|l)  	= [ 0.000  0.100  0.400  0.300  0.200]
P(H|ll)  	= [ 0.000  0.038  0.308  0.346  0.308]
P(H|lll)  	= [ 0.000  0.013  0.211  0.355  0.421]
P(H|llll)  	= [ 0.000  0.004  0.132  0.335  0.529]
P(H|lllll)  	= [ 0.000  0.001  0.078  0.296  0.624]


## MAP hypothesis

In [None]:
# Index of the hypothesis with the largest posterior probability.
i = P.argmax()
# Hypotheses are labelled h1..h5, hence the +1 on the zero-based index.
print('MAP hypothesis: h%d' % (i + 1,))

MAP hypothesis: h5


## Prediction

Probability that next candy is lime

Using MAP hypothesis

In [None]:
# Predict with the single MAP hypothesis: read off its lime likelihood.
PlhMAP = PdH['l'][i]
print('P(l|h_MAP) = %.3f' % (PlhMAP,))

P(l|h_MAP) = 1.000


Using all hypotheses

In [None]:
# Full Bayesian prediction: average the lime likelihood over all hypotheses,
# weighted by the posterior P(h|D).
cP = P * PdH['l']
PlD = cP.sum()
print('P(l|D) = sum %s = %.3f' % (cP, PlD))

P(l|D) = sum [ 0.000  0.000  0.039  0.222  0.624] = 0.886


# Home Exercise

Di : outcome of rolling the i-th six-sided die

Z = D1 + D2 = sum of the outcomes of rolling 2 dice

S = D1 + D2 + D3 = sum of the outcomes of rolling 3 dice

Z in [2,12], S in [3,18]

**Question 1**

Compute

Prior: P(S)  -- 16 values summing to 1

Posterior: P( S | D1 ) -- 16 x 6 matrix (each column sums to 1)

Posterior: P( S | D1, D2 ) -- 16 x 6 x 6 array (for each pair (D1, D2), the 16 values sum to 1)

Posterior: P( S | Z ) -- 16 x 11 matrix (each column sums to 1)

**Question 2**

Verify experimentally that

P( S | Z, D1 ) = P ( S | Z )

