In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm

sns.set()

In [None]:
# One row per conjecture about the bag's four marbles (True = blue, False = white).
conjectures_arr = np.array([
    [False, False, False, False],  # all white
    [False, False, False, True],
    [False, False, True, True],
    [False, True, True, True],
    [True, True, True, True],      # all blue
])

# Number of ways each conjecture can produce a blue / white marble,
# plus the same counts normalised so they sum to 1.
ways_B = np.arange(5)
ways_W = np.arange(4, -1, -1)
dist_B = ways_B / ways_B.sum()
dist_W = ways_W / ways_W.sum()

# Priors over the five conjectures.
flat_prior = np.full(ways_B.shape, 1 / len(ways_B))
informative_prior = ways_B / ways_B.sum()  # prior weight proportional to the ways of pulling blue



In [None]:
# Monte Carlo estimate of the posterior over the five conjectures after observing
# the sequence blue, white, blue (True = blue), giving every conjecture the same
# number of simulated draws (i.e. a flat prior).
#
# Conjectures 0 (all white) and 4 (all blue) have zero ways to produce the target,
# so they are not simulated and their hit counts stay at 0.

N_SIMULATIONS = 10000  # simulated 3-marble draws per conjecture
target = np.array([True, False, True])

# Bag contents for the conjectures that can produce the target
# (same contents as rows 1-3 of conjectures_arr).
mixed_bags = [
    (1, np.array([False, False, False, True])),
    (2, np.array([False, False, True, True])),
    (3, np.array([False, True, True, True])),
]

# Local, seeded generator: the cell is reproducible and leaves the global
# numpy random state untouched.
rng = np.random.RandomState(42)

hits_conjecture = np.zeros(5)
for conjecture, values in mixed_bags:
    # One row per simulated experiment, one column per marble drawn (with replacement).
    samples = rng.choice(values, replace=True, size=(N_SIMULATIONS, len(target)))
    # A hit is a row that reproduces the target sequence exactly.
    hits_conjecture[conjecture] = (samples == target).all(axis=1).sum()

posterior = hits_conjecture / hits_conjecture.sum()
posterior

In [None]:
def random_sampling(prior,target,nr_pulls=1000000,conjectures=None):
    """Estimate the posterior over bag conjectures by simulation.

    A bag (conjecture) is picked at random according to ``prior``; then
    ``len(target)`` marbles are drawn from it with replacement. The fraction of
    exact reproductions of ``target`` that came from each bag is the estimate.

    The draws are vectorised: all bag choices are drawn at once, and then all
    marble draws for a given bag are drawn at once. This is statistically
    equivalent to simulating one pull at a time but avoids a Python-level loop
    over ``nr_pulls`` iterations.

    Parameters
    ----------
    prior : array-like
        Probability of picking each conjecture; passed as ``p`` to
        ``np.random.choice``, so it must sum to 1.
    target : array-like of bool
        Observed colour sequence to reproduce (True = blue).
    nr_pulls : int, optional
        Number of simulated experiments (default 1000000).
    conjectures : array-like of bool, optional
        One row per conjecture listing the bag contents. Defaults to the
        notebook-level ``conjectures_arr``.

    Returns
    -------
    numpy.ndarray
        Normalised hit counts, one entry per conjecture. If no simulated draw
        reproduces ``target`` the normalisation divides by zero and the result
        is NaN.
    """
    if conjectures is None:
        conjectures = conjectures_arr
    conjectures = np.asarray(conjectures)
    target = np.asarray(target)
    nr_conjectures = len(conjectures)

    # Which bag each simulated experiment draws from.
    bags = np.random.choice(nr_conjectures, size=nr_pulls, p=prior)

    score = np.zeros(nr_conjectures)
    for bag in range(nr_conjectures):
        nr_bag_pulls = int((bags == bag).sum())
        if nr_bag_pulls == 0:
            continue
        # One row per experiment that used this bag, one column per marble.
        draws = np.random.choice(conjectures[bag], replace=True,
                                 size=(nr_bag_pulls, len(target)))
        score[bag] = (draws == target).all(axis=1).sum()

    return score / score.sum()

In [None]:
# Observed sequence to reproduce: blue, white, blue, blue (True = blue).
target = np.array([True, False, True, True])

# Simulated posterior over the conjectures when every bag is equally likely a priori.
flat_sim_posterior = random_sampling(flat_prior, target)
print(flat_sim_posterior)

In [None]:
# Bayesian updating with the flat prior.
# Marbles are observed one at a time (B, W, B, B); each new marble multiplies the
# running likelihood by the per-conjecture probability of that colour.
# dist_B / dist_W are the ways to produce a blue / white marble, normalised to sum to 1.

df_flat = pd.DataFrame({
    'ways_B': ways_B,
    'dist_B': dist_B,
    'ways_W': ways_W,
    'dist_W': dist_W,
})
df_flat.index.name = 'conjecture'

prior = flat_prior

# Running likelihood after each additional marble, starting from a single blue.
running_lkh = df_flat['dist_B']
for sequence, colour_col in [('BW', 'dist_W'), ('BWB', 'dist_B'), ('BWBB', 'dist_B')]:
    running_lkh = running_lkh * df_flat[colour_col]
    df_flat['lkh_' + sequence] = running_lkh

# Posterior after each step: likelihood times prior, renormalised over the conjectures.
for post_col, lkh_col in [('post_B', 'dist_B'),
                          ('post_BW', 'lkh_BW'),
                          ('post_BWB', 'lkh_BWB'),
                          ('post_BWBB', 'lkh_BWBB')]:
    unstd = df_flat[lkh_col] * prior
    df_flat[post_col] = unstd / unstd.sum()

df_flat

In [None]:
# Same simulation as for the flat prior, but bags are picked according to the informative prior.
informed_sim_posterior = random_sampling(informative_prior, target)
print(informed_sim_posterior)

In [None]:
# Same sequential update, now with the informative prior
# (prior weight proportional to the ways of pulling one blue).

prior = informative_prior

# Start from the flat-prior table; every likelihood and posterior column is recomputed below.
df_informed = df_flat.copy()

# Running likelihood for B, W, B, B, B (one marble more than in the flat-prior table).
running_lkh = df_informed['dist_B']
for sequence, colour_col in [('BW', 'dist_W'),
                             ('BWB', 'dist_B'),
                             ('BWBB', 'dist_B'),
                             ('BWBBB', 'dist_B')]:
    running_lkh = running_lkh * df_informed[colour_col]
    df_informed['lkh_' + sequence] = running_lkh

# Posterior after each step: likelihood times prior, renormalised over the conjectures.
for post_col, lkh_col in [('post_B', 'dist_B'),
                          ('post_BW', 'lkh_BW'),
                          ('post_BWB', 'lkh_BWB'),
                          ('post_BWBB', 'lkh_BWBB'),
                          ('post_BWBBB', 'lkh_BWBBB')]:
    unstd = df_informed[lkh_col] * prior
    df_informed[post_col] = unstd / unstd.sum()

df_informed

In [None]:
# Posterior after each observed marble, flat prior (top) vs informative prior (bottom).
fig, axes = plt.subplots(2, 1, figsize=(18, 12))

# Tick labels for the five conjectures (first label fixed from 'conj-0' to match the others).
x_labels = ['conj_0', 'conj_1', 'conj_2', 'conj_3', 'conj_4']

# (DataFrame column, legend label) for each step of the update that is plotted.
posterior_columns = [
    ('post_B', 'posterior_B'),
    ('post_BW', 'posterior_B_W'),
    ('post_BWB', 'posterior_B_W_B'),
    ('post_BWBB', 'posterior_B_W_B_B'),
]

panels = [
    (axes[0], df_flat, 'Posteriors with flat prior'),
    (axes[1], df_informed, 'Posteriors with informative prior'),
]

for ax, frame, title in panels:
    ax.set_title(title)
    ax.set_ylabel('probability')
    for column, label in posterior_columns:
        ax.plot(x_labels, frame[column], label=label)
    ax.legend(loc='upper left')

In [None]:
# compute posterior probability for any number of draws of blue,white

def compute_posterior(ways,prior):
    """Return the normalised posterior: ``ways * prior`` divided by its sum.

    ``ways`` is the likelihood of the data under each conjecture (raw counts or
    probabilities) and ``prior`` the prior weight of each conjecture.
    """
    unnormalised = ways * prior
    return unnormalised / unnormalised.sum()

# The likelihood of a set of draws is the product of the ways each Blue and White
# marble can be drawn. Because the posterior is renormalised, it does not matter
# whether raw ways or the normalised dist_* values are used, e.g.
# compute_posterior(ways_B * ways_W * ways_B * ways_B, flat_prior).

def pulls(B=1,W=0,prior=flat_prior):
    """Posterior over the conjectures after drawing ``B`` blue and ``W`` white marbles."""
    blue_term = dist_B ** B
    white_term = dist_W ** W
    likelihood = blue_term * white_term
    return compute_posterior(likelihood, prior=prior)

# How many Blue and White marbles have been pulled
B, W = 2, 1

# Posterior under the default (flat) prior
flat_post = pulls(B, W)
print(flat_post)

# Posterior under the informative prior
informed_post = pulls(B, W, informative_prior)
print(informed_post)

In [None]:
# Compare the two posteriors for the B/W counts chosen above.
conj_positions = [0, 1, 2, 3, 4]
xticks = ['WWWW','WWWB','WWBB','WBBB','BBBB']
flat_label = 'posterior with flat prior having pulled {} B and {} W'.format(B, W)

plt.figure(figsize=(18, 12))
plt.plot(conj_positions, flat_post, 'x--', label=flat_label)
plt.plot(conj_positions, informed_post, 'x--', label='posterior with informative prior')

plt.xlabel('Conjecture')
plt.ylabel('probability')
plt.legend(loc='upper left')
_ = plt.xticks(conj_positions, xticks)


In [None]:
# Posteriors for b blue and b-1 white marbles, b = 1..nr_dists, under both priors.
# Rows alternate: even rows use the informative prior, odd rows the flat prior.
nr_dists = 4
nr_rows = 2 * nr_dists

dists = np.zeros((nr_rows, len(ways_B)))
idxs = np.zeros((nr_rows, 2))

priors_by_parity = (informative_prior, flat_prior)

for pos in range(nr_rows):
    b = pos // 2 + 1
    idxs[pos] = (b, b - 1)  # (blue count, white count) used for this row
    dists[pos] = pulls(b, b - 1, priors_by_parity[pos % 2])

print(dists)
print(idxs)

In [None]:
# Plot every posterior in dists; each (informative, flat) pair is drawn
# more opaque than the previous pair.
plt.figure(figsize=(18, 12))

colors = ['red','blue'] # informative,flat
alpha = 0.1

for idx, dist in enumerate(dists):
    is_flat_row = idx % 2
    plt.plot(dist, 'x--', color=colors[is_flat_row], alpha=alpha)
    if is_flat_row:
        # pair finished: raise opacity for the next pair
        alpha += 0.1

_ = plt.xticks([0, 1, 2, 3, 4], xticks)


In [None]:
# Scratch check of the parity expression (x % 2) used in the cells above; evaluates to 0.
# NOTE(review): presumably a leftover debugging cell — confirm whether it can be removed.
2 % 2

In [None]:
# pymc
# Infer p, the probability of drawing a blue marble, from a sequence of observed draws.
# The prior on p is Uniform(0, 1); each observation is modelled as a Binomial with n=1.
# NOTE(review): an earlier version of this comment described "2 blue out of 3 trials";
# the data below contains 4 True and 1 False.
# NOTE(review): pm.MCMC and the observed=True/value= keywords look like the PyMC 2 API —
# confirm the installed pymc version before running.

# observed draws (True presumably = blue, as in conjectures_arr)
data = np.array([True,False,True,True,True])

p = pm.Uniform('p',0,1)
lkh = pm.Binomial('lkh',n=1,p=p,observed=True,value=data)

model = pm.Model([p,lkh])
mcmc = pm.MCMC(model)

# positional arguments are presumably (iter, burn, thin) — confirm against the pymc docs
sample = mcmc.sample(50000,10000,5)

In [None]:
# All recorded samples of p from the sampler's trace
post_p = mcmc.trace('p')[:]

# Summary statistics of the sampled values
result = pd.DataFrame(dict(post_p=post_p))
result.describe()

In [None]:
# Histogram of the sampled values of p
plt.hist(result['post_p'], bins=20)

In [None]:
### NOTE (author): this section is experimental and confusing. It tries to create a custom
### distribution based on the ways to pull blue, i.e. a discrete distribution with
### values 0, 0.25, 0.50, 0.75, 0, derived from [0,1,2,3,4] ways to pull a blue.
### NOTE(review): the last listed value is presumably meant to be 1 (4 of 4) — confirm.
###
### Observation: when the number of draws increases, the result appears to get closer
### to the analytically calculated result above.

summie = 10 # sum of ways, i.e. 0 + 1 + 2 + 3 + 4

@pm.stochastic
def custom_dist(value=0.01,summie=summie): #default for value seems to be a necessary dummy value
    """Log-probability of a custom prior whose density is ``value / summie``.

    Parameters
    ----------
    value : float or array-like
        Current value of the stochastic; must be strictly positive.
    summie : number
        Normalising constant (the sum of the ways).

    Returns
    -------
    float or numpy.ndarray
        ``log(value / summie)``, or ``-inf`` when any element of ``value`` is
        not strictly positive.
    """
    if np.any(value <= 0):
        # Outside the support: a log-probability of -inf means zero probability.
        # (The previous +inf would have marked impossible values as infinitely likely.)
        return -np.inf
    return np.log(value / summie)


# Ten observed draws: nine True and one False (True presumably = blue, as in conjectures_arr).
data = [True,False,True,True,True,True,True,True,True,True] 

# Each observation is a Binomial with n=1 whose success probability is the custom_dist stochastic.
lkh = pm.Binomial('lkh',n=1,p=custom_dist,observed=True,value=data)

model = pm.Model([custom_dist,lkh])

# NOTE(review): pm.MCMC looks like the PyMC 2 API; the positional arguments to sample()
# are presumably (iter, burn, thin) — confirm against the installed pymc version.
mcmc = pm.MCMC(model)
sample = mcmc.sample(50000,10000,5)

In [None]:
# All recorded samples of custom_dist from the sampler's trace
post_p = mcmc.trace('custom_dist')[:]

# Summary statistics of the sampled values
result = pd.DataFrame(dict(post_p=post_p))
result.describe()

In [None]:
# Histogram of the sampled custom_dist values
plt.hist(result['post_p'], bins=20)