In [6]:
from pybbn.probabilistic import create_reasoning_model
import numpy as np
import pandas as pd

def get_model():
    """Build the three-node gender / drug / recovery reasoning model.

    The graph has edges gender -> drug, gender -> recovery and
    drug -> recovery. Each node's table is given as a ``columns`` list plus
    ``data`` rows, where the last column, ``__p__``, holds the probability
    for that row.

    Returns
    -------
    The object produced by ``create_reasoning_model`` for this structure
    and these tables.
    """
    structure = {
        'nodes': ['drug', 'gender', 'recovery'],
        'edges': [
            ('gender', 'drug'),
            ('gender', 'recovery'),
            ('drug', 'recovery'),
        ],
    }

    # Root node: no parent columns.
    gender_table = {
        'columns': ['gender', '__p__'],
        'data': [['male', 0.51], ['female', 0.49]],
    }

    # drug conditioned on gender.
    drug_table = {
        'columns': ['gender', 'drug', '__p__'],
        'data': [
            ['female', 'no', 0.24],
            ['female', 'yes', 0.76],
            ['male', 'no', 0.76],
            ['male', 'yes', 0.24],
        ],
    }

    # recovery conditioned on both gender and drug.
    recovery_table = {
        'columns': ['gender', 'drug', 'recovery', '__p__'],
        'data': [
            ['female', 'no', 'no', 0.90],
            ['female', 'no', 'yes', 0.10],
            ['female', 'yes', 'no', 0.27],
            ['female', 'yes', 'yes', 0.73],
            ['male', 'no', 'no', 0.99],
            ['male', 'no', 'yes', 0.01],
            ['male', 'yes', 'no', 0.07],
            ['male', 'yes', 'yes', 0.93],
        ],
    }

    # Insertion order (gender, drug, recovery) matches the original dict.
    tables = {
        'gender': gender_table,
        'drug': drug_table,
        'recovery': recovery_table,
    }

    return create_reasoning_model(structure, tables)

# Build the network once; the cells below query this shared instance.
model = get_model()

In [7]:
# Query the model with no evidence; the result is indexed by node name below.
q = model.pquery()

In [8]:
# Marginal distribution of gender (no evidence).
q['gender']

Unnamed: 0,gender,__p__
0,female,0.49
1,male,0.51


In [9]:
# Marginal distribution of drug (no evidence).
q['drug']

Unnamed: 0,drug,__p__
0,no,0.5052
1,yes,0.4948


In [10]:
# Marginal distribution of recovery (no evidence).
q['recovery']

Unnamed: 0,recovery,__p__
0,no,0.59868
1,yes,0.40132


In [64]:
def add_priors(p0, p1):
    """Pool two equally-shaped weight/count vectors and normalize to sum to 1.

    Parameters
    ----------
    p0, p1 : array-like
        Vectors of counts or weights. They are coerced with ``np.asarray``
        so that plain Python lists are added element-wise; without the
        coercion ``list + list`` would concatenate and silently produce a
        vector of the wrong length.

    Returns
    -------
    numpy.ndarray
        ``(p0 + p1) / sum(p0 + p1)``. If the pooled total is zero the
        division yields NaN values.
    """
    pooled = np.asarray(p0) + np.asarray(p1)
    return pooled / np.sum(pooled)

In [125]:
# Distribution of recovery given gender=male and drug=yes, as a NumPy array.
p_true = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes'}))['recovery'].__p__.values
p_true

array([0.07, 0.93])

In [126]:
# Same query, but with recovery itself also supplied as evidence (recovery=yes).
p_pred = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes', 'recovery': 'yes'}))['recovery'].__p__.values
p_pred

array([0., 1.])

In [127]:
# Element-wise gap between the two recovery distributions computed above.
p_true - p_pred

array([ 0.07, -0.07])

In [152]:
def normalize(p0, p1, e):
    """Shift both values by ``e`` and rescale the pair to sum to 1.

    Parameters
    ----------
    p0, p1 : values to perturb.
    e : offset added to each of ``p0`` and ``p1`` before rescaling.

    Returns
    -------
    list
        ``[(p0 + e) / total, (p1 + e) / total]`` where
        ``total = (p0 + e) + (p1 + e)``. No clipping is applied, so an
        offset that pushes a value below zero produces a negative entry.
    """
    shifted = (p0 + e, p1 + e)
    total = shifted[0] + shifted[1]
    return [value / total for value in shifted]

# Candidate offsets: arange(0, 0.08, 0.01) followed by the negated arange(0.01, 0.08, 0.01).
e = np.array(list(np.arange(0, 0.08, 0.01)) + list(-np.arange(0.01, 0.08, 0.01)))
# Rebind e to 100 offsets drawn from those candidates; no seed is set, so each run differs.
e = np.random.choice(e, size=100)

# Apply each sampled offset to p_true via normalize() and average the resulting pairs.
np.mean(np.array([normalize(p_true[0], p_true[1], _e) for _e in e]), axis=0)

array([0.06039897, 0.93960103])

In [94]:
# u: recovery distribution with gender=male, drug=yes and recovery=yes all given as evidence.
# y: recovery distribution given gender=male, drug=no.
u = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes', 'recovery': 'yes'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'no'}))['recovery'].__p__.values

# Draw 5,000 multinomial counts from each distribution (unseeded).
u_c = np.random.multinomial(5_000, u)
y_c = np.random.multinomial(5_000, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.4955, 0.5045])

In [93]:
# Counts from 5,000 multinomial trials using u; u is not defined in this cell and must already exist in the kernel.
np.random.multinomial(5_000, u)

array([   0, 5000])

In [90]:
# Full query result (every node) with gender=male, drug=yes and recovery=yes as evidence.
model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes', 'recovery': 'yes'}))

{'drug':   drug  __p__
 0   no    0.0
 1  yes    1.0,
 'gender':    gender  __p__
 0  female    0.0
 1    male    1.0,
 'recovery':   recovery  __p__
 0       no    0.0
 1      yes    1.0}

In [119]:
# y: recovery distribution given gender=male, drug=no.
# NOTE(review): u is assigned here but not used in this cell; u_c is drawn from a fixed [0.5, 0.5] instead.
u = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes', 'recovery': 'yes'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'no'}))['recovery'].__p__.values

# 1,000 multinomial draws each: u_c from the uniform pair, y_c from y (unseeded).
u_c = np.random.multinomial(1_000, np.array([0.5, 0.5]))
y_c = np.random.multinomial(1_000, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.721, 0.279])

In [120]:
# Show the current u_c count vector.
u_c

array([450, 550])

In [121]:
# Show the current y_c count vector.
y_c

array([992,   8])

In [118]:
# y: recovery distribution given gender=male, drug=yes.
# NOTE(review): u is assigned here but not used in this cell; u_c is drawn from a fixed [0.5, 0.5] instead.
u = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'no', 'recovery': 'no'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes'}))['recovery'].__p__.values

# 1,000 multinomial draws each: u_c from the uniform pair, y_c from y (unseeded).
u_c = np.random.multinomial(1_000, np.array([0.5, 0.5]))
y_c = np.random.multinomial(1_000, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.289, 0.711])

In [66]:
# u: recovery given gender=male, drug=no; y: recovery given gender=male, drug=yes.
u = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'no'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'male', 'drug': 'yes'}))['recovery'].__p__.values

# Unequal sample sizes: 500 draws from u, 100 draws from y (unseeded).
u_c = np.random.multinomial(500, u)
y_c = np.random.multinomial(100, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.84, 0.16])

In [67]:
# u: recovery given gender=female, drug=yes; y: recovery given gender=female, drug=no.
u = model.pquery(evidences=model.e({'gender': 'female', 'drug': 'yes'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'female', 'drug': 'no'}))['recovery'].__p__.values

# Unequal sample sizes: 500 draws from u, 100 draws from y (unseeded).
u_c = np.random.multinomial(500, u)
y_c = np.random.multinomial(100, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.38333333, 0.61666667])

In [71]:
# u: recovery given gender=female, drug=no; y: recovery given gender=female, drug=yes.
u = model.pquery(evidences=model.e({'gender': 'female', 'drug': 'no'}))['recovery'].__p__.values
y = model.pquery(evidences=model.e({'gender': 'female', 'drug': 'yes'}))['recovery'].__p__.values

# Unequal sample sizes: 500 draws from u, 100 draws from y (unseeded).
u_c = np.random.multinomial(500, u)
y_c = np.random.multinomial(100, y)

# Pool the two count vectors and normalize them.
add_priors(u_c, y_c)

array([0.78166667, 0.21833333])

In [73]:
# Ten samples from a Dirichlet distribution with alpha = [0.1, 0.9] (unseeded).
np.random.dirichlet([0.1, 0.9], size=10)

array([[3.47799424e-03, 9.96522006e-01],
       [1.93513677e-05, 9.99980649e-01],
       [1.31672200e-02, 9.86832780e-01],
       [2.59302549e-08, 9.99999974e-01],
       [2.58939574e-01, 7.41060426e-01],
       [3.87513109e-03, 9.96124869e-01],
       [8.03741838e-05, 9.99919626e-01],
       [9.13347769e-11, 1.00000000e+00],
       [2.70618792e-06, 9.99997294e-01],
       [6.45611400e-07, 9.99999354e-01]])

In [82]:
# For each of 10,000 Dirichlet([0.1, 0.90]) samples, draw a single multinomial trial, then average the outcomes per category.
np.mean(np.array([np.random.multinomial(1, a) for a in np.random.dirichlet([0.1, 0.90], size=10_000)]), axis=0)

array([0.0996, 0.9004])

In [85]:
# Counts from 10,000 multinomial trials with fixed probabilities [0.1, 0.90], for comparison.
np.random.multinomial(10_000, [0.1, 0.90])

array([1038, 8962])

In [72]:
??np.random.dirichlet

Docstring:
dirichlet(alpha, size=None)

Draw samples from the Dirichlet distribution.

Draw `size` samples of dimension k from a Dirichlet distribution. A
Dirichlet-distributed random variable can be seen as a multivariate
generalization of a Beta distribution. The Dirichlet distribution
is a conjugate prior of a multinomial distribution in Bayesian
inference.

.. note::
    New code should use the `~numpy.random.Generator.dirichlet`
    method of a `~numpy.random.Generator` instance instead;
    please see the :ref:`random-quick-start`.

Parameters
----------
alpha : sequence of floats, length k
    Parameter of the distribution (length ``k`` for sample of
    length ``k``).
size : int or tuple of ints, optional
    Output shape.  If the given shape is, e.g., ``(m, n)``, then
    ``m * n * k`` samples are drawn.  Default is None, in which case a
    vector of length ``k`` is returned.

Returns
-------
samples : ndarray,
    The drawn samples, of shape ``(size, k)``.

Raises