In [1]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
from pgmpy.factors.discrete import TabularCPD
import warnings
warnings.filterwarnings('ignore')

# Nodes in the network 
### Adapted from:
Ogutu, A.G., Kogeda, O.P. and Lall, M., 2018. A Probabilistic Assessment of Location Dependent Failure Trends in South African Water Distribution Networks. In Proceedings of the International MultiConference of Engineers and Computer Scientists (Vol. 1).

### Variable names and abbreviations:
- Soil_type = `St`
- Material_type = `Mt`
- Diameter = `Di`
- Location = `Lo`
- Strain = `Sn`
- Damage = `Dg`
- Defects = `Df`
- Corrosion = `Cr`
- Leakage = `Lk`

In [2]:
# Structure of the network, given purely as a list of directed edges
# (parent, child). The edge order is kept as originally written.
model = BayesianModel(
    [
        ('St', 'Cr'),
        ('Mt', 'Cr'),
        ('Di', 'Dg'),
        ('Lo', 'Sn'),
        ('Sn', 'Dg'),
        ('Cr', 'Lk'),
        ('Dg', 'Lk'),
        ('Df', 'Lk'),
    ]
)


In [6]:
# Defining individual CPDs.
#
# Root nodes (no evidence): `values` is written as a column, one
# single-element row per state, i.e. shape (variable_card, 1). Current pgmpy
# releases reject the flat (1, variable_card) layout with a ValueError, while
# older releases flatten the table and therefore accept the column form too.
cpd_St = TabularCPD(variable='St', variable_card=2,
                    values=[[0.078], [0.922]])
cpd_Mt = TabularCPD(variable='Mt', variable_card=4,
                    values=[[0.057], [0.34], [0.157], [0.446]])
cpd_Di = TabularCPD(variable='Di', variable_card=3,
                    values=[[0.546], [0.35], [0.104]])
cpd_Lo = TabularCPD(variable='Lo', variable_card=2,
                    values=[[0.633], [0.367]])
cpd_Df = TabularCPD(variable='Df', variable_card=2,
                    values=[[0.02], [0.98]])

# Conditional nodes: one row per state of the variable and one column per
# combination of evidence states, so every column sums to 1.

# P(Sn | Lo): 3 states x 2 evidence combinations.
cpd_Sn = TabularCPD(variable='Sn', variable_card=3,
                    values=[[0.12, 0.64],
                            [0.52, 0.2],
                            [0.36, 0.16]],
                    evidence=['Lo'],
                    evidence_card=[2])

# P(Cr | St, Mt): 2 states x (2 * 4) evidence combinations.
cpd_Cr = TabularCPD(variable='Cr', variable_card=2,
                    values=[[0.04, 0.29, 0.0, 0.0, 0.04, 0.29, 0.0, 0.0],
                            [0.96, 0.71, 1.0, 1.0, 0.96, 0.71, 1.0, 1.0]],
                    evidence=['St', 'Mt'],
                    evidence_card=[2, 4])

# P(Dg | Sn, Di): 2 states x (3 * 3) evidence combinations.
cpd_Dg = TabularCPD(variable='Dg', variable_card=2,
                    values=[[0.12, 0.027, 0.025, 0.16, 0.12, 0.08, 0.64, 0.52, 0.44],
                            [0.88, 0.973, 0.975, 0.84, 0.88, 0.92, 0.36, 0.48, 0.56]],
                    evidence=['Sn', 'Di'],
                    evidence_card=[3, 3])

# P(Lk | Dg, Cr, Df): 2 states x (2 * 2 * 2) evidence combinations.
cpd_Lk = TabularCPD(variable='Lk', variable_card=2,
                    values=[[0, 0.02, 0.012, 0.05, 0.5, 0.5, 0.512, 0.98],
                            [1, 0.98, 0.988, 0.95, 0.5, 0.5, 0.488, 0.02]],
                    evidence=['Dg', 'Cr', 'Df'],
                    evidence_card=[2, 2, 2])

# Attach every CPD to the network, then validate the model; the result of
# check_model() is the cell's displayed output.
model.add_cpds(cpd_St, cpd_Mt, cpd_Di, cpd_Lo, cpd_Sn, cpd_Df, cpd_Cr, cpd_Dg, cpd_Lk)
model.check_model()




True

In [7]:
# List the CPD objects currently attached to the model; the recorded output
# shows one TabularCPD per node, in the order they were passed to add_cpds().
model.get_cpds()

[<TabularCPD representing P(St:2) at 0x7f01c9818240>,
 <TabularCPD representing P(Mt:4) at 0x7f01c9818048>,
 <TabularCPD representing P(Di:3) at 0x7f01c9818160>,
 <TabularCPD representing P(Lo:2) at 0x7f01c98181d0>,
 <TabularCPD representing P(Sn:3 | Lo:2) at 0x7f01c9818128>,
 <TabularCPD representing P(Df:2) at 0x7f01c9818208>,
 <TabularCPD representing P(Cr:2 | St:2, Mt:4) at 0x7f01c9818278>,
 <TabularCPD representing P(Dg:2 | Sn:3, Di:3) at 0x7f01c98182e8>,
 <TabularCPD representing P(Lk:2 | Dg:2, Cr:2, Df:2) at 0x7f01c98182b0>]

In [9]:
# Print the prior CPD of St. No state names were supplied when it was built,
# so the recorded output labels the states by index (St_0, St_1).
print(cpd_St)

+------+-------+
| St_0 | 0.078 |
+------+-------+
| St_1 | 0.922 |
+------+-------+


In [2]:
# Uniform Dirichlet pseudo-counts (all ones), used below as the priors for
# A (2 x 1 table) and for C (2 x 4 table).
p_a = np.full((2, 1), 1.0)
p_c = np.full((2, 4), 1.0)

In [3]:
# First batch: three observations of (A, B, C) on the network A -> C <- B.
# CPDs are estimated with the Dirichlet pseudo-counts p_c and p_a as priors.
data = pd.DataFrame({
    'A': [0, 0, 1],
    'B': [0, 1, 0],
    'C': [1, 1, 0],
})
model = BayesianModel([('A', 'C'), ('B', 'C')])
estimator = BayesianEstimator(model, data)
cpd_C = estimator.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=p_c)
cpd_A = estimator.estimate_cpd('A', prior_type="dirichlet", pseudo_counts=p_a)
print(cpd_C, cpd_A, sep='\n')

+------+--------------------+--------------------+--------------------+------+
| A    | A(0)               | A(0)               | A(1)               | A(1) |
+------+--------------------+--------------------+--------------------+------+
| B    | B(0)               | B(1)               | B(0)               | B(1) |
+------+--------------------+--------------------+--------------------+------+
| C(0) | 0.3333333333333333 | 0.3333333333333333 | 0.6666666666666666 | 0.5  |
+------+--------------------+--------------------+--------------------+------+
| C(1) | 0.6666666666666666 | 0.6666666666666666 | 0.3333333333333333 | 0.5  |
+------+--------------------+--------------------+--------------------+------+
+------+-----+
| A(0) | 0.6 |
+------+-----+
| A(1) | 0.4 |
+------+-----+


In [4]:
# Sequential update: the first-batch state counts are added to the initial
# pseudo-counts, and the sums are passed as the pseudo-counts when estimating
# from the second batch.
alpha1A = estimator.state_counts('A').values + p_a
alpha1C = estimator.state_counts('C').values + p_c
data2 = pd.DataFrame({
    'A': [1, 0, 1],
    'B': [1, 1, 0],
    'C': [0, 1, 0],
})
estimator2 = BayesianEstimator(model, data2)
cpd_C = estimator2.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=alpha1C)
cpd_A = estimator2.estimate_cpd('A', prior_type="dirichlet", pseudo_counts=alpha1A)
print(cpd_C, cpd_A, sep='\n')

+------+--------------------+------+------+--------------------+
| A    | A(0)               | A(0) | A(1) | A(1)               |
+------+--------------------+------+------+--------------------+
| B    | B(0)               | B(1) | B(0) | B(1)               |
+------+--------------------+------+------+--------------------+
| C(0) | 0.3333333333333333 | 0.25 | 0.75 | 0.6666666666666666 |
+------+--------------------+------+------+--------------------+
| C(1) | 0.6666666666666666 | 0.75 | 0.25 | 0.3333333333333333 |
+------+--------------------+------+------+--------------------+
+------+-----+
| A(0) | 0.5 |
+------+-----+
| A(1) | 0.5 |
+------+-----+


In [6]:
# Check that the sequentially updated estimates match the estimates obtained
# when all the data were observed at once: `datat` is the first two batches
# concatenated (six rows), estimated with the original priors p_c and p_a.
datat = pd.DataFrame(data={'A': [0, 0, 1, 1, 0, 1], 'B': [0, 1, 0, 1, 1, 0], 'C': [1, 1, 0, 0, 1, 0]})
modelt = BayesianModel([('A', 'C'), ('B', 'C')])
# Fit on `modelt`, the network built for this check, instead of the global
# `model`, so the cell does not depend on whichever network `model` currently
# refers to.
estimatort = BayesianEstimator(modelt, datat)
cpd_C = estimatort.estimate_cpd('C', prior_type="dirichlet", pseudo_counts= p_c)
cpd_A = estimatort.estimate_cpd('A', prior_type="dirichlet", pseudo_counts= p_a)
print(cpd_C)
print(cpd_A)

+------+--------------------+------+------+--------------------+
| A    | A(0)               | A(0) | A(1) | A(1)               |
+------+--------------------+------+------+--------------------+
| B    | B(0)               | B(1) | B(0) | B(1)               |
+------+--------------------+------+------+--------------------+
| C(0) | 0.3333333333333333 | 0.25 | 0.75 | 0.6666666666666666 |
+------+--------------------+------+------+--------------------+
| C(1) | 0.6666666666666666 | 0.75 | 0.25 | 0.3333333333333333 |
+------+--------------------+------+------+--------------------+
+------+-----+
| A(0) | 0.5 |
+------+-----+
| A(1) | 0.5 |
+------+-----+


In [None]:
# Draw 10 multinomial samples of 6 trials each over six equally likely
# outcomes; each row of `x` holds the per-outcome counts of one sample.
x = np.random.multinomial(6, [1/6] * 6, size=10)
print(x)

In [56]:
# Dataframe for example discretized variables
# Example discretized variable: 10 random integer codes drawn from 0..3
# (the upper bound 3 + 1 is exclusive).
pipe_diameter = np.random.randint(0, 3 + 1, size=10)

# TODO: the remaining example columns were left unfinished. The dangling
# assignments were a SyntaxError and the bare `pipe_material` name was
# undefined, so they are kept here as placeholders until their discretization
# is decided:
#   Failure_risk = ...
#   Soil_type = ...
#   pipe_material = ...

In [30]:
# Shape of the state-count table for C (recorded output: (2, 4)).
np.shape(estimator.state_counts('C').values)

(2, 4)

In [26]:
# Show the raw state counts for A as a plain array.
print(np.asarray(estimator.state_counts('A')))

[[2]
 [1]]


In [11]:
# Third sequential step: the second-batch state counts are added to the
# previous pseudo-counts, and the sums are passed as the pseudo-counts when
# estimating from the third batch.
alpha2A = estimator2.state_counts('A').values + alpha1A
alpha2C = estimator2.state_counts('C').values + alpha1C
data3 = pd.DataFrame({
    'A': [1, 1, 0],
    'B': [0, 0, 1],
    'C': [1, 1, 0],
})
estimator3 = BayesianEstimator(model, data3)
cpd_C = estimator3.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=alpha2C)
cpd_A = estimator3.estimate_cpd('A', prior_type="dirichlet", pseudo_counts=alpha2A)
print(cpd_C, cpd_A, sep='\n')

In [14]:
# Alternative second batch in which B is 1 in every row.
data2 = pd.DataFrame(data=dict(A=[1, 0, 1], B=[1, 1, 1], C=[0, 1, 0]))

In [60]:
# Fit every CPD of a four-node network from 1000 rows of random binary data,
# using a BDeu prior with an equivalent sample size of 5.
values = pd.DataFrame(
    np.random.randint(0, 2, size=(1000, 4)),
    columns=list('ABCD'),
)
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D')])
estimator = BayesianEstimator(model, values)
estimator.get_parameters(prior_type='BDeu', equivalent_sample_size=5)

[<TabularCPD representing P(A:2) at 0x7ff07f344080>,
 <TabularCPD representing P(B:2 | A:2, C:2) at 0x7ff07f175908>,
 <TabularCPD representing P(C:2) at 0x7ff07f344d30>,
 <TabularCPD representing P(D:2 | C:2) at 0x7ff07f175b00>]

In [None]:
# Bare expression: displays the repr of whatever `estimator` currently refers
# to (this cell has no recorded output).
estimator