In [None]:
!git clone https://github.com/pgmpy/pgmpy
!cd pgmpy
!python setup.py install

In [4]:
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel

# Build the network structure from its directed (parent, child) edges.
student_model = BayesianModel(
    [
        ('D', 'G'),
        ('I', 'G'),
        ('G', 'L'),
        ('I', 'S'),
    ]
)

# Conditional distribution of G given its parents I and D.
# Each row is one state of G; each column is one combination of the evidence
# states, in the order (I=0,D=0), (I=0,D=1), (I=1,D=0), (I=1,D=1).
grade_cpd = TabularCPD(
    variable='G',          # node name
    variable_card=3,       # number of states of the node
    values=[               # probability table of the node
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
    evidence=['I', 'D'],   # nodes this node depends on
    evidence_card=[2, 2],  # number of states of each of those nodes
)

# Prior distribution of D, a node without parents and with two states.
# `values` needs one row per state and one column per evidence combination, so a
# CPD without evidence is written as a (variable_card, 1) column. The flat
# [[0.6, 0.4]] form has shape (1, 2) and is rejected by current pgmpy releases.
difficulty_cpd = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.6], [0.4]],
)

# Prior distribution of I, a node without parents and with two states.
# `values` needs one row per state and one column per evidence combination, so a
# CPD without evidence is written as a (variable_card, 1) column. The flat
# [[0.7, 0.3]] form has shape (1, 2) and is rejected by current pgmpy releases.
intel_cpd = TabularCPD(
    variable='I',
    variable_card=2,
    values=[[0.7], [0.3]],
)

# Conditional distribution of L given G.
# Two rows (states of L), three columns (one per state of G).
letter_cpd = TabularCPD(
    variable='L',
    variable_card=2,
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
    evidence=['G'],
    evidence_card=[3],
)

# Conditional distribution of S given I.
# Two rows (states of S), two columns (one per state of I).
sat_cpd = TabularCPD(
    variable='S',
    variable_card=2,
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    evidence=['I'],
    evidence_card=[2],
)

In [5]:
# Attach all five CPDs to the network in a single call, keeping the original order.
student_model.add_cpds(grade_cpd, difficulty_cpd, intel_cpd, letter_cpd, sat_cpd)

In [7]:
# List the CPDs currently attached to the network.
student_model.get_cpds()

[<TabularCPD representing P(G:3 | I:2, D:2) at 0x2b642f7dd68>,
 <TabularCPD representing P(D:2) at 0x2b642f7dda0>,
 <TabularCPD representing P(I:2) at 0x2b642f509e8>,
 <TabularCPD representing P(L:2 | G:3) at 0x2b642f7de80>,
 <TabularCPD representing P(S:2 | I:2) at 0x2b642f7de10>]

In [8]:
# Nodes reachable from 'D' through an active trail; no observed nodes are passed here.
student_model.active_trail_nodes('D')

{'D': {'D', 'G', 'L'}}

In [9]:
# Local independence assertions for node 'G'.
student_model.local_independencies('G')

(G _|_ S | D, I)

In [10]:
# Enumerate the independence assertions of the whole network.
student_model.get_independencies()

(D _|_ I, S)
(D _|_ L | G)
(D _|_ S | I)
(D _|_ I | S)
(D _|_ S | I, L)
(D _|_ L, S | G, I)
(D _|_ L | G, S)
(D _|_ S | G, I, L)
(D _|_ L | G, I, S)
(G _|_ S | I)
(G _|_ S | I, L)
(G _|_ S | D, I)
(G _|_ S | D, I, L)
(I _|_ D)
(I _|_ L | G)
(I _|_ D | S)
(I _|_ L | D, G)
(I _|_ L | G, S)
(I _|_ L | D, G, S)
(L _|_ D, I, S | G)
(L _|_ S | I)
(L _|_ I, S | D, G)
(L _|_ S | D, I)
(L _|_ D, S | G, I)
(L _|_ D, I | G, S)
(L _|_ S | D, I, G)
(L _|_ I | D, G, S)
(L _|_ D | G, I, S)
(S _|_ D)
(S _|_ L | G)
(S _|_ D, G, L | I)
(S _|_ G, D | I, L)
(S _|_ D, L | G, I)
(S _|_ L | G, D)
(S _|_ G, L | D, I)
(S _|_ D | G, I, L)
(S _|_ G | D, I, L)
(S _|_ L | G, I, D)

In [11]:
# Convert the Bayesian network into a pgmpy MarkovModel.
student_model.to_markov_model()

<pgmpy.models.MarkovModel.MarkovModel at 0x2b642f7d588>

In [48]:
# Run Bayesian inference
from pgmpy.inference import VariableElimination
# Build a variable-elimination inference object on top of the student network.
student_infer = VariableElimination(student_model)
# Query the distribution of G without supplying any evidence.
prob_G = student_infer.query(variables=['G'])

Finding Elimination Order: : 100%|█████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1002.82it/s]
Eliminating: I: 100%|███████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 267.39it/s]


In [21]:
# Print the query result for G as a text table.
print(prob_G)

+------+----------+
| G    |   phi(G) |
| G(0) |   0.3620 |
+------+----------+
| G(1) |   0.2884 |
+------+----------+
| G(2) |   0.3496 |
+------+----------+


In [35]:
# Query G again, this time with I=1 and D=0 supplied as evidence, then print the table.
prob_G = student_infer.query(variables=['G'], evidence={'I': 1, 'D': 0})
print(prob_G)

Finding Elimination Order: : 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00, 668.68it/s]
Eliminating: L: 100%|███████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 286.58it/s]


+------+----------+
| G    |   phi(G) |
| G(0) |   0.9000 |
+------+----------+
| G(1) |   0.0800 |
+------+----------+
| G(2) |   0.0200 |
+------+----------+


Training a Bayesian network

In [44]:
# Generate data
import numpy as np
import pandas as pd

# Fixed seed and a local generator so that Restart & Run All reproduces the same
# synthetic sample without touching NumPy's global random state.
SEED = 42
rng = np.random.RandomState(SEED)

# 1000 rows of random binary draws (`high` is exclusive, so every value is 0 or 1),
# one column per node name used by the network.
raw_data = rng.randint(low=0, high=2, size=(1000, 5))
data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])
data.head()

Unnamed: 0,D,I,G,L,S
0,0,1,0,0,0
1,0,0,1,1,0
2,0,0,1,1,1
3,1,0,1,1,0
4,1,1,0,0,1


In [45]:
# Define the model
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

model = BayesianModel(
    [
        ('D', 'G'),
        ('I', 'G'),
        ('I', 'S'),
        ('G', 'L'),
    ]
)

# Train the model with maximum likelihood estimation
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Print the conditional probability distribution of every node
for cpd in model.get_cpds():
    print("CPD of {variable}:".format(variable=cpd.variable))
    print(cpd)

CPD of D:
+------+-------+
| D(0) | 0.484 |
+------+-------+
| D(1) | 0.516 |
+------+-------+
CPD of G:
+------+--------------------+--------------------+-------------------+--------------------+
| D    | D(0)               | D(0)               | D(1)              | D(1)               |
+------+--------------------+--------------------+-------------------+--------------------+
| I    | I(0)               | I(1)               | I(0)              | I(1)               |
+------+--------------------+--------------------+-------------------+--------------------+
| G(0) | 0.4666666666666667 | 0.5040983606557377 | 0.508130081300813 | 0.4962962962962963 |
+------+--------------------+--------------------+-------------------+--------------------+
| G(1) | 0.5333333333333333 | 0.4959016393442623 | 0.491869918699187 | 0.5037037037037037 |
+------+--------------------+--------------------+-------------------+--------------------+
CPD of I:
+------+-------+
| I(0) | 0.486 |
+------+-------+
| I(1)