In [4]:
import sys
!{sys.executable} -m pip install pgmpy

Defaulting to user installation because normal site-packages is not writeable
Collecting pgmpy
  Using cached pgmpy-0.1.20-py3-none-any.whl (1.9 MB)
Collecting statsmodels
  Using cached statsmodels-0.13.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)
Collecting networkx
  Using cached networkx-2.8.8-py3-none-any.whl (2.0 MB)
Collecting patsy>=0.5.2
  Using cached patsy-0.5.3-py2.py3-none-any.whl (233 kB)
Installing collected packages: patsy, networkx, statsmodels, pgmpy
Successfully installed networkx-2.8.8 patsy-0.5.3 pgmpy-0.1.20 statsmodels-0.13.5


In [2]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

  from .autonotebook import tqdm as notebook_tqdm


¿Cómo serán sus notas? El siguiente problema muestra la red bayesiana asociada a las notas (G), el nivel de inteligencia del estudiante (I), la dificultad de la prueba (D), la prueba estandarizada (S) y la carta de recomendación (L).

In [3]:
# Student network, given as directed (parent, child) edges:
# D and I are the parents of G, G is the parent of L, and I is the parent of S.
model = BayesianModel(
    [
        ('D', 'G'),
        ('I', 'G'),
        ('G', 'L'),
        ('I', 'S'),
    ]
)



In [4]:
# D and I have no parents, so each CPD is a single column of prior probabilities.
cpd_d = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.6], [0.4]],
)
cpd_i = TabularCPD(
    variable='I',
    variable_card=2,
    values=[[0.7], [0.3]],
)

# In pgmpy's tabular layout the rows are the states of the variable and the
# columns are the combinations of evidence states.
# P(G | I, D): columns run over (I=0, D=0), (I=0, D=1), (I=1, D=0), (I=1, D=1).
cpd_g = TabularCPD(
    variable='G',
    variable_card=3,
    values=[
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
    evidence=['I', 'D'],
    evidence_card=[2, 2],
)

# P(L | G): one column per state of G.
cpd_l = TabularCPD(
    variable='L',
    variable_card=2,
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
    evidence=['G'],
    evidence_card=[3],
)

# P(S | I): one column per state of I.
cpd_s = TabularCPD(
    variable='S',
    variable_card=2,
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    evidence=['I'],
    evidence_card=[2],
)

# Attach the CPDs defined above to the network.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# check_model() validates the network structure together with its CPDs,
# including that the CPDs are correctly defined and sum to 1.
model.check_model()

True

In [5]:
# The same CPDs, this time with explicit state names. When no names are given
# (as in the previous cell) pgmpy assigns 0, 1, 2, ... automatically.

cpd_d_sn = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.6], [0.4]],
    state_names={'D': ['Easy', 'Hard']},
)
cpd_i_sn = TabularCPD(
    variable='I',
    variable_card=2,
    values=[[0.7], [0.3]],
    state_names={'I': ['Dumb', 'Intelligent']},
)

# P(G | I, D); state names are supplied for the variable and for each parent.
cpd_g_sn = TabularCPD(
    variable='G',
    variable_card=3,
    values=[
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
    evidence=['I', 'D'],
    evidence_card=[2, 2],
    state_names={
        'G': ['A', 'B', 'C'],
        'I': ['Dumb', 'Intelligent'],
        'D': ['Easy', 'Hard'],
    },
)

# P(L | G)
cpd_l_sn = TabularCPD(
    variable='L',
    variable_card=2,
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
    evidence=['G'],
    evidence_card=[3],
    state_names={
        'L': ['Bad', 'Good'],
        'G': ['A', 'B', 'C'],
    },
)

# P(S | I)
cpd_s_sn = TabularCPD(
    variable='S',
    variable_card=2,
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    evidence=['I'],
    evidence_card=[2],
    state_names={
        'S': ['Bad', 'Good'],
        'I': ['Dumb', 'Intelligent'],
    },
)

# The model already holds CPDs for these variables, so pgmpy is expected to
# warn that it is replacing them with the new ones.
model.add_cpds(cpd_d_sn, cpd_i_sn, cpd_g_sn, cpd_l_sn, cpd_s_sn)
model.check_model()

True

In [8]:
# Show the named-state CPD of G as a table (rows: states of G, columns: I and D states).
print(cpd_g_sn)

+------+---------+---------+----------------+----------------+
| I    | I(Dumb) | I(Dumb) | I(Intelligent) | I(Intelligent) |
+------+---------+---------+----------------+----------------+
| D    | D(Easy) | D(Hard) | D(Easy)        | D(Hard)        |
+------+---------+---------+----------------+----------------+
| G(A) | 0.3     | 0.05    | 0.9            | 0.5            |
+------+---------+---------+----------------+----------------+
| G(B) | 0.4     | 0.25    | 0.08           | 0.3            |
+------+---------+---------+----------------+----------------+
| G(C) | 0.3     | 0.7     | 0.02           | 0.2            |
+------+---------+---------+----------------+----------------+


In [9]:

# Same table, but fetched from the model to confirm which CPD is attached to G.
print(model.get_cpds('G'))

+------+---------+---------+----------------+----------------+
| I    | I(Dumb) | I(Dumb) | I(Intelligent) | I(Intelligent) |
+------+---------+---------+----------------+----------------+
| D    | D(Easy) | D(Hard) | D(Easy)        | D(Hard)        |
+------+---------+---------+----------------+----------------+
| G(A) | 0.3     | 0.05    | 0.9            | 0.5            |
+------+---------+---------+----------------+----------------+
| G(B) | 0.4     | 0.25    | 0.08           | 0.3            |
+------+---------+---------+----------------+----------------+
| G(C) | 0.3     | 0.7     | 0.02           | 0.2            |
+------+---------+---------+----------------+----------------+


In [10]:
# Prior distribution attached to I.
print(model.get_cpds('I'))

+----------------+-----+
| I(Dumb)        | 0.7 |
+----------------+-----+
| I(Intelligent) | 0.3 |
+----------------+-----+


In [11]:
# Prior distribution attached to D.
print(model.get_cpds('D'))

+---------+-----+
| D(Easy) | 0.6 |
+---------+-----+
| D(Hard) | 0.4 |
+---------+-----+


In [12]:
# Local independencies of G and of L, displayed together as one tuple.
(model.local_independencies('G'), model.local_independencies('L'))

((G ⟂ S | D, I), (L ⟂ I, D, S | G))

In [13]:
# Local independencies of every node in the student network, queried in one call.
model.local_independencies(['D', 'I', 'S', 'G', 'L']) 

(D ⟂ I, S)
(I ⟂ D)
(S ⟂ G, D, L | I)
(G ⟂ S | D, I)
(L ⟂ I, D, S | G)

In [14]:
# Active trail: for two variables A and B in a network, there is an active trail
#               between them if a change in A can influence the values of B.
# active_trail_nodes returns, keyed by the node passed in, the set of nodes
# that are affected (i.e. correlated) by a change in that node.
model.active_trail_nodes('D')

{'D': {'D', 'G', 'L'}}

# Inferencia

In [15]:
from pgmpy.inference import VariableElimination

# Inference engine over the student network.
infer = VariableElimination(model)

# Marginal distribution of the grade G with no evidence observed.
g_dist = infer.query(variables=['G'])
print(g_dist)

+------+----------+
| G    |   phi(G) |
| G(A) |   0.3620 |
+------+----------+
| G(B) |   0.2884 |
+------+----------+
| G(C) |   0.3496 |
+------+----------+




In [16]:
# Distribution of G when both parents are observed: an easy test and an intelligent student.
print(infer.query(['G'], evidence={'D': 'Easy', 'I': 'Intelligent'}))

+------+----------+
| G    |   phi(G) |
| G(A) |   0.9000 |
+------+----------+
| G(B) |   0.0800 |
+------+----------+
| G(C) |   0.0200 |
+------+----------+


# Predicción para nuevos puntos de datos

In [17]:
# Most probable state of G with no evidence (MAP query instead of the full distribution).
infer.map_query(['G'])

Finding Elimination Order: : 100%|███████████████████████████| 2/2 [00:00<00:00, 2957.90it/s]
Eliminating: I: 100%|████████████████████████████████████████| 2/2 [00:00<00:00, 1836.79it/s]


{'G': 'A'}

In [18]:
# Most probable state of G given an easy test and an intelligent student.
infer.map_query(['G'], evidence={'D': 'Easy', 'I': 'Intelligent'})

Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]


{'G': 'A'}

In [19]:
# Most probable state of G when every other variable (D, I, L, S) is observed.
infer.map_query(['G'], evidence={'D': 'Easy', 'I': 'Intelligent', 'L': 'Good', 'S': 'Good'})

Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]


{'G': 'A'}

In [20]:
# Most probable state of the recommendation letter L with no evidence.
infer.map_query(['L'])

Finding Elimination Order: : 100%|███████████████████████████| 3/3 [00:00<00:00, 5509.16it/s]
Eliminating: I: 100%|████████████████████████████████████████| 3/3 [00:00<00:00, 2058.39it/s]


{'L': 'Good'}

In [21]:
# Most probable state of L given a hard test and an intelligent student;
# G is not observed here, so it is eliminated during inference.
infer.map_query(['L'], evidence={'D': 'Hard', 'I': 'Intelligent'})

Finding Elimination Order: : 100%|███████████████████████████| 1/1 [00:00<00:00, 1885.93it/s]
Eliminating: G: 100%|████████████████████████████████████████| 1/1 [00:00<00:00, 1934.64it/s]


{'L': 'Good'}

In [22]:
# CPD of L given G, for reading the MAP results above against the raw table.
print(model.get_cpds('L'))

+---------+------+------+------+
| G       | G(A) | G(B) | G(C) |
+---------+------+------+------+
| L(Bad)  | 0.1  | 0.4  | 0.99 |
+---------+------+------+------+
| L(Good) | 0.9  | 0.6  | 0.01 |
+---------+------+------+------+


## Definamos una estructura

In [25]:
# Variables of the network:
#   A - age: under 30, 30 to 60, or over 60
#   S - sex: male or female
#   E - education: up to school level, or university
#   O - occupation: employee or self-employed
#   R - residence: city or town
#   T - travel, i.e. means of transport: car, train or other

# Note: this rebinds `model`, which until now referred to the student network.
model = BayesianModel([
    ('A', 'E'),
    ('S', 'E'),
    ('E', 'O'),
    ('E', 'R'),
    ('O', 'T'),
    ('R', 'T'),
])

In [None]:
# NOTE: this cell used to call `model.fit()` with no arguments. fit() estimates
# the CPDs from a dataset and needs that dataset as an argument, so the bare
# call raises a TypeError and stops a "Restart & Run All".
# No dataset is used in this notebook: the CPDs of this network are written out
# by hand in the following cell, so nothing needs to be fitted here.

In [68]:
# A and S have no parents, so each CPD is a single column of prior probabilities.
cpd_a = TabularCPD(
    variable='A',
    variable_card=3,
    values=[[0.472], [0.208], [0.32]],
)
# Note: this rebinds cpd_s, previously the student network's CPD for its own S node.
cpd_s = TabularCPD(
    variable='S',
    variable_card=2,
    values=[[0.402], [0.598]],
)

# In pgmpy's tabular layout the rows are the states of the variable and the
# columns are the combinations of evidence states.
# P(E | A, S): columns run over (A=0, S=0), (A=0, S=1), (A=1, S=0), ..., (A=2, S=1).
cpd_e = TabularCPD(
    variable='E',
    variable_card=2,
    values=[
        [0.639, 0.846, 0.538, 0.719, 0.892, 0.811],
        [0.361, 0.154, 0.462, 0.281, 0.108, 0.189],
    ],
    evidence=['A', 'S'],
    evidence_card=[3, 2],
)

# P(R | E)
cpd_r = TabularCPD(
    variable='R',
    variable_card=2,
    values=[
        [0.1, 0.4],
        [0.9, 0.6],
    ],
    evidence=['E'],
    evidence_card=[2],
)

# P(O | E)
cpd_O = TabularCPD(
    variable='O',
    variable_card=2,
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    evidence=['E'],
    evidence_card=[2],
)

# P(T | O, R): columns run over (O=0, R=0), (O=0, R=1), (O=1, R=0), (O=1, R=1).
cpd_T = TabularCPD(
    variable='T',
    variable_card=3,
    values=[
        [0.75, 0.2, 0.55, 0.05],
        [0.05, 0.7, 0.15, 0.85],
        [0.2, 0.1, 0.3, 0.1],
    ],
    evidence=['O', 'R'],
    evidence_card=[2, 2],
)

# (Original note, translated:) the sequences to decide on are based on belief.

# Attach the CPDs defined above to the network.
model.add_cpds(cpd_a, cpd_s, cpd_e, cpd_r, cpd_O, cpd_T)

# check_model() validates the network structure together with its CPDs,
# including that the CPDs are correctly defined and sum to 1.
model.check_model()

True

In [69]:
# Local independencies of every node in the second network, queried in one call.
model.local_independencies(['A', 'S', 'O', 'R', 'T','E']) 

(A ⟂ S)
(S ⟂ A)
(O ⟂ A, R, S | E)
(R ⟂ O, A, S | E)
(T ⟂ E, A, S | O, R)

In [70]:
# Show P(E | A, S); no state names were supplied, so states appear as indices 0, 1, 2.
print(cpd_e)

+------+-------+-------+-------+-------+-------+-------+
| A    | A(0)  | A(0)  | A(1)  | A(1)  | A(2)  | A(2)  |
+------+-------+-------+-------+-------+-------+-------+
| S    | S(0)  | S(1)  | S(0)  | S(1)  | S(0)  | S(1)  |
+------+-------+-------+-------+-------+-------+-------+
| E(0) | 0.639 | 0.846 | 0.538 | 0.719 | 0.892 | 0.811 |
+------+-------+-------+-------+-------+-------+-------+
| E(1) | 0.361 | 0.154 | 0.462 | 0.281 | 0.108 | 0.189 |
+------+-------+-------+-------+-------+-------+-------+


In [71]:
# Rebuild the inference engine: `model` now refers to the second network, so
# the engine created for the student network no longer applies.
infer = VariableElimination(model)

# Marginal distribution of E with no evidence.
# NOTE(review): the name g_dist is carried over from the earlier grade query;
# here it holds the distribution of E, not G.
g_dist = infer.query(['E'])

In [72]:
# Print the marginal of E computed in the previous cell (stored under the name g_dist).
print(g_dist)

+------+----------+
| E    |   phi(E) |
| E(0) |   0.7644 |
+------+----------+
| E(1) |   0.2356 |
+------+----------+


In [73]:
 # Marginal distribution of O with no evidence.
 print(infer.query(['O']))

+------+----------+
| O    |   phi(O) |
| O(0) |   0.7733 |
+------+----------+
| O(1) |   0.2267 |
+------+----------+


In [75]:
# Distribution of T given E, S and A. The CPDs of this network were defined
# without state names, so evidence states are addressed by integer index.
# NOTE(review): in this graph every path from A or S to T passes through E, so
# once E is observed the S and A evidence should not change the result — confirm.
print(infer.query(['T'], evidence={'E': 0, 'S': 1,'A':2}))

+------+----------+
| T    |   phi(T) |
| T(0) |   0.2473 |
+------+----------+
| T(1) |   0.6422 |
+------+----------+
| T(2) |   0.1105 |
+------+----------+
