In [5]:
# import sys
# !{sys.executable} -m pip install pgmpy
# !{sys.executable} -m pip install --upgrade pip

In [7]:
!pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.17-py3-none-any.whl (1.9 MB)
[?25l[K     |▏                               | 10 kB 14.1 MB/s eta 0:00:01[K     |▍                               | 20 kB 18.6 MB/s eta 0:00:01[K     |▌                               | 30 kB 23.7 MB/s eta 0:00:01[K     |▊                               | 40 kB 28.1 MB/s eta 0:00:01[K     |▉                               | 51 kB 26.1 MB/s eta 0:00:01[K     |█                               | 61 kB 28.3 MB/s eta 0:00:01[K     |█▏                              | 71 kB 27.7 MB/s eta 0:00:01[K     |█▍                              | 81 kB 25.9 MB/s eta 0:00:01[K     |█▌                              | 92 kB 27.6 MB/s eta 0:00:01[K     |█▊                              | 102 kB 29.7 MB/s eta 0:00:01[K     |█▉                              | 112 kB 29.7 MB/s eta 0:00:01[K     |██                              | 122 kB 29.7 MB/s eta 0:00:01[K     |██▏                             | 133 kB 29.7 MB/s eta 0:00

In [12]:
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Structure of the student network: Difficulty and Intelligence are the
# parents of Grade, Intelligence is also the parent of SAT, and Letter
# depends on Grade only.
student_model = BayesianNetwork(
    [
        ('Difficulty', 'Grade'),
        ('Intelligence', 'Grade'),
        ('Intelligence', 'SAT'),
        ('Grade', 'Letter'),
    ]
)

# State encodings used by the queries in this notebook:
#   Difficulty:   0 = easy,   1 = hard
#   Intelligence: 0 = normal, 1 = superior
#   Grade:        0 = A,      1 = B (good), 2 = C (poor)
#   SAT:          1 = high
#   Letter:       0 = weak,   1 = strong

# Priors of the two root nodes.
dif_cpd = TabularCPD(variable='Difficulty', variable_card=2, values=[[0.6], [0.4]])
int_cpd = TabularCPD(variable='Intelligence', variable_card=2, values=[[0.7], [0.3]])

# P(Grade | Intelligence, Difficulty). Columns run over the parent states in
# the order (I=0,D=0), (I=0,D=1), (I=1,D=0), (I=1,D=1); rows are Grade 0..2.
gra_cpd = TabularCPD(
    variable='Grade',
    variable_card=3,
    values=[
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ],
    evidence=['Intelligence', 'Difficulty'],
    evidence_card=[2, 2],
)

# P(SAT | Intelligence): one column per Intelligence state.
sat_cpd = TabularCPD(
    variable='SAT',
    variable_card=2,
    values=[
        [0.95, 0.2],
        [0.05, 0.8],
    ],
    evidence=['Intelligence'],
    evidence_card=[2],
)

# P(Letter | Grade): one column per Grade state.
let_cpd = TabularCPD(
    variable='Letter',
    variable_card=2,
    values=[
        [0.1, 0.4, 0.99],
        [0.9, 0.6, 0.01],
    ],
    evidence=['Grade'],
    evidence_card=[3],
)

# Attach every CPD to the network.
student_model.add_cpds(dif_cpd, int_cpd, gra_cpd, sat_cpd, let_cpd)
# To inspect what was registered: student_model.get_cpds()

In [13]:
# Validate the assembled network before running any inference on it.
# The call's return value is the cell output (True in the recorded run).
student_model.check_model()

True

A smart student in an easy class is 90% likely to get an A, 8% likely to get a B, and 2% likely to get a C.
Conversely, a smart student in a hard class is only 50% likely to get an A (with 30% for a B and 20% for a C).

In [14]:
# Print each conditional probability table, in the order they were defined.
for cpd in (dif_cpd, int_cpd, gra_cpd, sat_cpd, let_cpd):
    print(cpd)

+---------------+-----+
| Difficulty(0) | 0.6 |
+---------------+-----+
| Difficulty(1) | 0.4 |
+---------------+-----+
+-----------------+-----+
| Intelligence(0) | 0.7 |
+-----------------+-----+
| Intelligence(1) | 0.3 |
+-----------------+-----+
+--------------+-----------------+-----+-----------------+
| Intelligence | Intelligence(0) | ... | Intelligence(1) |
+--------------+-----------------+-----+-----------------+
| Difficulty   | Difficulty(0)   | ... | Difficulty(1)   |
+--------------+-----------------+-----+-----------------+
| Grade(0)     | 0.3             | ... | 0.5             |
+--------------+-----------------+-----+-----------------+
| Grade(1)     | 0.4             | ... | 0.3             |
+--------------+-----------------+-----+-----------------+
| Grade(2)     | 0.3             | ... | 0.2             |
+--------------+-----------------+-----+-----------------+
+--------------+-----------------+-----------------+
| Intelligence | Intelligence(0) | Intelligence(

In [15]:
# Inference engine (variable elimination) reused by every query below.
student_inference = VariableElimination(student_model)

# Marginal distribution of Letter with no evidence: the prior chance of a
# strong letter is roughly 50-50.
print('P(Letter)')
q_1 = student_inference.query(variables=['Letter'])
print(q_1)

P(Letter)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

+-----------+---------------+
| Letter    |   phi(Letter) |
| Letter(0) |        0.4977 |
+-----------+---------------+
| Letter(1) |        0.5023 |
+-----------+---------------+


In [16]:
# Knowing only that intelligence is normal (Intelligence=0), the letter is
# more likely to be weak than strong.
print('P(Letter|Intelligence=normal)')
q_2 = student_inference.query(
    variables=['Letter'],
    evidence={'Intelligence': 0},
)
print(q_2)

P(Letter|Intelligence=normal)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

+-----------+---------------+
| Letter    |   phi(Letter) |
| Letter(0) |        0.6114 |
+-----------+---------------+
| Letter(1) |        0.3886 |
+-----------+---------------+


In [17]:
# If the course is also known to be easy (Difficulty=0), and since the letter
# depends only on the grade, the chance of a strong letter rises above 51%.
print('P(Letter|Intelligence=normal,Difficulty=easy)')
q_3 = student_inference.query(
    variables=['Letter'],
    evidence={'Intelligence': 0, 'Difficulty': 0},
)
print(q_3)

P(Letter|Intelligence=normal,Difficulty=easy)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

+-----------+---------------+
| Letter    |   phi(Letter) |
| Letter(0) |        0.4870 |
+-----------+---------------+
| Letter(1) |        0.5130 |
+-----------+---------------+


In [18]:
# Prior over Intelligence: normal (state 0) in 70% of the cases.
print('P(Intelligence)')
q_4 = student_inference.query(variables=['Intelligence'])
print(q_4)

P(Intelligence)


0it [00:00, ?it/s]

0it [00:00, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.7000 |
+-----------------+---------------------+
| Intelligence(1) |              0.3000 |
+-----------------+---------------------+


In [19]:
# Evidence: the student received a poor grade (Grade=2). With nothing else
# known, superior intelligence is then only about 8% likely.
print('P(Intelligence|Grade=poor)')
q_5 = student_inference.query(
    variables=['Intelligence'],
    evidence={'Grade': 2},
)
print(q_5)

P(Intelligence|Grade=poor)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.9211 |
+-----------------+---------------------+
| Intelligence(1) |              0.0789 |
+-----------------+---------------------+


In [20]:
# A weak letter (Letter=0) lowers the probability of superior intelligence
# from the 30% prior to 14% -- a smaller drop than a poor grade causes.
print('P(Intelligence|Letter=weak)')
q_6 = student_inference.query(
    variables=['Intelligence'],
    evidence={'Letter': 0},
)
print(q_6)

P(Intelligence|Letter=weak)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.8600 |
+-----------------+---------------------+
| Intelligence(1) |              0.1400 |
+-----------------+---------------------+


In [21]:
# Once Grade=poor is observed, the strength of the letter adds no information:
# P(Intelligence|Letter=weak,Grade=poor) equals P(Intelligence|Grade=poor).
# That is, Intelligence is independent of Letter given Grade.
print('P(Intelligence|Letter=weak,Grade=poor) = P(Intelligence|Grade=poor)')
q_7 = student_inference.query(
    variables=['Intelligence'],
    evidence={'Letter': 0, 'Grade': 2},
)
print(q_7)

P(Intelligence|Letter=weak,Grade=poor) = P(Intelligence|Grade=poor)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.9211 |
+-----------------+---------------------+
| Intelligence(1) |              0.0789 |
+-----------------+---------------------+


In [22]:
# A poor grade combined with a high SAT score (SAT=1) pushes the likelihood of
# superior intelligence above 57%: only 5% of students with normal
# intelligence obtain a high SAT.
print('P(Intelligence|Letter=weak,Grade=poor,SAT=high)')
q_8 = student_inference.query(
    variables=['Intelligence'],
    evidence={'Letter': 0, 'Grade': 2, 'SAT': 1},
)
print(q_8)

P(Intelligence|Letter=weak,Grade=poor,SAT=high)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.4217 |
+-----------------+---------------------+
| Intelligence(1) |              0.5783 |
+-----------------+---------------------+


In [23]:
# With a grade B (Grade=1), superior intelligence is about 17.5% likely:
# higher than after a poor grade (about 8%), yet still below the 30% prior.
print('P(Intelligence|Grade=good)')
q_9 = student_inference.query(
    variables=['Intelligence'],
    evidence={'Grade': 1},
)
print(q_9)

P(Intelligence|Grade=good)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.8252 |
+-----------------+---------------------+
| Intelligence(1) |              0.1748 |
+-----------------+---------------------+


In [24]:
# Evidence here is a grade B (Grade=1, "good") in a hard course (Difficulty=1).
# The printed label previously read "Grade=poor", which did not match the
# evidence actually passed to the query; it now names the queried state.
# Course difficulty explains away the grade B, so the chance of superior
# intelligence rises above its prior probability (30%).
# NOTE: this deliberately keeps the name q_9, overwriting the result of the
# P(Intelligence|Grade=good) query, so any later cell reading q_9 is unaffected.
print('P(Intelligence|Grade=good,Difficulty=hard)')
q_9 = student_inference.query(['Intelligence'], {'Difficulty': 1, 'Grade': 1})
print(q_9)

P(Intelligence|Grade=poor,Difficulty=hard)


0it [00:00, ?it/s]

0it [00:00, ?it/s]

+-----------------+---------------------+
| Intelligence    |   phi(Intelligence) |
| Intelligence(0) |              0.6604 |
+-----------------+---------------------+
| Intelligence(1) |              0.3396 |
+-----------------+---------------------+
