# Bayesian Networks in Python

I will build a Bayesian Network for the Alarm example in the textbook using the Python library pomegranate. 

See https://pomegranate.readthedocs.io/en/latest/BayesianNetwork.html

In [50]:
from pomegranate import *

# --- Root nodes: prior probabilities of a burglary and of an earthquake ---
Burglary = Node(
    DiscreteDistribution({'B=true': .001, 'B=false': .999}),
    name="Burglary",
)
Earthquake = Node(
    DiscreteDistribution({'E=true': .002, 'E=false': .998}),
    name="Earthquake",
)

# --- Alarm: depends on both Burglary and Earthquake ---
# Each row is [Burglary value, Earthquake value, Alarm value, probability];
# the parent columns follow the order of the parent list passed below.
Alarm = Node(
    ConditionalProbabilityTable(
        [
            ['B=true',  'E=true',  'A=true',  .95],
            ['B=true',  'E=false', 'A=true',  .94],
            ['B=false', 'E=true',  'A=true',  .29],
            ['B=false', 'E=false', 'A=true',  .001],
            ['B=true',  'E=true',  'A=false', .05],
            ['B=true',  'E=false', 'A=false', .06],
            ['B=false', 'E=true',  'A=false', .71],
            ['B=false', 'E=false', 'A=false', .999],
        ],
        [Burglary.distribution, Earthquake.distribution],
    ),
    name="Alarm",
)

# --- JohnCalls and MaryCalls: each depends only on Alarm ---
# Each row is [Alarm value, caller value, probability].
JohnCalls = Node(
    ConditionalProbabilityTable(
        [
            ['A=true',  'J=true',  .90],
            ['A=true',  'J=false', .10],
            ['A=false', 'J=true',  .05],
            ['A=false', 'J=false', .95],
        ],
        [Alarm.distribution],
    ),
    name="JohnCalls",
)

MaryCalls = Node(
    ConditionalProbabilityTable(
        [
            ['A=true',  'M=true',  .70],
            ['A=true',  'M=false', .30],
            ['A=false', 'M=true',  .01],
            ['A=false', 'M=false', .99],
        ],
        [Alarm.distribution],
    ),
    name="MaryCalls",
)

# --- Assemble the network ---
# The order of the states here is the column order used by the sampling and
# probability calls on this model.
model = BayesianNetwork("Alarm Problem")
model.add_states(Burglary, Earthquake, Alarm, JohnCalls, MaryCalls)

# Directed edges run from parent to child.
model.add_edge(Burglary, Alarm)
model.add_edge(Earthquake, Alarm)
model.add_edge(Alarm, JohnCalls)
model.add_edge(Alarm, MaryCalls)

# The model is baked once all states and edges have been added.
model.bake()

In [84]:
# Plotting the network requires pygraphviz, which is not installed in this environment.
# model.plot()

# Sample from the Bayesian Network

## Unconditional Samples

In [58]:
# Draw 10 joint samples with no evidence; each row holds one value per node
# (Burglary, Earthquake, Alarm, JohnCalls, MaryCalls).
model.sample(n=10)

array([['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=true', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=false', 'M=false'],
       ['B=false', 'E=false', 'A=false', 'J=true', 'M=false']],
      dtype='<U7')

## Sampling with evidence

Fixing `Burglary` is easy because it is a root node: it has no parents, so its value does not depend on any other variable.

In [80]:
# Draw 10 samples with the Burglary node fixed to 'B=true'.
model.sample(
    n=10,
    evidences=[{'Burglary': 'B=true'}],
)

array([['B=true', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=true', 'E=false', 'A=true', 'J=false', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true']], dtype='<U7')

Sampling with a given value for `Alarm` is more difficult since it depends on `Burglary` and `Earthquake`.

In [83]:
# Sampling with evidence on Alarm is expected to fail here with the default
# sampler. The failure is the point of this cell, so catch it and show the
# message instead of letting the exception stop a "Restart & Run All".
try:
    print(model.sample(n=10, evidences=[{'Alarm': 'A=true'}]))
except Exception as err:  # the library raises a plain Exception (see output)
    print("{}: {}".format(type(err).__name__, err))

Exception: Maximum iteration limit. Make sure the state configuration hinted at by evidence is reasonably reachable for this network or lower min_prob

Rejection sampling hits its iteration limit because the alarm rarely goes off (the marginal probability of `A=true` is only about 0.0025), so almost every sample is rejected. Use Gibbs sampling instead.

In [82]:
# Same evidence on Alarm as before, but drawn with the Gibbs sampler.
model.sample(
    n=10,
    evidences=[{'Alarm': 'A=true'}],
    algorithm="gibbs",
)

array([['B=false', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=true', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=false', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=false', 'E=false', 'A=true', 'J=false', 'M=true'],
       ['B=false', 'E=false', 'A=true', 'J=true', 'M=false'],
       ['B=false', 'E=false', 'A=true', 'J=true', 'M=true'],
       ['B=false', 'E=false', 'A=true', 'J=false', 'M=true'],
       ['B=false', 'E=true', 'A=true', 'J=true', 'M=true'],
       ['B=false', 'E=true', 'A=true', 'J=true', 'M=true']], dtype='<U32')

Sampling can be used to estimate (conditional) probabilities. 

# Estimating Probabilities

The library provides functions to calculate/estimate probabilities.

## Marginal probability distribution

Marginal means that no evidence is given.

In [109]:
# Marginal distribution of each node with no evidence applied; the result
# holds one distribution per node.
model.marginal()

array([{
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "B=true" : 0.0010000000000004432,
                   "B=false" : 0.9989999999999996
               }
           ],
           "frozen" : false
       }                                            ,
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "E=true" : 0.0020000000000004424,
                   "E=false" : 0.9979999999999996
               }
           ],
           "frozen" : false
       }                                            ,
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "A=false" : 0.997483557999999,
                   "A=true" :

## Joint probability

In [103]:
# Each row is one query over (Burglary, Earthquake, Alarm, JohnCalls, MaryCalls).
# None leaves that variable unspecified, so the result is the probability of
# the specified values only.
model.probability([
    ['B=false', None,      None,      None,      None],  # P(B=false)
    [None,      'E=false',  None,      None,      None],  # P(E=false)
    ['B=false', 'E=false',  None,      None,      None]])  # P(B=false, E=false)

array([0.999   , 0.998   , 0.997002])

In [105]:
# Sanity check: B and E are independent, so
# P(B=false AND E=false) = P(B=false) * P(E=false), which should match the
# third value returned by model.probability above (up to rounding).
0.999 * 0.998

0.9970019999999999

## Conditional probabilities given Evidence

In [115]:
# Posterior distribution of each node given the evidence Alarm = 'A=true'.
# The observed node appears in the result as its fixed value ('A=true')
# rather than as a distribution.
model.predict_proba({'Alarm': 'A=true'})

array([{
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "B=true" : 0.37355122828189946,
                   "B=false" : 0.6264487717181005
               }
           ],
           "frozen" : false
       }                                          ,
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "E=true" : 0.23100870196890555,
                   "E=false" : 0.7689912980310946
               }
           ],
           "frozen" : false
       }                                          , 'A=true',
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "J=true" : 0.8999999999999998,
                   "J=false

In [113]:
# Posterior distribution of the unobserved nodes given that both John and
# Mary called.
model.predict_proba({'JohnCalls': 'J=true', 'MaryCalls': 'M=true'})

array([{
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "B=true" : 0.2841718353644582,
                   "B=false" : 0.7158281646355419
               }
           ],
           "frozen" : false
       }                                         ,
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "E=true" : 0.17606683840509596,
                   "E=false" : 0.823933161594904
               }
           ],
           "frozen" : false
       }                                          ,
       {
           "class" : "Distribution",
           "dtype" : "str",
           "name" : "DiscreteDistribution",
           "parameters" : [
               {
                   "A=false" : 0.2393079611368433,
                   "A=true" : 0.7606920