# 5a. Training network parameters using ML estimation
This notebook shows how to learn parameters from data using Thomas.

In [1]:
# Execute the shared preamble notebook in this kernel. Per its output below it
# makes os, logging, pd (pandas) and np (numpy) available and configures logging.
%run '_preamble.ipynb'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

available imports:
  import os
  import logging
  import pandas as pd
  import numpy as np

connect to this kernel with:
  jupyter console --existing d203cde5-414b-4f2d-bf3b-d54456f57fd2

Could not create logging directory "../logs"
Logging to: "../logs/notebook.log"
Current date/time: 26-04-2021, 20:53
Current working directory: "/Users/melle/software-development/thomas-master/notebooks"


In [2]:
from thomas.core import examples
from thomas.core import get_pkg_data
from thomas.core import BayesianNetwork, Factor, CPT, JPT
from thomas.core.bayesiannetwork import DiscreteNetworkNode
from thomas.jupyter import BayesianNetworkWidget

from IPython.display import display, HTML

## Example 17.2

### Create the structure

In [3]:
# Declare the three binary nodes (states 'T' / 'F'). `position` is passed
# through to each node; presumably it is the [x, y] placement used when the
# network is drawn by the widget -- TODO confirm.
layout = {
    'H': [165, 29],
    'S': [66, 141],
    'E': [288, 154],
}
H, S, E = nodes = [
    DiscreteNetworkNode(name, states=['T', 'F'], position=xy)
    for name, xy in layout.items()
]

# H is the single parent of both S and E.
edges = [('H', 'S'), ('H', 'E')]

bn = BayesianNetwork('Example 17.2', nodes, edges)

# Call reset() on every node of the freshly built network.
for node in bn.nodes.values():
    node.reset()

In [4]:
# Inspect the CPT of node H. No CPT was set explicitly, so the
# DiscreteNetworkNode falls back to a uniform distribution
# (0.5 / 0.5 in the output below).
bn['H'].cpt

H,T,F
,0.5,0.5


In [5]:
# Wrap the network in a BayesianNetworkWidget and render it in the notebook.
# The widget's repr below shows that it carries the node marginals and the
# current evidence.
view = BayesianNetworkWidget(bn, height=250)
display(view)

BayesianNetworkWidget(height=250, marginals_and_evidence={'marginals': {'H': {'T': 0.5, 'F': 0.5}, 'S': {'T': …

### Load data to learn parameters with

In [6]:
# Resolve the path of the example dataset via thomas' package-data helper.
# `get_pkg_data` is imported by name in the imports cell: the previous
# `thomas.core.get_pkg_data(...)` spelling needed the bare name `thomas` to be
# bound, which `from thomas.core import ...` statements do not do, so the cell
# could fail with a NameError on a fresh kernel.
filename = get_pkg_data('dataset_17_2.csv')

# The dataset is semicolon-separated.
df = pd.read_csv(filename, sep=';')

# Report the dimensions, then show the frame itself as the cell's rich output.
print(f'df.shape: {df.shape[0]} rows x {df.shape[1]} cols')
df

df.shape: 16 rows x 4 cols


Unnamed: 0,Case,H,S,E
0,1,T,F,T
1,2,T,F,T
2,3,F,T,F
3,4,F,F,T
4,5,T,F,F
5,6,T,F,T
6,7,F,F,F
7,8,T,F,T
8,9,T,F,T
9,10,F,F,T


### Compute the maximum-likelihood parameter estimates from the data

In [7]:
# Perform parameter estimation. `bn` is first rebound to a copy, so the
# estimation is applied to that copy rather than to the network object handed
# to the widget above (assuming copy() does not share CPTs with the original
# -- TODO confirm). ML_estimation then updates the CPTs of the copy in place.
bn = bn.copy()
bn.ML_estimation(df)

In [8]:
# After ML estimation, the CPT of H is no longer uniform: it reflects the
# probabilities found in the data (0.75 for T, 0.25 for F in the output below).
bn['H'].cpt

H,F,T
,0.25,0.75


In [9]:
# Query the distribution of H conditional on E=T using the learned parameters.
bn.P('H|E=T')

H,T,F
,0.846154,0.153846


In [10]:
# This can also be shown by visualizing the marginals in a widget.
# NOTE(review): the saved widget output shows P(H=T) = 0.846, i.e. the value
# conditional on E=T, while evidence is only set in the cells further down.
# Presumably the widget follows the network's evidence live; on a fresh
# top-to-bottom run it may show the unconditioned marginals until then -- confirm.
view2 = BayesianNetworkWidget(bn, height=250)
display(view2)


BayesianNetworkWidget(height=250, marginals_and_evidence={'marginals': {'H': {'T': 0.8461538461538461, 'F': 0.…

In [11]:
# Start from a clean slate: presumably this clears any evidence currently set
# on the network -- confirm against the thomas API.
bn.reset_evidence()

In [12]:
# Set hard evidence on the network: variable E is observed in state T.
bn.set_evidence_hard('E', 'T')

In [13]:
# Show the evidence currently registered on the network
# ({'E': 'T'} in the output below).
bn.evidence

{'E': 'T'}