# 1. Factors

In [1]:
%run '_preamble.ipynb'

Python version: 3.8.10
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

available imports:
  import os
  import logging
  import pandas as pd
  import numpy as np

connect to this kernel with:
  jupyter console --existing 6d0c7476-e11c-4540-9b05-6e5fd8409ba7

Could not create logging directory "../logs"
Logging to: "../logs/notebook.log"
Current date/time: 15-06-2021, 17:28
Current working directory: "/Users/melle/software-development/thomas-master/notebooks"


In [2]:
# All imports for this notebook, grouped: stdlib, third-party, project.
import itertools

from IPython.display import display, HTML

import thomas.core
from thomas.core.factors import Factor
from thomas.core import examples

# Record the library version the outputs below were produced with.
print(f"Using thomas.core version '{thomas.core.__version__}'")

Using thomas.core version '0.1.3'


In [3]:
# Build the Factors for the Sprinkler network.
#
# A Factor is created from a flat list of values plus a dict that maps each
# variable to its states. The values are laid out row by row: the first
# variable in the dict varies slowest, the last one fastest. Both the order of
# the variables and the order of their states therefore matter, which relies on
# dicts preserving insertion order (true in CPython 3.6, and guaranteed by the
# language from Python 3.7 onwards).

# P(A)
fA = Factor([0.6, 0.4], {'A': ['a1', 'a0']})

# P(B|A)
fB_A = Factor(
    [
        0.20, 0.80,  # A = a1
        0.75, 0.25,  # A = a0
    ],
    {'A': ['a1', 'a0'], 'B': ['b1', 'b0']},
)

# P(C|A)
fC_A = Factor(
    [
        0.8, 0.2,  # A = a1
        0.1, 0.9,  # A = a0
    ],
    {'A': ['a1', 'a0'], 'C': ['c1', 'c0']},
)

# P(D|B,C)
fD_BC = Factor(
    [
        0.95, 0.05,  # B = b1, C = c1
        0.90, 0.10,  # B = b1, C = c0
        0.80, 0.20,  # B = b0, C = c1
        0.00, 1.00,  # B = b0, C = c0
    ],
    {'B': ['b1', 'b0'], 'C': ['c1', 'c0'], 'D': ['d1', 'd0']},
)

# P(E|C)
fE_C = Factor(
    [
        0.7, 0.3,  # C = c1
        0.0, 1.0,  # C = c0
    ],
    {'C': ['c1', 'c0'], 'E': ['e1', 'e0']},
)

In [4]:
# fA has no conditioning variables, so it is essentially the prior distribution P(A).
fA

factor(A)
A 
a1    0.6
a0    0.4
dtype: float64

In [5]:
# fB_A holds a conditional distribution, P(B|A): for each state of A the
# entries over B sum to 1.
fB_A

factor(A,B)
A   B 
a1  b1    0.20
    b0    0.80
a0  b1    0.75
    b0    0.25
dtype: float64

In [6]:
# Multiplying a factor that holds a *prior* with one that holds a *conditional*
# distribution yields a *joint* distribution: P(A) * P(B|A) = P(A,B).
fAB = fA * fB_A
display(fAB)

factor(A,B)
A   B 
a1  b1    0.12
    b0    0.48
a0  b1    0.30
    b0    0.10
dtype: float64

In [7]:
# By summing out A from the joint P(A,B), we get the prior (marginal) over B.
fAB.sum_out('A')

factor(B)
B 
b1    0.42
b0    0.58
dtype: float64

In [8]:
# The product fB_A * fD_BC is a factor over A, B, C and D. Reordering its
# variables is done through reorder_scope(), here into the order A, B, D, C.
(fB_A * fD_BC).reorder_scope(['A', 'B', 'D', 'C'])

factor(A,B,D,C)
A   B   D   C 
a1  b1  d1  c1    0.1900
            c0    0.1800
        d0  c1    0.0100
            c0    0.0200
    b0  d1  c1    0.6400
            c0    0.0000
        d0  c1    0.1600
            c0    0.8000
a0  b1  d1  c1    0.7125
            c0    0.6750
        d0  c1    0.0375
            c0    0.0750
    b0  d1  c1    0.2000
            c0    0.0000
        d0  c1    0.0500
            c0    0.2500
dtype: float64

In [9]:
# Factors can be easily serialized: as_dict() gives a plain dict holding the
# factor's type, scope, states and data.
fA.as_dict()

{'type': 'Factor',
 'scope': ['A'],
 'states': {'A': ['a1', 'a0']},
 'data': [0.6, 0.4]}

In [10]:
# Access individual entries by state, as you would look up a key in a dictionary.
fA['a0']

0.4

In [11]:
# For a factor over several variables, pass one state per variable, in the
# same order as the variables in the factor's scope (here: A first, then B).
# The trailing digits in the result are ordinary floating-point round-off.
fAB['a0', 'b1']

0.30000000000000004

In [12]:
# Summing out removes a variable from the Factor: here A is removed from the
# joint over (A, B), leaving a factor over B only.
fAB.sum_out('A')

factor(B)
B 
b1    0.42
b0    0.58
dtype: float64

In [13]:
# Projecting onto a variable is equivalent to summing out the complement, i.e.
# all other variables. fA only contains A, so there is nothing to sum out here
# and the result has the same values as fA.
fA.project('A')

factor(A)
A 
a1    0.6
a0    0.4
dtype: float64

In [14]:
# Factors can also be added, although this may be of limited use. As with
# multiplication, the result covers the variables of both operands (A and B).
fA + fB_A

factor(A,B)
A   B 
a1  b1    0.80
    b0    1.40
a0  b1    1.15
    b0    0.65
dtype: float64

In [15]:
# If you want, you can also convert a Factor to a pandas.Series, indexed by
# the states of its variables.
sfAB = fAB.as_series()
sfAB

A   B 
a1  b1    0.12
    b0    0.48
a0  b1    0.30
    b0    0.10
dtype: float64

In [16]:
# Or create a Factor from a Series. Note that this could change the order of
# the states: in the output, 'a0'/'b0' are now listed before 'a1'/'b1'.
# This is a result of the fact that (as far as I can find) pandas.MultiIndex
# doesn't provide a way to keep the order.
Factor.from_series(sfAB)

factor(A,B)
A   B 
a0  b0    0.10
    b1    0.30
a1  b0    0.48
    b1    0.12
dtype: float64

In [17]:
# Factors can be set/updated using the `set` method. This is useful for factors
# that indicate evidence. This sets all entries with A == 'a0' to 0.
# Note that the updated factor is handed back as the result; fAB itself is
# left unchanged unless you ask for an in-place update.
fAB.set(0, A='a0')

factor(A,B)
A   B 
a1  b1    0.12
    b0    0.48
a0  b1    0.00
    b0    0.00
dtype: float64

In [18]:
# set_complement() does the opposite selection: this sets every entry for
# which B is *not* 'b0' to 1 and leaves the 'b0' entries untouched.
fAB.set_complement(1, B='b0')

factor(A,B)
A   B 
a1  b1    1.00
    b0    0.48
a0  b1    1.00
    b0    0.10
dtype: float64

In [19]:
# Results can be chained: first set the entries with A == 'a0' to 0, then set
# all remaining entries (A != 'a0') to 1.
fAB.set(0, A='a0').set_complement(1, A='a0')

factor(A,B)
A   B 
a1  b1    1.0
    b0    1.0
a0  b1    0.0
    b0    0.0
dtype: float64

In [20]:
# If you'd rather do this in place, either pass 'inplace=True' to `set()` or
# use item assignment instead. This modifies fAB itself: every entry with
# A == 'a0' becomes 1.
fAB['a0'] = 1

In [21]:
# Note that with item assignment you're responsible for selecting the right
# variable yourself: B is the second variable of fAB, so a leading ':' is
# needed to span all states of A.
fAB[:, 'b1'] = 2
print(fAB, end='\n\n')

# Passing only 'b1' does not address B; as the output shows, it raises a KeyError.
try:
    fAB['b1'] = 2
except Exception as err:
    print('That will not work:', type(err), err, end='\n\n')

factor(A,B)
A   B 
a1  b1    2.00
    b0    0.48
a0  b1    2.00
    b0    1.00
dtype: float64

That will not work: <class 'KeyError'> 'b1'

