# 2. Bags of Factors

In [1]:
%run '_preamble.ipynb'

Python version: 3.8.10
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

available imports:
  import os
  import logging
  import pandas as pd
  import numpy as np

connect to this kernel with:
  jupyter console --existing 93487911-3e84-4cc3-ad67-5c0c5468e372

Could not create logging directory "../logs"
Logging to: "../logs/notebook.log"
Current date/time: 11-06-2021, 21:24
Current working directory: "/Users/melle/software-development/thomas-master/notebooks"


In [2]:
from thomas.core.models import Bag
from thomas.core.factors import Factor
from thomas.core.factors import CPT
from thomas.core import examples

from IPython.display import display, HTML

### Sprinkler Network

In [3]:
# Get the five Factors that make up the Sprinkler network.
# NOTE(review): the names suggest e.g. fB_A is the factor for B given A and
# fD_BC the factor for D given B and C -- confirm against thomas.core.examples.
fA, fB_A, fC_A, fD_BC, fE_C = examples.get_sprinkler_factors()

In [4]:
# Putting the factors in a Bag lets us run queries (such as variable
# elimination, below) over all of them together.
bag = Bag(factors=[fA, fB_A, fC_A, fD_BC, fE_C])

In [5]:
# The scope gives an overview of which variables are in the bag.
# It is displayed as a set, so the order of the names carries no meaning.
bag.scope

{'A', 'B', 'C', 'D', 'E'}

In [6]:
# Compute the factor over C alone; all other variables in the bag are
# eliminated. The resulting entries sum to 1, i.e. this is the marginal P(C).
bag.variable_elimination(['C'])

factor(C)
C 
c1    0.52
c0    0.48
dtype: float64

In [7]:
# Compute the (unnormalized) factor over C given the evidence A=a1.
# The second argument maps evidence variables to their observed states.
# The entries equal the a1 rows of the joint over A and C, i.e. P(C, A=a1).
bag.variable_elimination(['C'], {'A': 'a1'})

factor(C)
C 
c1    0.48
c0    0.12
dtype: float64

In [8]:
# Summing the unnormalized factor over C leaves the probability of the
# evidence itself: 0.48 + 0.12 = 0.6, so P(A=a1) = 0.6.
Pa1 = bag.variable_elimination(['C'], {'A': 'a1'}).sum()
f'P(A=a1) = {Pa1:.2}'

'P(A=a1) = 0.6'

In [9]:
# Normalizing the unnormalized factor (dividing by its sum, P(A=a1) = 0.6)
# gives the conditional probability P(C|A=a1).
bag.variable_elimination(['C'], {'A': 'a1'}).normalize()

factor(C)
C 
c1    0.8
c0    0.2
dtype: float64

In [10]:
# We can also compute the joint distribution over A and C by passing both
# as query variables.
bag.variable_elimination(['A', 'C'])

factor(A,C)
A   C 
a1  c1    0.48
    c0    0.12
a0  c1    0.04
    c0    0.36
dtype: float64

In [11]:
# Or even compute the entire joint distribution (bag.scope returns the set of
# all variables in the bag). Don't do this for anything other than really
# simple networks, as the number of entries in the JPT will blow up quickly.
bag.variable_elimination(bag.scope)

{'E', 'D', 'A', 'C', 'B'}


factor(A,B,C,D,E)
A   B   C   D   E 
a1  b1  c1  d1  e1    0.0638
                e0    0.0274
            d0  e1    0.0034
                e0    0.0014
        c0  d1  e1    0.0000
                e0    0.0216
            d0  e1    0.0000
                e0    0.0024
    b0  c1  d1  e1    0.2150
                e0    0.0922
            d0  e1    0.0538
                e0    0.0230
        c0  d1  e1    0.0000
                e0    0.0000
            d0  e1    0.0000
                e0    0.0960
a0  b1  c1  d1  e1    0.0200
                e0    0.0086
            d0  e1    0.0011
                e0    0.0005
        c0  d1  e1    0.0000
                e0    0.2430
            d0  e1    0.0000
                e0    0.0270
    b0  c1  d1  e1    0.0056
                e0    0.0024
            d0  e1    0.0014
                e0    0.0006
        c0  d1  e1    0.0000
                e0    0.0000
            d0  e1    0.0000
                e0    0.0900
dtype: float64

### Student Network

In [12]:
# Load the CPTs of the Student network and collect them in a new Bag.
# get_student_CPTs() returns a mapping; only its values are passed on.
# NOTE(review): the first positional argument ('Student') is presumably the
# Bag's name -- confirm against the Bag constructor.
# Note that this rebinds `bag`, replacing the Sprinkler bag used above.
factors = examples.get_student_CPTs()
bag = Bag('Student', list(factors.values()))

In [13]:
# The scope of a Bag is an unordered set() of all random variables in the bag.
bag.scope

{'D', 'G', 'I', 'L', 'S'}

In [14]:
# Compute P(G|I): one row per state of I, one column per state of G.
# NOTE(review): the argument order appears to be (query variables, query
# values, evidence variables, evidence values) -- confirm against
# Bag.compute_posterior.
bag.compute_posterior(['G'], {}, ['I'], {})

G,g1,g2,g3
I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
i0,0.2,0.34,0.46
i1,0.74,0.168,0.092


In [15]:
# Same query, but with I fixed to i0 through the last argument: only the
# i0 row of the table above remains, i.e. P(G|I=i0).
bag.compute_posterior(['G'], {}, [], {'I': 'i0'})

G,g1,g2,g3
,0.2,0.34,0.46


In [16]:
# String shorthand for the same query; it yields the same table as
# compute_posterior(['G'], {}, ['I'], {}) above.
bag.P('G|I')

G,g1,g2,g3
I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
i0,0.2,0.34,0.46
i1,0.74,0.168,0.092


In [17]:
# FIXME: this returns a bare numpy array (one value per state of I, matching
# the g1 column of P(G|I)); I would expect it to return a CPT instead.
# Not sure if Factor can deal with incomplete variable assignments, though.
bag.P('G=g1|I')

array([0.2 , 0.74])

In [18]:
# The scope of a CPT is an ordered list: the conditioning variable(s) come
# first and the conditioned variable is shown *last*. For P(G|I) that is
# ['I', 'G'].
bag.P('G|I').scope

['I', 'G']