# 2. Bags of Factors

In [1]:
# Run the shared preamble notebook. Judging by its output below, it loads the
# autoreload extension, makes os/logging/pandas/numpy available and sets up
# logging to a file.
%run '_preamble.ipynb'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

available imports:
  import os
  import logging
  import pandas as pd
  import numpy as np

connect to this kernel with:
  jupyter console --existing 5d7c6244-e863-4d67-974f-8151a57870ae

Logging to: "/Users/melle/software-development/thomas-master/logs/2. Bags of factors.log"
Current date/time: 07-07-2020, 23:45
Current working directory: "/Users/melle/software-development/thomas-master/notebooks"


In [2]:
from thomas.core.bag import Bag
from thomas.core.factor import Factor
from thomas.core.cpt import CPT
from thomas.core import examples

from IPython.display import display, HTML

### Sprinkler network

In [3]:
# Get the five Factors for the Sprinkler network.
# NOTE(review): the names presumably encode the conditional structure, i.e.
# P(A), P(B|A), P(C|A), P(D|B,C) and P(E|C) -- confirm against thomas.core.examples.
fA, fB_A, fC_A, fD_BC, fE_C = examples.get_sprinkler_factors()

In [4]:
# By putting the factors in a Bag, we can reason over them jointly
# (e.g. eliminate variables and condition on evidence, as shown below).
bag = Bag(factors=[fA, fB_A, fC_A, fD_BC, fE_C])

In [5]:
# The scope gives an overview of which variables are in the bag.
# It is a set, so the variables come in no particular order.
bag.scope

{'A', 'B', 'C', 'D', 'E'}

In [6]:
# Compute the factor over C. Note that the variables passed to eliminate() are
# the ones that *remain* in the result; as the output shows, the entries sum
# to 1, so this is the marginal P(C).
bag.eliminate(['C'])

factor(C)
C 
c1    0.52
c0    0.48
dtype: float64

In [7]:
# Compute the (unnormalized) factor over C given the evidence A=a1, which is
# passed as the second argument. The entries are the joint probabilities
# P(C, A=a1) and therefore do not sum to 1.
bag.eliminate(['C'], {'A': 'a1'})

factor(C)
C 
c1    0.48
c0    0.12
dtype: float64

In [8]:
# Summing the unnormalized factor over all states of C marginalizes C out.
# From this we can deduce that P(A=a1) = 0.48 + 0.12 = 0.6
Pa1 = bag.eliminate(['C'], {'A': 'a1'}).sum()
f'P(A=a1) = {Pa1:.2}'

'P(A=a1) = 0.6'

In [9]:
# Normalizing is easy and gives the conditional probability P(C|A=a1):
# each entry of P(C, A=a1) is divided by their sum, P(A=a1) = 0.6.
bag.eliminate(['C'], {'A': 'a1'}).normalize()

factor(C)
C 
c1    0.8
c0    0.2
dtype: float64

In [10]:
# We can also compute the joint distribution over A and C. The a1 rows match
# the unnormalized factor P(C, A=a1) computed earlier.
bag.eliminate(['A', 'C'])

factor(A,C)
A   C 
a1  c1    0.48
    c0    0.12
a0  c1    0.04
    c0    0.36
dtype: float64

In [11]:
# Or even compute the entire joint distribution (bag.scope returns a set of all
# variables in the bag). Don't do this for anything other than really simple networks
# as the number of entries in the JPT will blow up quickly.
bag.eliminate(bag.scope)

factor(A,B,C,D,E)
A   B   C   D   E 
a1  b1  c1  d1  e1    6.384e-02
                e0    2.736e-02
            d0  e1    3.360e-03
                e0    1.440e-03
        c0  d1  e1    0.000e+00
                e0    2.160e-02
            d0  e1    0.000e+00
                e0    2.400e-03
    b0  c1  d1  e1    2.150e-01
                e0    9.216e-02
            d0  e1    5.376e-02
                e0    2.304e-02
        c0  d1  e1    0.000e+00
                e0    0.000e+00
            d0  e1    0.000e+00
                e0    9.600e-02
a0  b1  c1  d1  e1    1.995e-02
                e0    8.550e-03
            d0  e1    1.050e-03
                e0    4.500e-04
        c0  d1  e1    0.000e+00
                e0    2.430e-01
            d0  e1    0.000e+00
                e0    2.700e-02
    b0  c1  d1  e1    5.600e-03
                e0    2.400e-03
            d0  e1    1.400e-03
                e0    6.000e-04
        c0  d1  e1    0.000e+00
                e0    0.000e+00
   

### Student network

In [12]:
# Load the CPTs of the Student network (a mapping; only its values are used)
# and put them in a new Bag.
# NOTE(review): the first positional argument is presumably the bag's name -- confirm.
factors = examples.get_student_CPTs()
bag = Bag('Student', list(factors.values()))

In [13]:
# The scope of a Bag is an unordered set() of all random variables it contains.
bag.scope

{'D', 'G', 'I', 'L', 'S'}

In [14]:
# Compute the conditional distribution P(G|I): one row per state of I, one
# column per state of G.
# NOTE(review): judging by the output, the positional arguments are presumably
# (query variables, query values, conditioning variables, evidence values) -- confirm.
bag.compute_posterior(['G'], {}, ['I'], {})

G,g1,g2,g3
I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
i0,0.2,0.34,0.46
i1,0.74,0.168,0.092


In [15]:
# Same query, but with I fixed to i0 through the last argument: the result is
# the single distribution P(G|I=i0), i.e. the i0 row of P(G|I).
bag.compute_posterior(['G'], {}, [], {'I': 'i0'})

G,g1,g2,g3
,0.2,0.34,0.46


In [16]:
# Shorthand: specify the query as a string. This yields the same table as
# compute_posterior(['G'], {}, ['I'], {}).
bag.P('G|I')

G,g1,g2,g3
I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
i0,0.2,0.34,0.46
i1,0.74,0.168,0.092


In [17]:
# FIXME: I would expect this to return a CPT, but it returns a bare numpy array
# holding P(G=g1|I=i0) and P(G=g1|I=i1), without any variable/state labels.
# Not sure if Factor can deal with incomplete variable assignments, though.
bag.P('G=g1|I')

array([0.2 , 0.74])

In [18]:
# Scope of a CPT is an ordered list: the conditioning variable(s) come first
# and the conditioned variable is shown *last* (here: P(G|I) -> ['I', 'G']).
bag.P('G|I').scope

['I', 'G']