# 2. Bags of Factors

In [1]:
%run '_preamble.ipynb'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
available imports:
  import os
  import logging
  import pandas as pd
  import numpy as np

connect to this kernel with:
  jupyter console --existing 81067729-db22-4dd6-b42a-ebeaa64a9794

Logging to: "/Users/melle/software-development/thomas-master/logs/2. Bags of factors.log"


In [2]:
from thomas.core.bag import Bag
from thomas.core import examples

from IPython.display import display, HTML

### Sprinkler Network

In [3]:
# Get the five factors of the Sprinkler network. Judging by the names, fB_A is
# presumably the factor for B given A (and so on) -- TODO confirm against
# thomas.core.examples.
fA, fB_A, fC_A, fD_BC, fE_C = examples.get_sprinkler_factors()

In [4]:
# By putting the factors in a Bag, we can reason over them: the cells below
# query this bag for marginal, joint and conditional distributions.
bag = Bag(factors=[fA, fB_A, fC_A, fD_BC, fE_C])

In [5]:
# The scope gives an overview of which variables are in the bag.
# It is displayed as a set of variable names.
bag.scope

{'A', 'B', 'C', 'D', 'E'}

In [6]:
# Compute the factor over C. Note that the argument of `eliminate` lists the
# variables to *keep* in the result: every other variable in the bag
# (here A, B, D and E) is eliminated.
bag.eliminate(['C'])

factor(C)
C
c0    0.48
c1    0.52
dtype: float64

In [7]:
# Compute the (unnormalized) factor over C and A=a1. The second argument holds
# the evidence. Since the result is not normalized, its entries do not sum
# to 1 (they match the a1 rows of the joint over A and C).
bag.eliminate(['C'], {'A': 'a1'})

factor(C)
C
c0    0.12
c1    0.48
dtype: float64

In [8]:
# Summing the unnormalized factor over C (with evidence A=a1) leaves the
# probability of the evidence itself: P(A=a1) = 0.6
factor_C_a1 = bag.eliminate(['C'], {'A': 'a1'})
Pa1 = factor_C_a1.sum()
f'P(A=a1) = {Pa1:.2}'

'P(A=a1) = 0.6'

In [9]:
# Normalizing is easy and gives the conditional probability P(C|A=a1):
# the entries are rescaled so that they sum to 1.
bag.eliminate(['C'], {'A': 'a1'}).normalize()

factor(C)
C
c0    0.2
c1    0.8
dtype: float64

In [10]:
# We can also compute the joint distribution over A and C. The result has
# one entry for every combination of the states of A and C.
bag.eliminate(['A', 'C'])

factor(A,C)
A   C 
a0  c0    0.36
    c1    0.04
a1  c0    0.12
    c1    0.48
dtype: float64

In [11]:
# Unstacking the joint factor moves C into the columns, so that it is
# displayed as a table: a little easier on the eyes.
joint_AC = bag.eliminate(['A', 'C'])
joint_AC.unstack()

C,c0,c1
A,Unnamed: 1_level_1,Unnamed: 2_level_1
a0,0.36,0.04
a1,0.12,0.48


In [12]:
# Or even compute the entire joint distribution (bag.scope returns a set of all
# variables in the bag). Don't do this for anything other than really simple networks
# as the number of entries in the JPT will blow up quickly.
bag.eliminate(bag.scope).reorder_scope().sort_index()

factor(A,B,C,D,E)
A   B   C   D   E 
a0  b0  c0  d0  e0    0.09000
                e1    0.00000
            d1  e0    0.00000
                e1    0.00000
        c1  d0  e0    0.00060
                e1    0.00140
            d1  e0    0.00240
                e1    0.00560
    b1  c0  d0  e0    0.02700
                e1    0.00000
            d1  e0    0.24300
                e1    0.00000
        c1  d0  e0    0.00045
                e1    0.00105
            d1  e0    0.00855
                e1    0.01995
a1  b0  c0  d0  e0    0.09600
                e1    0.00000
            d1  e0    0.00000
                e1    0.00000
        c1  d0  e0    0.02304
                e1    0.05376
            d1  e0    0.09216
                e1    0.21504
    b1  c0  d0  e0    0.00240
                e1    0.00000
            d1  e0    0.02160
                e1    0.00000
        c1  d0  e0    0.00144
                e1    0.00336
            d1  e0    0.02736
                e1    0.06384
dtype: float64

### Student Network

In [13]:
# Build a second Bag from the CPTs of the Student network. `get_student_CPTs()`
# returns a mapping, of which only the values are used here.
# NOTE: this rebinds `bag`, so from here on it refers to the Student network
# rather than the Sprinkler network.
# The first positional argument is presumably the bag's name -- TODO confirm
# against thomas.core.bag.Bag.
factors = examples.get_student_CPTs()
bag = Bag('Student', list(factors.values()))

In [14]:
# Query the bag with a string: the conditional distribution of G given I.
# In the resulting table each row (one state of I) sums to 1.
bag.P('G|I')

G,g1,g2,g3
I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
i0,0.2,0.34,0.46
i1,0.74,0.168,0.092


In [15]:
# Order of the scope is important here, as it indicates which variable is the
# conditioning variable: for P(G|I) the conditioning variable I is listed first.
bag.P('G|I').scope

['I', 'G']

In [16]:
# The scope of the Student bag: the set of all variables in the network.
bag.scope

{'D', 'G', 'I', 'L', 'S'}