# Generating conditional probability tables subject to constraints


In [1]:
import os
from pathlib import Path

from itertools import product

import numpy as np
import pandas as pd
import xarray as xr

import polytope as pc

from pypoman import compute_polytope_vertices, project_polytope

from fake_data_for_learning import BayesianNodeRV, FakeDataBayesianNetwork
from fake_data_for_learning.utils import RandomCpt, MapMultidimIndexToLinear, ProbabilityPolytope
from fake_data_for_learning import utils as ut

Suppose we want to generate data from a discrete Bayesian network, such as

Product -> Days <- Rating, 

where e.g. Product is the (insurance) product name, Rating is rating strength (i.e. market price / technical price) for a submission, and Days is the number of days to generate a quote for the submission.

The number of entries in probability and conditional probability tables to define this Bayesian network is

$ | Product | + | Rating | + | Product | \times | Rating | \times | Days |$.

For example, let us define Product and Rating as follows

In [2]:
# Root node Product: marginal probabilities for the three product names.
# NOTE: this rebinds the name `product`, which the imports cell also binds to
# `itertools.product`; the itertools function is no longer reachable under that
# name after this cell runs.
product_values = ['financial', 'liability', 'property']
product = BayesianNodeRV('product', np.array([0.2, 0.5, 0.3]), values=product_values)

# Root node Rating with two outcomes. The marginal probabilities must sum to 1
# (the previous [0.3, 0.6] summed to 0.9 and was not a valid distribution).
rating_values = range(2)
rating = BayesianNodeRV('rating', np.array([0.4, 0.6]))

Suppose that Days is also discrete, e.g.

In [3]:
# Outcomes of the Days node: the five integers 0, 1, 2, 3, 4.
days_values = range(5)

Then if we choose the ordering of the conditional probability table axes as Product, Rating, Days, we can generate the entries of the conditional probability table for Days conditioned on Product and Rating with `utils.RandomCpt`:

In [4]:
# One axis length per variable, in the axis order Product, Rating, Days.
cpt_axis_sizes = [
    len(values) for values in (product_values, rating_values, days_values)
]
random_cpt = RandomCpt(*cpt_axis_sizes)
# Calling the object draws one random table; it is displayed as the cell output.
random_cpt()

array([[[0.2001 , 0.11988, 0.23985, 0.22664, 0.21353],
        [0.22428, 0.21437, 0.22648, 0.21487, 0.12001]],

       [[0.30496, 0.13342, 0.17955, 0.25262, 0.12945],
        [0.14661, 0.28723, 0.17369, 0.25179, 0.14068]],

       [[0.29282, 0.11601, 0.22178, 0.1165 , 0.25289],
        [0.24524, 0.2051 , 0.10672, 0.19627, 0.24667]]])

So the total number of probability table entries to specify is, as in the formula above,

In [5]:
# Entries of the two marginal tables (Product, Rating) ...
n_marginal_entries = len(product_values) + len(rating_values)
# ... plus the entries of the conditional table with axes Product x Rating x Days.
n_conditional_entries = len(product_values) * len(rating_values) * len(days_values)
f'Number of probability table entries: {n_marginal_entries + n_conditional_entries}'

'Number of probability table entries: 35'

It would be nice to specify certain properties of the matrix without having to change entries individually. For example, we may want to insist that

\begin{equation*}
E(D | P = property) = 3.5 \\
E(D | P = financial) = 1.0 \\
E(D | P= liability) = 2.0
\end{equation*}

Denote the entries of the conditional probability table as 

$$(\rho_{p, r | d})$$

Then the above constraints become

\begin{equation*}
\frac{1}{|R|} \sum_{r, d} d \, \rho_{\mathrm{property},\, r\, | d} = 3.5 \\
\frac{1}{|R|} \sum_{r, d} d \, \rho_{\mathrm{financial},\, r\, | d} = 1.0\\
\frac{1}{|R|} \sum_{r, d} d \, \rho_{\mathrm{liability},\, r\, | d} = 2.0.
\end{equation*}

As $(\rho)$ is a conditional probability table, we also have the constraints 

\begin{equation*}
0 \leq \rho_{p,\,r\,|d} \leq 1 \textrm{ for all }(p,\,r,\,d),\\
\sum_{d} \rho_{p,\,r\,| d} = 1 \textrm{ for each pair } (p, \, r)
\end{equation*}

Together, these constraints define a convex polytope contained in a product of (probability) simplices $\prod_{(p,\,r)} \Delta_{|Days| - 1} \subseteq \mathbb{R}^{R}$, with one simplex for each pair $(p,\,r)$ because the entries sum to 1 over $d$ for each such pair, and where $R = |Product | \times | Rating | \times | Days|$ (see e.g. Chapter 1 of *Lectures on Algebraic Statistics*, Drton, Sturmfels, Sullivant). This polytope is defined as an intersection of half-spaces, i.e. using the so-called *H-representation* of the polytope, see *Lectures on Polytopes* by Ziegler, Chapters 0 and 1.

To generate a random (conditional) probability table subject to these constraints, the vertex-, or *V-representation* of the probability polytope $P$ is much more useful: given a vertex matrix $V$, where each column is a vertex of $P$ in $\mathbb{R}^R$, all points in $P$ can be obtained as

$$
x = V \cdot t
$$

where $t \in \mathbb{R}^N$, with $N$ being the number of vertices for $P$, and $t$ satisfying $0 \leq t_i \leq 1$, $\sum t_i = 1$.

Once we have determined the V-representation $V$, the problem of generating conditional probability tables subject to our given expectation value constraints reduces to the much simpler problem of generating points $t$ in the standard simplex $\Delta_{N-1} \subseteq \mathbb{R}^N$, e.g. by drawing a point from the unit (hyper) cube $[0, 1]^N$ and normalizing it so that its entries sum to 1.

Before we get to our goal of generating these probability tables for our hit ratio problem, let's look at elementary examples.

## (Conditional) Probability Polytopes

The simplest example of a probability polytope is that of a Bernoulli random variable.

In [6]:
# Probability polytope with a single axis 'outcome' taking the values 0 and 1.
bernoulli = ut.ProbabilityPolytope(('outcome',), dict(outcome=range(2)))
# The returned pair (A, b) is handed to pypoman as the half-plane description
# (presumably the system A x <= b — confirm against the pypoman docs).
A, b = bernoulli.get_probability_half_planes()
# Enumerate the vertices; `verts` is reused by the next cell.
verts = compute_polytope_vertices(A, b)
verts

[array([1., 0.]), array([0., 1.])]

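In order to use the V-description to generate a random probability table, we draw random non-negative weights $t$ that sum to 1 and form the corresponding convex combination of the vertices: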

In [7]:
# Stack the vertices as the ROWS of V (one vertex per row).
V = np.vstack(verts)
# Random convex weights, one per vertex: non-negative and normalized to sum to 1.
t = np.random.uniform(size=V.shape[0])
t = t / t.sum()
# Convex combination of the vertices. Because the vertices are the rows of V,
# the mixture is V^T t (V t only agrees with it when V is square and symmetric,
# as it happens to be for the Bernoulli identity matrix).
np.matmul(np.transpose(V), t)

array([0.31643, 0.68357])

In [8]:
# Same construction as the Bernoulli case, now with three outcomes 0, 1, 2.
tertiary = ut.ProbabilityPolytope(('outcome',), dict(outcome=range(3)))
A, b = tertiary.get_probability_half_planes()
# Displayed directly; the vertex list is not stored in a variable here.
compute_polytope_vertices(A, b)

[array([1., 0., 0.]), array([0., 1., 0.]), array([0., 0., 1.])]

In [9]:
# Two-axis polytope: a binary 'output' conditioned on a binary 'input'.
conditional_bernoullis = ut.ProbabilityPolytope(
    ('input', 'output'), dict(input=range(2), output=range(2))
)
# Half-plane description from the probability constraints only
# (no expectation constraints have been set on this object yet).
A, b = conditional_bernoullis.get_probability_half_planes()
# `verts` is reused by the next cell to build a random conditional table.
verts = compute_polytope_vertices(A, b)
verts

[array([1., 0., 1., 0.]),
 array([1., 0., 0., 1.]),
 array([0., 1., 0., 1.]),
 array([0., 1., 1., 0.])]

The benefit of having the vertex-representation (V-representation) of the probability polytope is that generating random (conditional) probability tables is straightforward.

In [10]:
# Rows of V are the polytope vertices computed in the previous cell.
V = np.vstack(verts)
# One random weight per vertex, rescaled so the weights sum to 1.
t = np.random.uniform(size=len(V))
t /= t.sum()
# Show V, its transpose and the weights, then display the convex combination V^T t.
print(V, '\n\n', V.T, '\n\n', t)
V.T @ t

[[1. 0. 1. 0.]
 [1. 0. 0. 1.]
 [0. 1. 0. 1.]
 [0. 1. 1. 0.]] 

 [[1. 1. 0. 0.]
 [0. 0. 1. 1.]
 [1. 0. 0. 1.]
 [0. 1. 1. 0.]] 

 [0.24446 0.22964 0.26488 0.26102]


array([0.4741 , 0.5259 , 0.50547, 0.49453])

## Adding constraints on conditional expectation values

In [11]:
# Attach one expectation constraint to the polytope: first moment (moment=1)
# equal to 0.5 for the slice input=1.
# NOTE(review): presumably this encodes E[output | input=1] = 0.5 — confirm
# against the ExpectationConstraint documentation.
conditional_bernoullis.set_expectation_constraints(
    [ut.ExpectationConstraint(equation=dict(input=1), moment=1, value=0.5)]
)

In [12]:
# Look up which columns the first expectation constraint's equation refers to.
first_constraint = conditional_bernoullis.expect_constraints[0]
conditional_bernoullis.get_expect_equations_col_indices(first_constraint.equation)

[2, 3]

In [13]:
# Half-plane description after the expectation constraint was set
# (get_all_half_planes rather than get_probability_half_planes).
A, b = conditional_bernoullis.get_all_half_planes()

In [14]:
# Vertices of the constrained polytope described by (A, b) from the previous cell.
compute_polytope_vertices(A, b)

[array([1. , 0. , 0.5, 0.5]), array([0. , 1. , 0.5, 0.5])]

In [15]:
# Three-axis polytope: 'output' (binary) with two conditioning axes,
# 'input' (labels 'hi'/'low') and 'more_input' (binary).
two_input_constrained_polytope = ut.ProbabilityPolytope(
    ('input', 'more_input', 'output'),
    dict(input=['hi', 'low'], more_input=range(2), output=range(2))
)
# Single expectation constraint: first moment (moment=1) equal to 0.25 for the
# slice more_input=0; the other conditioning axis 'input' is not fixed.
two_input_constrained_polytope.set_expectation_constraints(
    [ut.ExpectationConstraint(equation=dict(more_input=0), moment=1, value=0.25)]
)

In [16]:
# Half-plane description including the expectation constraint, then its vertices.
A, b = two_input_constrained_polytope.get_all_half_planes()
compute_polytope_vertices(A, b)

[array([0.5, 0.5, 0. , 1. , 1. , 0. , 1. , 0. ]),
 array([0.5, 0.5, 0. , 1. , 1. , 0. , 0. , 1. ]),
 array([0.5, 0.5, 1. , 0. , 1. , 0. , 1. , 0. ]),
 array([0.5, 0.5, 1. , 0. , 1. , 0. , 0. , 1. ]),
 array([1. , 0. , 1. , 0. , 0.5, 0.5, 1. , 0. ]),
 array([1. , 0. , 1. , 0. , 0.5, 0.5, 0. , 1. ]),
 array([1. , 0. , 0. , 1. , 0.5, 0.5, 0. , 1. ]),
 array([1. , 0. , 0. , 1. , 0.5, 0.5, 1. , 0. ])]

## Hit rate polytope again

In [17]:
# Axis values for the three-variable example. Days is restricted to 0..3 here.
product_values = ['financial', 'liability', 'property']
rating_values = range(2)
days_values = range(4)

# First-moment (moment=1) target value for each product, in constraint order.
first_moment_by_product = {
    'financial': 0.2,
    'liability': 0.9,
    'property': 0.5,
}

# Polytope with axes ordered Product, Rating, Days.
hit_polytope = ut.ProbabilityPolytope(
    ('product', 'rating', 'days'),
    coords=dict(product=product_values, rating=rating_values, days=days_values),
)
# One expectation constraint per product.
hit_polytope.set_expectation_constraints(
    [
        ut.ExpectationConstraint(equation={'product': name}, moment=1, value=target)
        for name, target in first_moment_by_product.items()
    ]
)
# Half-plane description including the expectation constraints; used by the next cell.
A, b = hit_polytope.get_all_half_planes()

In [18]:
%%time
# Enumerate the vertices of the constrained polytope (timed by the cell magic,
# which must stay on the first line of the cell) and report how many there are.
verts = compute_polytope_vertices(A, b)
print(len(verts))

504
CPU times: user 1.06 s, sys: 19.9 ms, total: 1.08 s
Wall time: 1.11 s


In [19]:
# Rows of V are the vertices enumerated in the previous cell.
V = np.vstack(verts)
# One random weight per vertex, rescaled so the weights sum to 1.
t = np.random.uniform(size=len(V))
t /= t.sum()
# Convex combination V^T t of the vertices, displayed as the cell output.
V.T @ t

array([0.87792, 0.0659 , 0.03517, 0.021  , 0.87594, 0.06809, 0.03525,
       0.02072, 0.4318 , 0.33617, 0.14911, 0.08293, 0.4174 , 0.33667,
       0.15762, 0.08831, 0.69352, 0.16335, 0.08678, 0.05635, 0.7013 ,
       0.15944, 0.08319, 0.05607])