In [6]:
from pqp.variable import make_vars, Variable
from pqp.graph import Graph
from pqp.expression import Expression, Marginal, P, Product, Quotient
from pqp import CategoricalDistribution

import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd

In [7]:
x, y, z = make_vars("xyz")
g = Graph([
    z <= x,
    y <= z,
    x & y
])
estimand = g.idc([y], [x])
estimand.display()

<IPython.core.display.Math object>

In [8]:
I = object()
def p(*args):
    acc = vars = []
    given = []
    for arg in args:
        if arg is I:
            acc = given
        elif not isinstance(arg, Variable):
            raise ValueError("Expected Variable")
        else:
            acc.append(arg)
    return P(vars, given)

V = Variable

In [9]:
np.random.seed(137)
n = 5

# severity = np.random.binomial(3, 0.5, size=n)
# doctor_noise = np.random.binomial(1, 0.1, size=n)
# print((severity > 1).astype(int))
# print(doctor_noise)
# treatment = ((severity > 1) != doctor_noise).astype(int)

# # print(treatment)

# pathway_predisposition = 0# np.random.binomial(2, 0.5, size=n)
# pathway = pathway_predisposition - treatment

# outcome_noise = 0#np.random.binomial(2, 0.5, size=n)
# outcome = pathway + severity + outcome_noise

severity = np.array([0, 1])
treatment = np.array(severity)
pathway = (1-treatment)
outcome = severity + pathway


model_vars = {}
for name in ["severity", "treatment", "pathway", "outcome"]:
    model_vars[name] = Variable(name)

df = pd.DataFrame({
    "severity": severity,
    "treatment": treatment,
    "pathway": pathway,
    "outcome": outcome,
})

dist = CategoricalDistribution(df, prior=1)
df.head(10)

Unnamed: 0,severity,treatment,pathway,outcome
0,0,0,1,1
1,1,1,0,1


In [10]:
dist.approx(P([dist.vars.outcome], [dist.vars.treatment]), {"outcome": 0, "treatment": 0})

0.3333333333333333

In [11]:
_s = []
_t = []
_p = []
_o = []
probs = []

for s in [0, 1]:
    for t in [0, 1]:
        for p in [0, 1]:
            for o in [0, 1]:
                _s.append(s)
                _t.append(t)
                _p.append(p)
                _o.append(o)
                prob = dist.approx(P([dist.vars.outcome, dist.vars.pathway, dist.vars.treatment, dist.vars.severity]), {"outcome": o, "pathway": p, "treatment": t, "severity": s})
                probs.append(prob)

probs = pd.DataFrame({
    "severity": _s,
    "treatment": _t,
    "pathway": _p,
    "outcome": _o,
    "prob": probs
})

probs

Unnamed: 0,severity,treatment,pathway,outcome,prob
0,0,0,0,0,0.041667
1,0,0,0,1,0.041667
2,0,0,1,0,0.041667
3,0,0,1,1,0.375
4,0,1,0,0,0.041667
5,0,1,0,1,0.041667
6,0,1,1,0,0.041667
7,0,1,1,1,0.041667
8,1,0,0,0,0.041667
9,1,0,0,1,0.041667


In [12]:
data = pd.DataFrame({"x": [0], "y": [0], "z": [0]})
dist = CategoricalDistribution(data, prior=1)
joint = P([dist.vars.x, dist.vars.y, dist.vars.z])
# zeros = {"x": 0, "y": 0, "z": 0}
ones = {"x": 1, "y": 1, "z": 1}

dist.approx(joint, ones)

0.5

In [13]:
_x = []
_y = []
_z = []
probs = []

for x in [0, 1]:
    for y in [0, 1]:
        for z in [0, 1]:
            _x.append(x)
            _y.append(y)
            _z.append(z)
            prob = dist.approx(joint, {"x": x, "y": y, "z": z})
            probs.append(prob)

probs = pd.DataFrame({
    "x": _x,
    "y": _y,
    "z": _z,
    "prob": probs
})

probs

Unnamed: 0,x,y,z,prob
0,0,0,0,1.0
1,0,0,1,0.5
2,0,1,0,0.5
3,0,1,1,0.5
4,1,0,0,0.5
5,1,0,1,0.5
6,1,1,0,0.5
7,1,1,1,0.5


In [17]:
dist.domain_size(["x", "y", "z"])
dist.n_unique

{'x': 1, 'y': 1, 'z': 1}