In [1]:
import networkx as nx
from pybbn.probabilistic import create_reasoning_model

def get_model():
    d = nx.DiGraph()
    d.add_nodes_from(['drug', 'gender', 'recovery'])
    d.add_edges_from([('gender', 'drug'), ('gender', 'recovery'), ('drug', 'recovery')])
    
    p = {
        'gender': {
            'columns': ['gender', '__p__'],
            'data': [
                ['male', 0.51], ['female', 0.49]
            ]
        },
        'drug': {
            'columns': ['gender', 'drug', '__p__'],
            'data': [
                ['female', 'no', 0.24],
                ['female', 'yes', 0.76],
                ['male', 'no', 0.76],
                ['male', 'yes', 0.24]
            ]
        },
        'recovery': {
            'columns': ['gender', 'drug', 'recovery', '__p__'],
            'data': [
                ['female', 'no', 'no', 0.90],
                ['female', 'no', 'yes', 0.10],
                ['female', 'yes', 'no', 0.27],
                ['female', 'yes', 'yes', 0.73],
                ['male', 'no', 'no', 0.99],
                ['male', 'no', 'yes', 0.01],
                ['male', 'yes', 'no', 0.07],
                ['male', 'yes', 'yes', 0.93]
            ]
        }
    }
    
    model = create_reasoning_model(d, p)
    return model

model = get_model()

In [2]:
Xy = model.sample(max_samples=10_000) \
    .assign(
        gender=lambda d: d['gender'].map({'male': 1, 'female': 0}),
        drug=lambda d: d['drug'].map({'yes': 1, 'no': 0}),
        recovery=lambda d: d['recovery'].map({'yes': 1, 'no': 0}),
    )
Xy.shape

(10000, 3)

In [3]:
Xy.head()

Unnamed: 0,gender,drug,recovery
0,1,0,0
1,1,1,1
2,0,1,0
3,1,1,1
4,0,1,1


In [4]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

X, y = Xy[['gender', 'drug']], Xy['recovery']

y_model = RandomForestClassifier(n_jobs=-1, random_state=37) \
    .fit(X, y)

y_pred = y_model.predict_proba(X)

In [5]:
import pandas as pd

u = pd.DataFrame(y_pred, columns=['p0', 'p1']) \
    .assign(y0=(y==0).astype(int)) \
    .assign(y1=(y==1).astype(int)) \
    .assign(u0=lambda d: d['y0'] - d['p0']) \
    .assign(u1=lambda d: d['y1'] - d['p1'])
u.shape

(10000, 6)

In [6]:
X, y = Xy[['gender', 'drug', 'recovery']], u['u0']

u0_model = RandomForestRegressor(n_jobs=-1, random_state=37) \
    .fit(X, y)

In [7]:
X, y = Xy[['gender', 'drug', 'recovery']], u['u1']

u1_model = RandomForestRegressor(n_jobs=-1, random_state=37) \
    .fit(X, y)

In [17]:
import numpy as np

def to_df(v, c):
    return pd.DataFrame([v], columns=c)

u0_pred = u0_model.predict(to_df([1, 0, 0], ['gender', 'drug', 'recovery']))[0]
u1_pred = u1_model.predict(to_df([1, 0, 0], ['gender', 'drug', 'recovery']))[0]

u_pred = np.array([u0_pred, u1_pred])
z_pred = y_model.predict_proba(to_df([1, 1], ['gender', 'drug']))[0]

u_pred, z_pred, z_pred + u_pred

(array([ 0.39675612, -0.39675612]),
 array([0.59693957, 0.40306043]),
 array([0.99369569, 0.00630431]))

In [19]:
u0_pred = u0_model.predict(to_df([1, 1, 1], ['gender', 'drug', 'recovery']))[0]
u1_pred = u1_model.predict(to_df([1, 1, 1], ['gender', 'drug', 'recovery']))[0]

u_pred = np.array([u0_pred, u1_pred])
z_pred = y_model.predict_proba(to_df([1, 0], ['gender', 'drug']))[0]

u_pred, z_pred, z_pred + u_pred

(array([-0.59693957,  0.59693957]),
 array([0.60324388, 0.39675612]),
 array([0.00630431, 0.99369569]))

In [18]:
u0_pred = u0_model.predict(to_df([0, 0, 0], ['gender', 'drug', 'recovery']))[0]
u1_pred = u1_model.predict(to_df([0, 0, 0], ['gender', 'drug', 'recovery']))[0]

u_pred = np.array([u0_pred, u1_pred])
z_pred = y_model.predict_proba(to_df([0, 1], ['gender', 'drug']))[0]

u_pred, z_pred, z_pred + u_pred

(array([ 0.40852612, -0.40852612]),
 array([0.60535339, 0.39464661]),
 array([ 1.01387952, -0.01387952]))

In [20]:
u0_pred = u0_model.predict(to_df([0, 1, 1], ['gender', 'drug', 'recovery']))[0]
u1_pred = u1_model.predict(to_df([0, 1, 1], ['gender', 'drug', 'recovery']))[0]

u_pred = np.array([u0_pred, u1_pred])
z_pred = y_model.predict_proba(to_df([0, 0], ['gender', 'drug']))[0]

u_pred, z_pred, z_pred + u_pred

(array([-0.60535339,  0.60535339]),
 array([0.59147388, 0.40852612]),
 array([-0.01387952,  1.01387952]))