# Build Net

In [8]:
import os
from bni_netica.bni_netica import Net
from benchmarking.data_generator import build_random_bn
from bn_helpers.support_tools import get_nets, printNet
from benchmarking.data_utils import save_nets_to_parquet, load_nets_from_parquet
from benchmarking.benchmarking_utils import pickTwoRandomNodes
from bn_helpers.support_tools import get_BN_structure, printNet
from bn_helpers.bn_helpers import AnswerStructure, BnHelper
from ollama.prompt import answer_this_prompt


In [2]:
output_path = "./nets/outputs/"

net3 = build_random_bn(
    n_nodes=12,
    name="SkewBN",
    cpt_mode="random",
    dirichlet_alpha=0.3,     # spiky rows
    avg_edges_per_node=1.4,
    max_in_degree=2,
    sprinkle_motifs=5,
    # save_path=output_path
)

printNet(net3)

A -> ['F']
E -> ['L', 'F']
F -> ['D']
D -> []
L -> []
I -> ['C', 'H']
B -> ['K', 'J']
J -> ['C']
C -> []
G -> ['K']
K -> ['H']
H -> []


## Test the boundary

In [3]:
net2 = build_random_bn(
    n_nodes=1000,
    name="MaxRandomBN",
    cpt_mode="random",
    avg_edges_per_node=2,
    max_in_degree=2,
    sprinkle_motifs=5,
    # save_path=output_path
)

# printNet(net2)


# Build Dataset

In [5]:
# import all nets from the collection
nets_collection = get_nets()

# print all nets
for net in nets_collection:
    printNet(net)
    print()

A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

VisitAsia -> ['Tuberculosis']
Tuberculosis -> ['TbOrCa']
Smoking -> ['Cancer', 'Bronchitis']
Cancer -> ['TbOrCa']
TbOrCa -> ['XRay', 'Dyspnea']
XRay -> []
Bronchitis -> ['Dyspnea']
Dyspnea -> []

Class -> ['Obsv', 'Result']
Obsv -> ['Classifier']
Classifier -> ['Result']
Result -> []

Sex -> ['Smoking_status', 'LVH', 'Coronary_artery_disease']
Smoking_status -> ['Coronary_artery_disease']
Systolic_Blood_Pressure -> ['LVH', 'Coronary_artery_disease']
Age -> ['LVH', 'Coronary_artery_disease']
LVH -> ['Coronary_artery_disease']
Diabetes_mellitus -> ['Coronary_artery_disease']
Total_chol -> ['Coronary_artery_disease']
HDL_Status -> ['Coronary_artery_disease']
Coronary_artery_disease -> []

Tampering -> ['Alarm']
Fire -> ['Alarm', 'Smoke']
Alarm -> ['Leaving']
Leaving -> ['Report']
Smoke -> []
Report -> []

P1 -> ['C', 'Color_P1']
P2 -> ['C', 'Color_P2']
C -> ['Color_C']
Color_P1 -> []
Color_C -> []
Color_P2 -> []

Ecstazine -> ['N

In [1]:
data_output = "./benchmarking/data"

In [None]:
# Generate 500 random nets
nets = nets_collection + \
[ 
    build_random_bn(n_nodes=i,
        cpt_mode="random",
        avg_edges_per_node=2,
        max_in_degree=2,
        sprinkle_motifs=5
    )
    for i in range(3,800)
]

# Save them
save_nets_to_parquet(nets, os.path.join(data_output, "nets_dataset_800.parquet"))

In [4]:
LOAD_NETS = True
# Load them back
if LOAD_NETS:
    loaded_nets = load_nets_from_parquet(os.path.join(data_output, "nets_dataset.parquet"))


Loaded 509 nets from ./benchmarking/data/nets_dataset.parquet


In [5]:
for net in loaded_nets[:20]:
  printNet(net)
  print()

A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

VisitAsia -> ['Tuberculosis']
Tuberculosis -> ['TbOrCa']
Smoking -> ['Cancer', 'Bronchitis']
Cancer -> ['TbOrCa']
TbOrCa -> ['XRay', 'Dyspnea']
XRay -> []
Bronchitis -> ['Dyspnea']
Dyspnea -> []

Class -> ['Obsv', 'Result']
Obsv -> ['Classifier']
Classifier -> ['Result']
Result -> []

Sex -> ['Smoking_status', 'LVH', 'Coronary_artery_disease']
Smoking_status -> ['Coronary_artery_disease']
Systolic_Blood_Pressure -> ['LVH', 'Coronary_artery_disease']
Age -> ['LVH', 'Coronary_artery_disease']
LVH -> ['Coronary_artery_disease']
Diabetes_mellitus -> ['Coronary_artery_disease']
Total_chol -> ['Coronary_artery_disease']
HDL_Status -> ['Coronary_artery_disease']
Coronary_artery_disease -> []

Tampering -> ['Alarm']
Fire -> ['Alarm', 'Smoke']
Alarm -> ['Leaving']
Leaving -> ['Report']
Smoke -> []
Report -> []

P1 -> ['C', 'Color_P1']
P2 -> ['C', 'Color_P2']
C -> ['Color_C']
Color_P1 -> []
Color_C -> []
Color_P2 -> []

Ecstazine -> ['N

# Benchmark simple Query: Is D-connected?

In [24]:
from pydantic import BaseModel

class ValidateScore(BaseModel):
    score: int

def get_ground_truth(net, node1, node2):
    bn_helper = BnHelper(function_name='is_XY_dconnected')

    ans = bn_helper.is_XY_dconnected(net, node1, node2)
    if ans:
        template = f"Yes, {node1} is d-connected to {node2}, which means that entering evidence for {node1} would change the probability of {node2} and vice versa."
    else:
        template = f"No, {node1} is not d-connected to {node2}, which means that entering evidence for {node1} would not change the probability of {node2}."

    return template

def get_validation_score(y, y_hat):
    validation_prompt = f"""
    Ground truth: {y}
    LLM output: {y_hat}
    Return 1 if the LLM output is correct, otherwise return 0.
    """

    validate_score = answer_this_prompt(validation_prompt, format=ValidateScore.model_json_schema())
    validate_score = ValidateScore.model_validate_json(validate_score)
    return validate_score.score

def output_score_raw_model(net):
    node1, node2 = pickTwoRandomNodes(net)
    print('Two random nodes:', node1, node2)

    bn = get_BN_structure(net)
    prompt = f"In this Bayesian Network:\n{bn}\n"
    prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"

    y = get_ground_truth(net, node1, node2)
    print('y:\n', y)

    ans = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema())
    y_hat = AnswerStructure.model_validate_json(ans)
    print('y_hat:\n', y_hat)

    return get_validation_score(y, y_hat)

print(output_score_raw_model(loaded_nets[0]))

Two random nodes: D A
y:
 Yes, D is d-connected to A, which means that entering evidence for D would change the probability of A and vice versa.
y_hat:
 answer='Yes, changing the evidence of D will change the probability of A. In a Bayesian Network, the evidence of a node (like D) affects the probabilities of its parents (like A) through the joint probability distribution. When D is observed (evidence is given), it constrains the probabilities of its parents (A and C, since A is a parent of D and C is a parent of D). This means that the probability of A is affected by the evidence of D. However, the effect may vary depending on the specific structure of the network and the evidence provided. For example, if the evidence is only D, then the probability of A is affected because A is a parent of D. If the evidence is not D but another node, like B, then the effect on A would be different. In general, any node that is a parent of A will have its evidence affect the probability of A, but th

In [25]:
from run import execute_query

def output_score_baymin(net):
    node1, node2 = pickTwoRandomNodes(net)
    print('Two random nodes:', node1, node2)

    y = get_ground_truth(net, node1, node2)
    print('y:\n', y)

    query = f"Is changing the evidence of {node1} going to change the probability of {node2}?"
    ans = execute_query(net, query)
    y_hat = AnswerStructure.model_validate_json(ans)
    print('y_hat:\n', y_hat)

    return get_validation_score(y, y_hat)

print(output_score_baymin(loaded_nets[0]))

Two random nodes: E D
y:
 Yes, E is d-connected to D, which means that entering evidence for E would change the probability of D and vice versa.

BayMin:
{"function_name": "does_Z_change_dependency_XY"}

from_node='E' to_node='D' evidence_node='E'
Output: False, details: {'before': True, 'after': True}


ValidationError: 1 validation error for AnswerStructure
  JSON input should be string, bytes or bytearray [type=json_type, input_value=('{"answer": "No, observi...}\n', 'last_net': None}), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/json_type