# Build Net

In [25]:
import os
from bni_netica.bni_netica import Net
from benchmarking.data_generator import build_random_bn
from bn_helpers.get_structures_print_tools import get_nets, printNet, get_BN_structure, printPath
from benchmarking.data_utils import save_nets_to_parquet, load_nets_from_parquet
from benchmarking.benchmarking_utils import pickTwoRandomNodes
from bn_helpers.bn_helpers import AnswerStructure, BnHelper
from ollama.prompt import answer_this_prompt
from bn_helpers.utils import get_path


In [26]:
# Output directory for nets; in this notebook it is only referenced by the
# commented-out `save_path=output_path` arguments (presumably where generated
# nets would be written — confirm against build_random_bn).
output_path = "./nets/outputs/"

# net3 = build_random_bn(
#     n_nodes=12,
#     name="SkewBN",
#     cpt_mode="random",
#     dirichlet_alpha=0.3,     # spiky rows
#     avg_edges_per_node=1.4,
#     max_in_degree=2,
#     sprinkle_motifs=5,
#     # save_path=output_path
# )

# printNet(net3)

## Test the boundary

In [27]:
# Build a 60-node random network named "MaxRandomBN" and print its structure.
# The section heading marks this as a boundary (size) test of the generator.
net2 = build_random_bn(
    n_nodes=60,
    name="MaxRandomBN",
    cpt_mode="random",
    avg_edges_per_node=5,
    max_in_degree=5,
    sprinkle_motifs=5,
    # save_path=output_path
)

printNet(net2)


Z -> ['B2', 'C2', 'N1', 'A']
P -> ['C2', 'T1', 'X1', 'U']
C2 -> ['B', 'O', 'C1']
A2 -> ['H', 'L', 'F1', 'E1', 'B1']
B1 -> ['E2', 'H1', 'L']
D -> ['B2', 'B', 'H1']
H1 -> ['M', 'R', 'T1', 'Q', 'Y']
K -> ['N1', 'M', 'U1', 'M1']
Y -> ['F', 'G1', 'K1', 'Q1', 'V', 'M']
H -> ['U1', 'E']
U1 -> ['D2', 'R1', 'V1', 'M']
M -> ['P1', 'B', 'Q1', 'W1', 'I']
E2 -> ['L1', 'W', 'F', 'P1', 'G2', 'L', 'V']
J1 -> ['S1', 'C1', 'U', 'L1', 'G1']
L1 -> ['L', 'Z1', 'I1']
F2 -> ['T1', 'L', 'R1']
L -> ['D1', 'V', 'O']
X -> ['F1', 'B2', 'C1']
N -> ['D1', 'F1', 'H2']
F1 -> ['S', 'B2', 'C1', 'U', 'G', 'I']
T -> ['G2', 'D1', 'E1', 'O1', 'O']
O1 -> ['C', 'E', 'P1', 'B2']
B2 -> ['R', 'Z1', 'V', 'H2']
S1 -> ['P1', 'W1', 'C1']
U -> ['H2', 'R', 'G', 'E']
H2 -> ['O', 'D1', 'M1']
E1 -> ['K1', 'M1', 'A']
M1 -> ['R1', 'P1']
P1 -> ['G', 'G1', 'F', 'V']
V -> ['R', 'B']
B -> ['F', 'D1']
D2 -> ['W1', 'I', 'O']
O -> []
C1 -> ['C', 'K1']
N1 -> ['T1', 'Y1', 'G']
Z1 -> ['A', 'Q1', 'Y1', 'S', 'J']
A1 -> ['A', 'Q', 'C']
A -> ['G1', 'W1

# Build Dataset

In [28]:
# Fetch the collection of nets returned by get_nets()
nets_collection = get_nets()

# Print every net in the collection, followed by a blank line as a separator
for net in nets_collection:
    printNet(net)
    print()

A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

VisitAsia -> ['Tuberculosis']
Tuberculosis -> ['TbOrCa']
Smoking -> ['Cancer', 'Bronchitis']
Cancer -> ['TbOrCa']
TbOrCa -> ['XRay', 'Dyspnea']
XRay -> []
Bronchitis -> ['Dyspnea']
Dyspnea -> []

Class -> ['Obsv', 'Result']
Obsv -> ['Classifier']
Classifier -> ['Result']
Result -> []

Sex -> ['Smoking_status', 'LVH', 'Coronary_artery_disease']
Smoking_status -> ['Coronary_artery_disease']
Systolic_Blood_Pressure -> ['LVH', 'Coronary_artery_disease']
Age -> ['LVH', 'Coronary_artery_disease']
LVH -> ['Coronary_artery_disease']
Diabetes_mellitus -> ['Coronary_artery_disease']
Total_chol -> ['Coronary_artery_disease']
HDL_Status -> ['Coronary_artery_disease']
Coronary_artery_disease -> []

Tampering -> ['Alarm']
Fire -> ['Alarm', 'Smoke']
Alarm -> ['Leaving']
Leaving -> ['Report']
Smoke -> []
Report -> []

P1 -> ['C', 'Color_P1']
P2 -> ['C', 'Color_P2']
C -> ['Color_C']
Color_P1 -> []
Color_C -> []
Color_P2 -> []

Ecstazine -> ['N

In [29]:
# Base directory joined with the parquet file names when saving/loading nets
data_output = "./benchmarking/data"

In [30]:
# # Generate one random net per node count in range(3, 800), i.e. 797 random nets
# nets = nets_collection + \
# [ 
#     build_random_bn(n_nodes=i,
#         cpt_mode="random",
#         avg_edges_per_node=2,
#         max_in_degree=2,
#         sprinkle_motifs=5
#     )
#     for i in range(3,800)
# ]

# # Save them
# save_nets_to_parquet(nets, os.path.join(data_output, "nets_dataset_800.parquet"))

In [31]:
# Toggle: set to True to read previously saved nets back from parquet.
LOAD_NETS = False
# Load them back
# NOTE(review): `loaded_nets` is only assigned when LOAD_NETS is True, and the
# file name used here ("nets_dataset.parquet") differs from the one in the
# commented-out save call ("nets_dataset_800.parquet") — confirm which is intended.
if LOAD_NETS:
    loaded_nets = load_nets_from_parquet(os.path.join(data_output, "nets_dataset.parquet"))


In [32]:
# for net in loaded_nets[:20]:
#   printNet(net)
#   print()

# Benchmark a simple query: are two nodes d-connected?

In [33]:
# Re-fetch the net collection so the benchmark cells below start from get_nets()
nets_collection = get_nets()

In [34]:
def get_explain_XY_dconnected(net, node1, node2):
    """Build the templated "d-connected" explanation for a pair of nodes.

    Args:
        net: Network handed to ``get_path``.
        node1: Name of the first node.
        node2: Name of the second node.

    Returns:
        A sentence stating that ``node1`` is d-connected to ``node2``,
        ending with the path returned by ``get_path(net, node1, node2)``.
    """
    path_between = get_path(net, node1, node2)

    # The sentence is assembled from three fragments; the wording is used as
    # an answer template elsewhere, so it is kept verbatim.
    verdict = f"Yes, {node1} is d-connected to {node2}, "
    meaning = (f"which means that entering evidence for {node1} would "
               f"change the probability of {node2} and vice versa. ")
    via_path = f"They d-connected through the following path: {path_between}"
    return verdict + meaning + via_path

In [35]:
# Model identifiers passed as `model=` to answer_this_prompt; MODEL is the active one.
QWEN = "qwen3:1.7b"
GPT_OSS = "gpt-oss-bn-json"
MODEL = QWEN

# NOTE(review): this prints the LAST net in the collection while the query
# below targets the FIRST net — confirm whether printing targetNet was intended.
printNet(nets_collection[-1])
targetNet = nets_collection[0]
targetBN = get_BN_structure(targetNet)

# Pair of nodes to query in targetNet
node1 = "B"
node2 = "C"

bn_helper = BnHelper(function_name='is_XY_connected')

# Connectivity result from the helper, printed for reference
ans = bn_helper.is_XY_connected(targetNet, node1, node2)
print(ans)

# Templated "d-connected" explanation that the model is asked to follow
dcon_template = get_explain_XY_dconnected(targetNet, node1, node2)

# Prompt = network structure + question + answer template
prompt = f"In this Bayesian Network:\n{targetBN}\n"
prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"
prompt += f"Answer exactly as this template: {dcon_template}"

# Request an answer constrained to the AnswerStructure JSON schema and print it
result = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=MODEL)
print(result)

# y = get_ground_truth(targetNet, node1, node2)
# print('y:\n', y)

Rainfall -> ['TreeCond', 'PesticideInRiver', 'RiverFlow']
Drought -> ['TreeCond', 'RiverFlow']
TreeCond -> []
PesticideUse -> ['PesticideInRiver']
PesticideInRiver -> ['FishAbundance']
RiverFlow -> ['FishAbundance']
FishAbundance -> []
True
{"answer": "Yes, B is d-connected to C, which means that entering evidence for B would change the probability of C and vice versa. They d-connected through the following path: ['B', 'A', 'C']"}



In [36]:
def get_explain_XY_dseperated(net, node1, node2):
  """Build the templated "not d-connected" explanation for a pair of nodes.

  No nodes are treated as observed. One node returned by
  ``BnHelper.get_common_effect`` is picked at random and named as the
  blocker. When that call returns nothing (empty or ``None``), the blocker
  sentence is left out instead of raising from ``random.choice``.

  Args:
    net: Network handed to ``BnHelper.get_common_effect``.
    node1: Name of the first node.
    node2: Name of the second node.

  Returns:
    A sentence stating that ``node1`` is not d-connected to ``node2``,
    followed (when a blocker is available) by the randomly chosen blocking
    node.
  """
  import random

  # no nodes observed
  bn_helper = BnHelper(function_name="get_common_effect")
  blocked_nodes = bn_helper.get_common_effect(net, node1, node2)
  candidates = list(blocked_nodes) if blocked_nodes is not None else []

  ans = (f"No, {node1} is not d-connected to {node2}, "
  f"which means that entering evidence for {node1} would not "
  f"change the probability of {node2}.")

  # random.choice raises IndexError on an empty sequence, so only name a
  # blocker when there is at least one candidate.
  if candidates:
    # get one random node among the blocking candidates
    random_blocked_node = random.choice(candidates)
    ans += f" They are blocked by {random_blocked_node} due to common effect."

  return ans

In [37]:
# Use the last net in the collection for the d-separation example and print it
printNet(nets_collection[-1])
targetNet = nets_collection[-1]
targetBN = get_BN_structure(targetNet)

# Pair of nodes to query in targetNet
node1 = "PesticideUse"
node2 = "Rainfall"

# Templated "not d-connected" explanation that the model is asked to follow
dsep_template = get_explain_XY_dseperated(targetNet, node1, node2)

# Prompt = network structure + question + answer template
prompt = f"In this Bayesian Network:\n{targetBN}\n"
prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"
prompt += f"Answer exactly as this template: {dsep_template}"

# Request an answer constrained to the AnswerStructure JSON schema and print it
result = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=MODEL)
print(result)



Rainfall -> ['TreeCond', 'PesticideInRiver', 'RiverFlow']
Drought -> ['TreeCond', 'RiverFlow']
TreeCond -> []
PesticideUse -> ['PesticideInRiver']
PesticideInRiver -> ['FishAbundance']
RiverFlow -> ['FishAbundance']
FishAbundance -> []
{"answer": "Yes, PesticideUse is d-connected to Rainfall, which means that entering evidence for PesticideUse would change the probability of Rainfall. They are blocked by PesticideInRiver due to common effect."}



In [38]:
from pydantic import BaseModel

# Schema for the validator's reply: its JSON schema is passed as `format` to
# answer_this_prompt and the reply is parsed back into this model.
# (Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.)
class ValidateScore(BaseModel):
    score: int  # the validation prompt asks for 1 (correct) or 0 (otherwise)

def get_ground_truth(net, node1, node2):
    """Return the reference explanation text for a pair of nodes.

    Asks ``BnHelper.is_XY_connected`` about the pair and delegates to the
    matching explanation builder: ``get_explain_XY_dconnected`` when the
    result is truthy, ``get_explain_XY_dseperated`` otherwise.

    Args:
        net: Network to query.
        node1: Name of the first node.
        node2: Name of the second node.

    Returns:
        The explanation string produced by the selected builder.
    """
    helper = BnHelper(function_name='is_XY_connected')
    explain = (
        get_explain_XY_dconnected
        if helper.is_XY_connected(net, node1, node2)
        else get_explain_XY_dseperated
    )
    return explain(net, node1, node2)

def get_validation_score(y, y_hat):
    """Ask the LLM to grade an answer against the ground truth.

    Args:
        y: Ground-truth text, interpolated into the grading prompt.
        y_hat: Answer under evaluation, interpolated into the grading prompt.

    Returns:
        The ``score`` field of the reply once parsed as ``ValidateScore``
        (the prompt requests 1 for a correct answer and 0 otherwise).
    """
    judge_prompt = f"""
    Ground truth: {y}
    LLM output: {y_hat}
    Return 1 if the LLM output is correct, otherwise return 0.
    """

    # Constrain the reply to the ValidateScore schema, then parse it back.
    reply_schema = ValidateScore.model_json_schema()
    raw_reply = answer_this_prompt(judge_prompt, format=reply_schema)
    return ValidateScore.model_validate_json(raw_reply).score

def output_score_raw_model(net):
    """Score one random d-connection query answered without any template.

    Two nodes are drawn with ``pickTwoRandomNodes``; the model receives only
    the network structure and the question. No ``model`` argument is passed
    to ``answer_this_prompt`` here.

    Args:
        net: Network to draw the node pair from and to describe in the prompt.

    Returns:
        The score from ``get_validation_score`` comparing the ground truth
        with the parsed ``AnswerStructure`` reply.
    """
    node1, node2 = pickTwoRandomNodes(net)

    structure = get_BN_structure(net)
    question = (
        f"In this Bayesian Network:\n{structure}\n"
        f"Is changing the evidence of {node1} going to change the probability of {node2}?"
    )

    expected = get_ground_truth(net, node1, node2)

    # The reply is parsed into AnswerStructure before being graded.
    raw_reply = answer_this_prompt(question, format=AnswerStructure.model_json_schema())
    parsed_reply = AnswerStructure.model_validate_json(raw_reply)

    return get_validation_score(expected, parsed_reply)

# print(output_score_raw_model(loaded_nets[0]))

In [None]:
def output_score_baymin(net):
    """Score one random d-connection query answered with a helper-built template.

    Two nodes are drawn with ``pickTwoRandomNodes``. The ground-truth
    explanation for the pair is computed once and also supplied to the model
    as the answer template, so the template and the reference can never
    disagree (the d-separated explanation picks its blocking node at random).
    The prompt describes ``net`` itself rather than a notebook-level global.

    Args:
        net: Network to draw the node pair from and to describe in the prompt.

    Returns:
        The score from ``get_validation_score`` comparing the ground truth
        with the raw reply returned by ``answer_this_prompt``.
    """
    node1, node2 = pickTwoRandomNodes(net)

    # get_ground_truth already selects the connected / separated explanation,
    # so reuse its result instead of rebuilding it a second time.
    y = get_ground_truth(net, node1, node2)
    template = y

    # Structure of the network under test (previously the unrelated global
    # `targetBN` was interpolated here).
    bn = get_BN_structure(net)
    prompt = f"In this Bayesian Network:\n{bn}\n"
    prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"
    prompt += f"Answer exactly as this template: {template}"

    y_hat = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=MODEL)

    return get_validation_score(y, y_hat)

In [40]:
# Smoke test: score a single random query on the first net in the collection
print(output_score_baymin(nets_collection[0]))

E A
1


In [41]:
# Build four random networks of increasing size (5, 10, 30 and 60 nodes) for
# the benchmark; a progress line is printed after each one is built.
net_5 = build_random_bn(
    n_nodes=5,
    cpt_mode="random",
    avg_edges_per_node=3,
    max_in_degree=3,
    sprinkle_motifs=5,
    # save_path=output_path
)

print('net_5 done')

net_10 = build_random_bn(
    n_nodes=10,
    cpt_mode="random",
    avg_edges_per_node=6,
    max_in_degree=6,
    sprinkle_motifs=5
)

print('net_10 done')

net_30 = build_random_bn(
    n_nodes=30,
    cpt_mode="random",
    avg_edges_per_node=8,
    max_in_degree=8,
    sprinkle_motifs=5
)

print('net_30 done')

# Note: the 60-node net uses sprinkle_motifs=3, unlike the 5 used above.
net_60 = build_random_bn(
    n_nodes=60,
    cpt_mode="random",
    avg_edges_per_node=6,
    max_in_degree=6,
    sprinkle_motifs=3
)

print('net_60 done')

net_5 done
net_10 done
net_30 done
net_60 done


In [None]:
def test_model(net, get_score_func):
  avg_score = 0
  for i in range(30):
    score = get_score_func(net)
    avg_score += score
    print(f"score {i}: {score}")
  avg_score /= 30
  return avg_score

# Mean score per (approach, net size); keys follow "<approach>_net_<size>_score".
result = {}
print('Net_5, Raw Model')

# Baseline: the model sees only the network structure and the question.
raw_net_5_score = result['raw_net_5_score'] = test_model(net_5, output_score_raw_model)
raw_net_10_score = result['raw_net_10_score'] = test_model(net_10, output_score_raw_model)
raw_net_30_score = result['raw_net_30_score'] = test_model(net_30, output_score_raw_model)
raw_net_60_score = result['raw_net_60_score'] = test_model(net_60, output_score_raw_model)

# Template-assisted runs: the prompt also carries the helper-built answer template.
baymin_net_5_score = result['baymin_net_5_score'] = test_model(net_5, output_score_baymin)
baymin_net_10_score = result['baymin_net_10_score'] = test_model(net_10, output_score_baymin)
baymin_net_30_score = result['baymin_net_30_score'] = test_model(net_30, output_score_baymin)
baymin_net_60_score = result['baymin_net_60_score'] = test_model(net_60, output_score_baymin)

RandomNet
score 0: 1
score 1: 1
score 2: 1
score 3: 1
score 4: 1
score 5: 1
score 6: 1
score 7: 1
score 8: 1
score 9: 1
score 10: 1
score 11: 1
score 12: 1
score 13: 1
score 14: 1
score 15: 1
score 16: 1
score 17: 1
score 18: 1
score 19: 1
score 20: 1
score 21: 1
score 22: 1
score 23: 1
score 24: 1
score 25: 1
score 26: 1
score 27: 1
score 28: 1
score 29: 1
RandomNet
score 0: 1
score 1: 1
score 2: 1
score 3: 1
score 4: 1
score 5: 1
score 6: 1
score 7: 1
score 8: 1
score 9: 1
score 10: 1
score 11: 1
score 12: 1
score 13: 1
score 14: 0
score 15: 1
score 16: 1
score 17: 1
score 18: 1
score 19: 1
score 20: 1
score 21: 1
score 22: 1
score 23: 1
score 24: 1
score 25: 1
score 26: 1
score 27: 1
score 28: 1
score 29: 1
RandomNet
score 0: 1
score 1: 1
score 2: 1
score 3: 1
score 4: 1
score 5: 1
score 6: 1
score 7: 1
score 8: 1
score 9: 1
score 10: 1
score 11: 1
score 12: 1
score 13: 1
score 14: 1
score 15: 1
score 16: 1
score 17: 1
score 18: 1
score 19: 1
score 20: 1
score 21: 1
score 22: 1
scor

In [None]:
# plot the result