# Build Net

In [17]:
import os
from bni_netica.bni_netica import Net
from benchmarking.data_generator import build_random_bn
from bn_helpers.get_structures_print_tools import get_nets, printNet, get_BN_structure, printPath
from benchmarking.data_utils import save_nets_to_parquet, load_nets_from_parquet
from benchmarking.benchmarking_utils import pickTwoRandomNodes
from bn_helpers.bn_helpers import AnswerStructure, BnHelper
from ollama.prompt import answer_this_prompt
from bn_helpers.utils import get_path


In [18]:
# Output directory for generated networks; it is the value passed as `save_path`
# in the (currently commented-out) build_random_bn calls below.
output_path = "./nets/outputs/"

# net3 = build_random_bn(
#     n_nodes=12,
#     name="SkewBN",
#     cpt_mode="random",
#     dirichlet_alpha=0.3,     # spiky rows
#     avg_edges_per_node=1.4,
#     max_in_degree=2,
#     sprinkle_motifs=5,
#     # save_path=output_path
# )

# printNet(net3)

## Test the boundary

In [19]:
# net2 = build_random_bn(
#     n_nodes=1000,
#     name="MaxRandomBN",
#     cpt_mode="random",
#     avg_edges_per_node=2,
#     max_in_degree=2,
#     sprinkle_motifs=5,
#     # save_path=output_path
# )

# printNet(net2)


# Build Dataset

In [20]:
# Fetch every network returned by get_nets() and keep the result under
# `nets_collection`, which later cells index into.
nets_collection = get_nets()

# Print each network with printNet, followed by an empty line as a separator.
for net in nets_collection:
    printNet(net)
    print()

A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

VisitAsia -> ['Tuberculosis']
Tuberculosis -> ['TbOrCa']
Smoking -> ['Cancer', 'Bronchitis']
Cancer -> ['TbOrCa']
TbOrCa -> ['XRay', 'Dyspnea']
XRay -> []
Bronchitis -> ['Dyspnea']
Dyspnea -> []

Class -> ['Obsv', 'Result']
Obsv -> ['Classifier']
Classifier -> ['Result']
Result -> []

Sex -> ['Smoking_status', 'LVH', 'Coronary_artery_disease']
Smoking_status -> ['Coronary_artery_disease']
Systolic_Blood_Pressure -> ['LVH', 'Coronary_artery_disease']
Age -> ['LVH', 'Coronary_artery_disease']
LVH -> ['Coronary_artery_disease']
Diabetes_mellitus -> ['Coronary_artery_disease']
Total_chol -> ['Coronary_artery_disease']
HDL_Status -> ['Coronary_artery_disease']
Coronary_artery_disease -> []

Tampering -> ['Alarm']
Fire -> ['Alarm', 'Smoke']
Alarm -> ['Leaving']
Leaving -> ['Report']
Smoke -> []
Report -> []

P1 -> ['C', 'Color_P1']
P2 -> ['C', 'Color_P2']
C -> ['Color_C']
Color_P1 -> []
Color_C -> []
Color_P2 -> []

Ecstazine -> ['N

In [21]:
# Directory holding the benchmark parquet files; joined with a file name by the
# save/load cells below.
data_output = "./benchmarking/data"

In [22]:
# # Generate 797 random nets (one per node count from 3 to 799) on top of the built-in collection
# nets = nets_collection + \
# [ 
#     build_random_bn(n_nodes=i,
#         cpt_mode="random",
#         avg_edges_per_node=2,
#         max_in_degree=2,
#         sprinkle_motifs=5
#     )
#     for i in range(3,800)
# ]

# # Save them
# save_nets_to_parquet(nets, os.path.join(data_output, "nets_dataset_800.parquet"))

In [23]:
# Toggle for reading the persisted networks back from disk.
# NOTE: while this flag is False this cell never assigns `loaded_nets`, so any
# cell that references `loaded_nets` needs the flag set to True first.
LOAD_NETS = False
# Load them back
if LOAD_NETS:
    loaded_nets = load_nets_from_parquet(os.path.join(data_output, "nets_dataset.parquet"))


In [24]:
# for net in loaded_nets[:20]:
#   printNet(net)
#   print()

# Benchmark a simple query: are two nodes d-connected?

In [25]:
from pydantic import BaseModel

# Structured-output schema for the grading step: get_validation_score passes
# ValidateScore.model_json_schema() as `format` to answer_this_prompt and parses
# the reply with ValidateScore.model_validate_json.
# Documented with `#` comments on purpose: pydantic copies a class docstring into
# the generated JSON schema as `description`, which would change the schema sent
# to the model.
class ValidateScore(BaseModel):
    # The grading prompt asks for 1 when the LLM output is correct and 0 otherwise;
    # the field itself accepts any integer.
    score: int

def get_ground_truth(net, node1, node2):
    """Build the reference answer sentence for the d-connection question.

    Asks a ``BnHelper`` whether ``node1`` and ``node2`` are d-connected in
    ``net`` and renders the result as a fixed English sentence.

    Args:
        net: The network handed through to ``BnHelper.is_XY_dconnected``.
        node1: Name of the node whose evidence is changed.
        node2: Name of the node whose probability is asked about.

    Returns:
        A "Yes, ..." sentence when the helper reports a truthy result,
        otherwise a "No, ..." sentence.
    """
    # NOTE(review): the exploratory cell further down calls `is_XY_connected`
    # instead of `is_XY_dconnected`; confirm both exist on BnHelper.
    helper = BnHelper(function_name='is_XY_dconnected')

    if helper.is_XY_dconnected(net, node1, node2):
        return f"Yes, {node1} is d-connected to {node2}, which means that entering evidence for {node1} would change the probability of {node2} and vice versa."

    return f"No, {node1} is not d-connected to {node2}, which means that entering evidence for {node1} would not change the probability of {node2}."

def get_validation_score(y, y_hat):
    """Grade a model answer against the reference answer.

    Formats ``y`` and ``y_hat`` into a grading prompt, sends it through
    ``answer_this_prompt`` with the ``ValidateScore`` JSON schema as the
    requested format, and parses the reply back into ``ValidateScore``.

    Args:
        y: Ground-truth text; interpolated into the prompt.
        y_hat: Model output; interpolated into the prompt via its string form.

    Returns:
        The ``score`` field of the parsed reply.
    """
    # Same text as a triple-quoted block indented by four spaces: leading
    # newline, three indented lines, and a trailing indent.
    grading_prompt = (
        "\n"
        f"    Ground truth: {y}\n"
        f"    LLM output: {y_hat}\n"
        "    Return 1 if the LLM output is correct, otherwise return 0.\n"
        "    "
    )

    reply_schema = ValidateScore.model_json_schema()
    raw_verdict = answer_this_prompt(grading_prompt, format=reply_schema)
    return ValidateScore.model_validate_json(raw_verdict).score

def output_score_raw_model(net):
    """Score an answer obtained by prompting with the network structure only.

    Picks two nodes with ``pickTwoRandomNodes``, builds a prompt from the
    network structure plus the influence question, obtains the reference
    answer from ``get_ground_truth``, queries ``answer_this_prompt`` with the
    ``AnswerStructure`` schema, and grades the parsed reply with
    ``get_validation_score``. The picked nodes, the reference answer and the
    parsed reply are printed along the way.

    Args:
        net: The network to question.

    Returns:
        Whatever ``get_validation_score`` returns for the reference answer and
        the parsed reply.
    """
    first_node, second_node = pickTwoRandomNodes(net)
    print('Two random nodes:', first_node, second_node)

    structure_text = get_BN_structure(net)
    question = (
        f"In this Bayesian Network:\n{structure_text}\n"
        f"Is changing the evidence of {first_node} going to change the probability of {second_node}?"
    )

    expected = get_ground_truth(net, first_node, second_node)
    print('y:\n', expected)

    answer_schema = AnswerStructure.model_json_schema()
    raw_reply = answer_this_prompt(question, format=answer_schema)
    predicted = AnswerStructure.model_validate_json(raw_reply)
    print('y_hat:\n', predicted)

    return get_validation_score(expected, predicted)

# print(output_score_raw_model(loaded_nets[0]))

In [26]:
from run import execute_query

def output_score_baymin(net):
    """Score an answer obtained through ``execute_query``.

    Picks two nodes with ``pickTwoRandomNodes``, obtains the reference answer
    from ``get_ground_truth``, sends the influence question to
    ``execute_query`` together with the network, parses the reply as
    ``AnswerStructure`` and grades it with ``get_validation_score``. The
    picked nodes, the reference answer and the parsed reply are printed along
    the way.

    Args:
        net: The network to question.

    Returns:
        Whatever ``get_validation_score`` returns for the reference answer and
        the parsed reply.
    """
    first_node, second_node = pickTwoRandomNodes(net)
    print('Two random nodes:', first_node, second_node)

    expected = get_ground_truth(net, first_node, second_node)
    print('y:\n', expected)

    question = f"Is changing the evidence of {first_node} going to change the probability of {second_node}?"
    raw_reply = execute_query(net, question)
    predicted = AnswerStructure.model_validate_json(raw_reply)
    print('y_hat:\n', predicted)

    return get_validation_score(expected, predicted)

# print(output_score_baymin(loaded_nets[0]))

In [27]:
# Calls get_nets() again and rebinds `nets_collection`, the same name assigned in
# the "Build Dataset" section above.
nets_collection = get_nets()

In [28]:
# Model tags available for the (currently commented-out) answer_this_prompt call
# below; MODEL selects which one is used.
QWEN = "qwen3:1.7b"
GPT_OSS = "gpt-oss-bn-json"
MODEL = GPT_OSS

# Work on the first network of the collection.
targetNet = nets_collection[0]
printNet(targetNet)
targetBN = get_BN_structure(targetNet)

node1 = "B"
node2 = "C"

bn_helper = BnHelper(function_name='is_XY_connected')

ans = bn_helper.is_XY_connected(targetNet, node1, node2)
print(ans)

open_path = get_path(targetNet, node1, node2)
# Stored as `open_path_text`, not `output_path`: `output_path` already holds the
# output directory set near the top of the notebook and must not be overwritten
# with a rendered path string.
open_path_text = printPath(open_path)
print(open_path_text)

# Every sentence ends with a newline so the pieces do not run together
# (previously "...of C?We ran the query..." and "...open path: [...]Answer exactly...").
prompt = f"In this Bayesian Network:\n{targetBN}\n"
prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?\n"
prompt += f"We ran the query and got the answer: {ans}. It has the following open path: {open_path}\n"
# NOTE(review): the template always starts with "Yes"; it only matches the query
# result when `ans` is truthy, as it is for the hard-coded B/C pair.
prompt += f"Answer exactly as this template: 'Yes, {node1} is connected to {node2} through the following path: {open_path_text}.'"

# result = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=MODEL)
# print(result)

# y = get_ground_truth(targetNet, node1, node2)
# print('y:\n', y)

A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []
True
B -> A -> C -> C


In [29]:
# GPT_OSS = "gpt-oss-bn-json"

# printNet(nets_collection[0])
# targetNet = nets_collection[0]
# targetBN = get_BN_structure(targetNet)

# node1 = "B"
# node2 = "C"

# bn_helper = BnHelper(function_name='is_XY_connected')

# ans = bn_helper.is_XY_connected(targetNet, node1, node2)
# print(ans)

# prompt = f"In this Bayesian Network:\n{targetBN}\n"
# prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"
# prompt += f"We ran the query and got the answer: {ans}"
# prompt += f"Explain why."

# result = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=GPT_OSS)
# print(result)

In [30]:
# QWEN = "qwen3:1.7b" 
# GPT_OSS = "gpt-oss-bn-json"
# fishNet = nets_collection[-1]
# printNet(fishNet)
# print()

# targetBN = get_BN_structure(fishNet)

# node1 = "Rainfall"
# node2 = "FishAbundance"

# ans = bn_helper.is_XY_connected(fishNet, node1, node2)
# print(ans)

# prompt = f"In this Bayesian Network:\n{targetBN}\n"
# prompt += f"Is changing the evidence of {node1} going to change the probability of {node2}?"
# prompt += f"We ran the query and got the answer: {ans}"
# prompt += f"Explain why."

# resultQwen = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=QWEN)
# print(resultQwen)

# resultGptOss = answer_this_prompt(prompt, format=AnswerStructure.model_json_schema(), model=GPT_OSS)
# print(resultGptOss)

