In [1]:
import sys
import os

# Make the project root importable so the notebook can import project modules.
# Notebooks have no __file__, so the root is derived from the working directory:
# when the kernel was started inside the `testing` folder the root is its parent,
# otherwise the working directory is taken to be the root already.
current_dir = os.getcwd()
# Compare the final path component rather than matching a '/testing' suffix, so
# the check does not depend on the platform's path separator.
if os.path.basename(current_dir) == 'testing':
    project_root = os.path.dirname(current_dir)
else:
    project_root = current_dir

# Front of sys.path so the project root is searched first; guarded so that
# re-running the cell does not add duplicate entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print(f"Current working directory: {current_dir}")
print(f"Added project root to Python path: {project_root}")
print(f"Python path now includes: {[p for p in sys.path if 'llm-bn' in p]}")

import requests, json
from IPython.display import display, Markdown, clear_output
from ollama_helper.ollama_helper import answer_this_prompt
from bn_helpers.get_structures_print_tools import get_nets, printNet, get_BN_structure, get_BN_node_states
from bn_helpers.bn_helpers import BnToolBox
from bn_helpers.utils import get_path, set_findings, temporarily_set_findings
from benchmarking.data_utils import load_nets_from_parquet
from ollama_helper.ollama_helper import answer_this_prompt
from ollama_helper.prompts import TAKE_QUIZ_PROMPT
from bn_helpers.bn_helpers import BnToolBox
from bn_helpers.get_structures_print_tools import get_BN_structure
from bn_helpers.tool_agent import get_answer_from_tool_agent, chat_with_tools
from benchmarking.quiz_generator import (
    create_dependency_quiz, create_common_cause_quiz, create_common_effect_quiz, create_blocked_evidence_quiz, 
    create_evidence_change_relationship_quiz, create_probability_quiz, create_highest_impact_evidence_quiz)
from benchmarking.benchmarking_utils import pick_two_random_nodes, fake_random_nodes
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider
from bn_helpers.constants import MODEL, MODEL_QUIZ, OLLAMA_URL
from benchmarking.question_types import (DEPENDENCY_QUESTIONS, COMMON_CAUSE_QUESTIONS, COMMON_EFFECT_QUESTIONS, BLOCKED_EVIDENCES_QUESTIONS, 
EVIDENCE_CHANGE_RELATIONSHIP_QUESTIONS, PROBABILITY_QUESTIONS, HIGHEST_IMPACT_EVIDENCE_QUESTIONS)
from pydantic import BaseModel
from benchmarking.model_evaluator import (dependency_test, validate_quiz_answer, model_do_quiz, 
two_nodes_question, probability_question, QuizAnswer, 
debug_print_elementary_quiz, debug_print_numerical_quiz, export_quiz_samples_to_csv)
import asyncio
import time
from ollama_helper.ollama_helper import get_answer_from_ollama, get_quiz_answer_from_thinking_model


# NOTE(review): this rebinds the MODEL_QUIZ name imported from bn_helpers.constants
# earlier in this cell. The rebinding only changes this notebook's global; code
# inside imported modules that reads its own MODEL_QUIZ is unaffected — confirm
# that is the intended effect.
MODEL_QUIZ = "qwen2.5:7b"
# Candidate values for MODEL_TOOLS; exactly one assignment is left active.
# MODEL_TOOLS = "gpt-oss:latest"
# MODEL_TOOLS = "qwen3:8b"
MODEL_TOOLS = "llama3.1:70b"
# MODEL_TOOLS = MODEL_QUIZ
# print(generate_chat("Print [A, C] with no additional text", model="qwen2.5:3b", num_predict=5))
# print(answer_this_prompt("Print [A, C] with no additional text", model="qwen2.5:7b", format=AnswerStructure.model_json_schema()))
# Use the project root we established earlier to create the correct path

# model = MODEL_TOOLS
# prompt = "Answer a"
# max_tokens = 1000
# STREAM = True

# try:
#     loop = asyncio.get_event_loop()
#     if loop.is_running():
#         start_time = time.time()
#         ans = loop.run_until_complete(get_quiz_answer_from_thinking_model(prompt, model=model, max_tokens=max_tokens, format=QuizAnswer, stream=STREAM))
#         raw_response_time = time.time() - start_time
#     else:
#         start_time = time.time()
#         ans = loop.run_until_complete(get_quiz_answer_from_thinking_model(prompt, model=model, max_tokens=max_tokens, format=QuizAnswer, stream=STREAM))
#         raw_response_time = time.time() - start_time
# except RuntimeError:
#     start_time = time.time()
#     ans = asyncio.run(get_quiz_answer_from_thinking_model(prompt, model=model, max_tokens=max_tokens, format=QuizAnswer, stream=STREAM))
#     raw_response_time = time.time() - start_time

# print(ans)
# print(raw_response_time)

Current working directory: /fs04/scratch2/lb64/projects/llm-bn/testing
Added project root to Python path: /fs04/scratch2/lb64/projects/llm-bn
Python path now includes: ['/fs04/scratch2/lb64/projects/llm-bn', '/home/mvo1/lb64_scratch/miniconda3/envs/llm-bn/lib/python313.zip', '/home/mvo1/lb64_scratch/miniconda3/envs/llm-bn/lib/python3.13', '/home/mvo1/lb64_scratch/miniconda3/envs/llm-bn/lib/python3.13/lib-dynload', '/home/mvo1/lb64_scratch/miniconda3/envs/llm-bn/lib/python3.13/site-packages']
Loading Netica


In [3]:
# Resolve the benchmark data folder under the project root, then unpack the
# four networks stored in the parquet dataset.
data_output = os.path.join(project_root, "benchmarking", "data")
print(f"Loading nets from: {data_output}")
_nets_parquet_path = os.path.join(data_output, "nets_dataset.parquet")
_loaded_nets = load_nets_from_parquet(_nets_parquet_path)
net_5, net_10, net_30, net_60 = _loaded_nets

from benchmarking.model_evaluator import dependency_test, common_cause_test, common_effect_test, blocked_evidence_test, evidence_change_relationship_test, probability_test, highest_impact_evidence_test


# Print each loaded network's structure under its own heading, followed by a
# blank line.
for _net_label, _net_to_show in (
    ("Net 5", net_5),
    ("Net 10", net_10),
    ("Net 30", net_30),
    ("Net 60", net_60),
):
    print(f"{_net_label}:")
    printNet(_net_to_show)
    print()

# Question families passed to the sample export.
# NOTE(review): "highest_impact_evidence" is not in this list even though its
# quiz factory is imported above — confirm whether the omission is deliberate.
_sample_question_sets = [
    "dependency",
    "common_cause",
    "common_effect",
    "blocked_evidence",
    "evidence_change_relationship",
    "probability",
]

# Export five sample questions per run from net_5 to quiz_samples_net5.csv
# (a relative path, so it resolves against the kernel's working directory).
export_quiz_samples_to_csv(
    net_5,
    num_questions=5,
    output_file_path="quiz_samples_net5.csv",
    include_sets=_sample_question_sets,
)

# Elementary question families: each pairs a question set with the factory
# that builds its quiz. All are debug-printed on net_5 without evidence, two
# questions each, with a blank line after every family.
_elementary_quiz_cases = (
    (DEPENDENCY_QUESTIONS, create_dependency_quiz),
    (COMMON_CAUSE_QUESTIONS, create_common_cause_quiz),
    (COMMON_EFFECT_QUESTIONS, create_common_effect_quiz),
    (BLOCKED_EVIDENCES_QUESTIONS, create_blocked_evidence_quiz),
    (EVIDENCE_CHANGE_RELATIONSHIP_QUESTIONS, create_evidence_change_relationship_quiz),
)

for _question_set, _quiz_factory in _elementary_quiz_cases:
    debug_print_elementary_quiz(
        net=net_5,
        question_set=_question_set,
        create_quiz_function=_quiz_factory,
        has_evidence=False,
        num_questions=2,
    )
    print()

# The probability family uses the numerical debug printer and is the only one
# run with has_evidence=True. It stays the last statement of the cell.
debug_print_numerical_quiz(
    net=net_5,
    question_set=PROBABILITY_QUESTIONS,
    create_quiz_function=create_probability_quiz,
    has_evidence=True,
    num_questions=2,
)


# print('Dependency Test:--------------------------------')
# dependency_test(net_5, num_questions=1)

# print('Common Cause Test:--------------------------------')
# common_cause_test(net_5, num_questions=1)

# print('Common Effect Test:--------------------------------')
# common_effect_test(net_5, num_questions=1)

# print('Blocked Evidence Test:--------------------------------')
# blocked_evidence_test(net_5, num_questions=1)

# print('Evidence Change Relationship Test:--------------------------------')
# evidence_change_relationship_test(net_5, num_questions=1)

# print('Probability Test:--------------------------------')
# probability_test(net_5, num_questions=1)

# print('Highest Impact Evidence Test:--------------------------------')
# highest_impact_evidence_test(net_5, num_questions=1)

Loading nets from: /fs04/scratch2/lb64/projects/llm-bn/benchmarking/data
Loaded 4 nets from /fs04/scratch2/lb64/projects/llm-bn/benchmarking/data/nets_dataset.parquet
Net 5:
C -> ['E', 'D', 'A', 'B']
E -> ['D', 'A', 'B']
D -> ['A']
A -> ['B']
B -> []

Net 10:
H -> ['I', 'C', 'A', 'F', 'D', 'B', 'G', 'J']
I -> ['C', 'A', 'F', 'D', 'B', 'G', 'J', 'E']
C -> ['A', 'F', 'D', 'B', 'E']
A -> ['F', 'B', 'J', 'E']
F -> ['G', 'J', 'E']
D -> ['B', 'G', 'E']
B -> ['G', 'J']
G -> ['J']
J -> []
E -> []

Net 30:
M -> ['P', 'A1', 'A', 'F', 'Y', 'Q', 'C', 'L', 'K']
P -> ['T', 'C1', 'V', 'Y', 'R', 'E', 'I']
U -> ['A1', 'X', 'H', 'T', 'Z', 'C1', 'F', 'V', 'S']
A1 -> ['Z', 'Y', 'Q', 'C']
D -> ['A', 'Z', 'E', 'I']
X -> ['A', 'Z', 'C1', 'F', 'R', 'E', 'C', 'L', 'D1']
A -> ['G', 'W', 'F', 'V', 'E', 'Q', 'C', 'L', 'B']
N -> ['J', 'B1', 'F', 'I', 'C', 'L']
H -> ['G', 'Z', 'O', 'L', 'D1']
G -> ['J', 'Z', 'V']
J -> ['W', 'T', 'C1', 'B1', 'V', 'S', 'I', 'B']
W -> ['C1', 'E', 'I', 'K']
T -> ['Z', 'O', 'C1', 'V', '

In [4]:
# Show the structure and node states of net_5, then send one question about it
# to the tool agent with debug tracing enabled.
print("\n=== Network ===\n")
# nets = get_nets()
# myNet = nets[1]

printNet(net_5)
print()
print(get_BN_node_states(net_5))


# Leave exactly one string uncommented inside the parentheses: they only group
# the expression, so `question` is a plain str. Two uncommented strings would be
# silently concatenated into one question.
# The misspellings in some candidates look deliberate (presumably to probe how
# the agent copes with typos) — TODO confirm before "correcting" them.
question = (
    # "Is Visitinz Azia change the probability of Smokng?"
    # "Which symtome has a higher impact on Lung Cancer knowing that person is visiting Asia?"
    # "Is changing the evidence of A going to change the probability of B?"
    # "What is the common effect of C and B?"
    # "What is the probability of XRay given Lung Cancer, Smoking and Visit Asiaaa?"
    "Is the relationship between C and E affected by the evidence of B?"
    # "What is the probability of A given B is increased and C is present?"
    # "Is the relationship between Vizit Azia and Lung Cancr get affected when we observe Tuberculosis or Cancer?"
    # "What set of evidences would block the path between B and C?"

)
# NOTE(review): MODEL is rebound here but is only referenced by the
# commented-out chat_with_tools call below; the active
# get_answer_from_tool_agent call is not given a model argument — confirm which
# model that call actually uses.
MODEL = "llama3.1:70b"
print("\n=== USER QUERY ===\n", question, "\n")
print("\n=== BayMin Answer ===\n")
# chat_with_tools(net_5, question, max_tokens=1000, isDebug=True, model=MODEL, isTesting=False)
# Last expression of the cell, so its return value is echoed as the cell result.
get_answer_from_tool_agent(net_5, question, max_tokens=5000, isTesting=False, isDebug=True)
# answer = get_answer_from_tool_agent(myNet, question, max_tokens=5000)



=== Network ===

C -> ['E', 'D', 'A', 'B']
E -> ['D', 'A', 'B']
D -> ['A']
A -> ['B']
B -> []

C ['False', 'True']
E ['False', 'True']
D ['False', 'True']
A ['False', 'True']
B ['False', 'True']


=== USER QUERY ===
 Is the relationship between C and E affected by the evidence of B? 


=== BayMin Answer ===

[BayMin] tool_call #1: check_evidences_change_relationship_between_two_nodes({'evidence': [{'B': 'True'}], 'node1': 'C', 'node2': 'E'})
[BayMin] tool_result #1: {'result': 'No - conditioning on B=True does not change the dependency between C and E. Before observing B=True, they were d-connected. After observing all evidence, they remain d-connected. Sequence: +B => d-connected.'}
[BayMin] tool_call #1: check_d_connected({'from_node': 'C', 'to_node': 'E'})
[BayMin] tool_result #1: {'result': "Yes, C is d-connected to E, which means that entering evidence for C would change the probability of E and vice versa. They d-connected through the following path: ['C', 'E']"}


'No - conditioning on B=True does not change the dependency between C and E. Before observing B=True, they were d-connected. After observing all evidence, they remain d-connected. Sequence: +B => d-connected.'

In [None]:
import requests, json
from pydantic import BaseModel
from IPython.display import display, Markdown, clear_output

MODEL = "gpt-oss-bn-json"
def answer_this_prompt(prompt, stream=False, model=MODEL, temperature=0, format=None, num_predict=None):
    """Send `prompt` to the local Ollama generate endpoint and return the generated text.

    Note: this definition rebinds the `answer_this_prompt` name imported from
    `ollama_helper.ollama_helper` in the first cell of the notebook.

    Args:
        prompt: Text sent as the request's `prompt` field.
        stream: When True, the text accumulated so far is re-rendered as
            Markdown in the cell output after every received chunk. The HTTP
            response itself is always read incrementally, whatever this flag is.
        model: Model name for the request; the default is the value `MODEL`
            had when this function was defined.
        temperature: Sampling temperature, sent as `options.temperature`.
        format: Optional output-format constraint (for example a JSON schema
            dict); left out of the request when None.
        num_predict: Optional cap on generated tokens, sent as
            `options.num_predict`; when None no cap is sent.

    Returns:
        The concatenated `response` fields of all received chunks. On failure
        a message string is returned instead of raising:
        "Failed to retrieve response: <status>" for a non-200 status, or
        "Failed to parse JSON: <error>" when a received line is not valid JSON.
    """
    # Sampling settings live in the request's `options` object. The previous
    # top-level "temperature" and "max_new_tokens" keys are not fields of the
    # generate request, so the requested temperature never took effect.
    options = {"temperature": temperature}
    if num_predict is not None:
        options["num_predict"] = num_predict

    payload = {
        "prompt": prompt,
        "model": model,
        "options": options,
    }
    # Only constrain the output when the caller asked for a format, instead of
    # sending an explicit null.
    if format is not None:
        payload["format"] = format

    headers = {
        'Content-Type': 'application/json'
    }
    endpoint = "http://localhost:11434/api/generate"

    # The response body is newline-delimited JSON, so read it line by line.
    with requests.post(endpoint, headers=headers, json=payload, stream=True) as response:
        if response.status_code != 200:
            return "Failed to retrieve response: " + str(response.status_code)

        try:
            full_response = ""
            for line in response.iter_lines(decode_unicode=True):
                if not line.strip():
                    continue  # skip blank lines
                response_json = json.loads(line)
                full_response += response_json.get("response", "")

                # Live preview: replace the cell output with the text so far
                if stream:
                    clear_output(wait=True)
                    display(Markdown(full_response))

            return full_response
        except json.JSONDecodeError as e:
            return "Failed to parse JSON: " + str(e)

# Schema for the model's structured reply: an object carrying one function name.
# NOTE(review): this rebinds the BnToolBox name imported from
# bn_helpers.bn_helpers in the first cell — confirm later cells do not expect
# the imported class.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into model_json_schema() as "description", which would
# change the schema this notebook sends as `format`.
class BnToolBox(BaseModel):
    fnName: str  # function name chosen by the model; compared against 'add' below

def add(a=5, b=6):
    """Print a confirmation that the function was reached, then return `a + b`."""
    print('Go to function successfully')
    result = a + b
    return result

# Ask the model for a function name, constrained to the BnToolBox schema, then
# run the local function when the reply names it.
_reply_schema = BnToolBox.model_json_schema()
output = answer_this_prompt('output this function name: add', stream=True, format=_reply_schema)

bn_tool_box = BnToolBox.model_validate_json(output)
_requested_function = bn_tool_box.fnName
if _requested_function == 'add':
    print(add())

{"fnName":"add"}



Go to function successfully
11


In [None]:
# NOTE(review): this path is relative to the kernel's working directory, while
# the first cell's recorded output shows the kernel running in `testing/` —
# confirm the collection is reachable from there.
bn_path = "./nets/collection/"
from bni_netica.bni_netica import *
from bni_netica.bni_netica import Net


def _load_net(file_name):
    """Open the network file `file_name` located under `bn_path`."""
    return Net(bn_path + file_name)


CancerNeapolitanNet = _load_net("Cancer Neapolitan.neta")
ChestClinicNet = _load_net("ChestClinic.neta")
ClassifierNet = _load_net("Classifier.neta")
CoronaryRiskNet = _load_net("Coronary Risk.neta")
FireNet = _load_net("Fire.neta")
MendelGeneticsNet = _load_net("Mendel Genetics.neta")
RatsNet = _load_net("Rats.neta")
WetGrassNet = _load_net("Wet Grass.neta")
RatsNoisyOr = _load_net("Rats_NoisyOr.dne")
Derm = _load_net("Derm 7.9 A.dne")

# Text rendering of FireNet: one "<node> -> [<children>]" line per node.
_fire_lines = []
for node in FireNet.nodes():
    _node_name = node.name()
    _child_names = [child.name() for child in node.children()]
    _fire_lines.append(f"{_node_name} -> {_child_names}\n")
BN = "".join(_fire_lines)

def isConnected(net, fromNode, toNode):
    """Tell whether `toNode` is among the d-connected relatives of `fromNode`.

    Args:
        net: Network object exposing `node(name)`.
        fromNode: Name of the node whose relatives are looked up.
        toNode: Name searched for among those relatives.

    Returns:
        True when a node returned by `getRelated("d_connected")` on `fromNode`
        has the name `toNode`, False otherwise.
    """
    related_nodes = net.node(fromNode).getRelated("d_connected")
    return any(candidate.name() == toNode for candidate in related_nodes)


# Render FireNet as text: one "<node> -> [<children>]" line per node.
BN = ""
for node in FireNet.nodes():
    BN += f"{node.name()} -> {[child.name() for child in node.children()]}\n"

# Question template; it is replaced by its formatted form a few lines below.
PROMPT = "Within {BN}, is {fromNode} an ancestor of {toNode}?"
fromNode = 'Alarm'
toNode = 'Fire'

PROMPT = PROMPT.format(BN=BN, fromNode=fromNode, toNode=toNode)
# The leading space keeps the routing instruction from being fused onto the
# question mark that ends the question ("...Fire?if user ask...").
inputPrompt = PROMPT + ' if user ask anything related to are these two nodes connected to each other, output this function name: isConnected'
output2 = answer_this_prompt(inputPrompt, stream=True, format=BnToolBox.model_json_schema())

{"fnName":"isConnected"}


In [None]:
# Question templates for the node-identification experiment. Every template
# carries the {BN}, {fromNode} and {toNode} placeholders and is filled in with
# str.format before use.
questions = [
    "In this Bayesian Networks: {BN}, is {fromNode} connected to {toNode}?",
    "In this Bayesian Networks: {BN}, is {fromNode} connected to {toNode}? What are the two nodes mentioned?",
    "Within the Bayesian Network {BN}, does a path exist from {fromNode} to {toNode}?",
    # author's note: top performer
    "In the graph {BN}, can information flow from {fromNode} to {toNode}?",
    "Are {fromNode} and {toNode} dependent in the Bayesian Network {BN}?",
    "In {BN}, is there any direct or indirect connection between {fromNode} and {toNode}?",
    "Can {fromNode} influence {toNode} in the Bayesian Network {BN}?",
    "Is {toNode} reachable from {fromNode} in the structure of {BN}?",
    "Does {BN} contain a path that links {fromNode} to {toNode}?",
    "Are there any edges—direct or through other nodes—connecting {fromNode} and {toNode} in {BN}?",
    "Is {toNode} conditionally dependent on {fromNode} in the Bayesian Network {BN}?",
    "Within {BN}, is {fromNode} an ancestor of {toNode}?",
]

In [None]:
listOfNets = [CancerNeapolitanNet, ChestClinicNet, ClassifierNet, CoronaryRiskNet, FireNet, MendelGeneticsNet, RatsNet, WetGrassNet, RatsNoisyOr, Derm]

# Number of random node pairs tried per network for each question template.
TRIALS_PER_NET = 5

# NOTE(review): correctIdentification is not defined in any cell visible here;
# it must already exist in the kernel for this cell to run — confirm where it
# comes from.
for question in questions:
    total = 0
    correct = 0
    # Print the raw template: no network or node pair has been chosen yet at
    # this point, so formatting it here would raise NameError on a fresh kernel
    # or show stale values left over from an earlier iteration.
    print(f"Question: {question}")
    for net in listOfNets:
        for _ in range(TRIALS_PER_NET):
            fromNode, toNode = pick_two_random_nodes(net)
            if not (fromNode and toNode):
                # No usable pair was drawn: skip the trial instead of counting
                # it as an incorrect answer.
                continue
            total += 1

            correctIdentified, queryFromNode, queryToNode = correctIdentification(question, net, fromNode, toNode)
            if correctIdentified:
                correct += 1
            else:
                print(f"Incorrect identification for {net.name()}")
                printNet(net)
                print()
                print("Expected:", fromNode, "->", toNode)
                print("Reality:", queryFromNode, "->", queryToNode)
                print("----------------------------------------------------")

    # Guard the ratio: every trial may have been skipped
    if total:
        print(f"Total: {total}, Correct: {correct}, Accuracy: {correct/total:.2%}")
    else:
        print(f"Total: {total}, Correct: {correct}, Accuracy: n/a")
    print("<------------------------------------------------------------------------->")

In [None]:
# from bni_netica.support_tools import get_nets, printNet, get_BN_structure, get_BN_node_states
# from bni_netica.bn_helpers import BnHelper, QueryTwoNodes, ParamExtractor
# from ollama_helper.ollama_helper import answer_this_prompt
# from bni_netica.scripts import HELLO_SCRIPT, MENU_SCRIPT, GET_FN_SCRIPT

# # PROMPT = """Consider this question: '{question}'. 
# # What are the two nodes in this question? 
# # Make sure to correctly output the names of nodes exactly as mentioned in the network and in the order as the question mentioned. 
# # For example, if the question mentioned "A and B" then the two nodes are fromNode: A, toNode: B; or if the question mentioned "Smoking and Cancer" then the two nodes are fromNode: Smoking, toNode: Cancer. 
# # Answer in JSON format."""

# def query_menu(BN_string, net):
#     """Input: BN: string, net: object"""
#     pre_query = f"""In this Bayesian Network: 
# {BN_string}
# """
#     user_query = input("Enter your query here: ")
#     get_fn_prompt = pre_query + "\n" + user_query + GET_FN_SCRIPT

#     get_fn = answer_this_prompt(get_fn_prompt, format=BnHelper.model_json_schema())
#     print("\nBayMin:")
#     print(get_fn)

#     get_fn = BnHelper.model_validate_json(get_fn)
#     fn = get_fn.function_name

#     bn_helper = BnHelper(function_name=fn)
#     param_extractor = ParamExtractor()
    
#     if fn == "is_XY_dconnected":
        
#         get_params = param_extractor.extract_two_nodes_from_query(pre_query, user_query)
#         print(get_params)

#         ans = bn_helper.is_XY_dconnected(net, get_params.from_node, get_params.to_node)

#         if ans:
#             template = f"Yes, {get_params.from_node} is d-connected to {get_params.to_node}, which means that entering evidence for {get_params.from_node} would change the probability of {get_params.to_node} and vice versa."
#         else:
#             template = f"No, {get_params.from_node} is not d-connected to {get_params.to_node}, which means that entering evidence for {get_params.from_node} would not change the probability of {get_params.to_node}."
        
#         explain_prompt = f"""User asked: In this '{BN_string}', '{user_query}'. We use {fn} function and the output is: '{ans}'. Follow this exact template to provide the answer: '{template}'."""
#         print(answer_this_prompt(explain_prompt))

#     print()
    
#     print(MENU_SCRIPT)
#     choice = int(input("Enter your choice: "))
#     print()

#     if choice == 1:
#         input("Enter your query here: ")
#         print('This is a sample answer.\n')
#     elif choice == 2:
#         input("Enter your query here: ")
#         print('This is a sample answer.\n')
#     elif choice == 3:
#         print("Not yet implemented\n")
#         return 
#     elif choice == 4:
#         print("Goodbye!\n")
#         return    

# def main():
#     print(HELLO_SCRIPT)
#     nets = get_nets()

    
#     for i, net in enumerate(nets):
#         print(f"{i}: {net.name()}")

#     print()
#     choice = int(input("Enter the number of the network you want to use: "))
#     print()
#     if choice < 0 or choice >= len(nets):
#         print("Invalid choice. Exiting.")
#         return
    
#     net = nets[choice]
#     print(f"You chose: {net.name()}")
#     printNet(net)
#     print('\nBN states:\n')
#     print(get_BN_node_states(net))

#     BN_string = get_BN_structure(net)
#     query_menu(BN_string=BN_string, net=net)


# if __name__ == "__main__":
#     main()
