In [3]:
import threading
from typing import List, Optional, Union

import torch
from IPython.display import display, Markdown, clear_output
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)

MODEL_ID = "openai/gpt-oss-20b"

# Load tokenizer & model once; put weights on GPU(s) automatically.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype="auto",
)
# Some tokenizers don’t define a pad token; set it to eos to avoid warnings.
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token_id = tokenizer.eos_token_id


def _build_inputs_from_prompt_or_messages(
    prompt: Optional[str] = None,
    messages: Optional[List[dict]] = None,
):
    """
    If `messages` is provided, uses the model's chat template.
    Otherwise, wraps `prompt` into a single-user message.
    """
    if messages is None:
        if prompt is None:
            raise ValueError("Provide either `prompt` or `messages`.")
        messages = [{"role": "user", "content": prompt}]

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    return inputs.to(model.device)


def answer_this_prompt(
    prompt: Optional[str] = None,
    *,
    messages: Optional[List[dict]] = None,
    stream: bool = False,
    temperature: float = 0.0,
    max_new_tokens: int = 200,
    top_p: float = 1.0,
    top_k: int = 0,
    repetition_penalty: float = 1.0,
) -> str:
    """
    Generate with gpt-oss via Hugging Face transformers.

    Args:
        prompt: simple string prompt (ignored if `messages` is provided)
        messages: list of chat turns [{"role": "user"/"assistant"/"system", "content": "..."}]
        stream: if True, token-stream into the notebook output
        temperature, top_p, top_k, repetition_penalty: sampling params
        max_new_tokens: generation length

    Returns:
        The generated text (assistant reply) as a string.
    """
    inputs = _build_inputs_from_prompt_or_messages(prompt, messages)

    gen_kwargs = dict(
        **inputs,
        do_sample=(temperature > 0),
        temperature=temperature,
        top_p=top_p,
        top_k=top_k if top_k > 0 else None,
        repetition_penalty=repetition_penalty,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    if not stream:
        with torch.no_grad():
            out = model.generate(**gen_kwargs)
        # Slice off the prompt part and decode only the newly generated tokens
        new_tokens = out[0, inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)

    # --- streaming path (notebook-friendly) ---
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    gen_kwargs_stream = {**gen_kwargs, "streamer": streamer}

    # Run generation in a background thread and consume tokens as they arrive
    def _gen():
        with torch.no_grad():
            model.generate(**gen_kwargs_stream)

    thread = threading.Thread(target=_gen, daemon=True)
    thread.start()

    full_text = ""
    for chunk in streamer:
        full_text += chunk
        clear_output(wait=True)
        display(Markdown(full_text))

    thread.join()
    return full_text

Fetching 40 files:   0%|          | 0/40 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# reply = answer_this_prompt("How many rs are in the word 'strawberry'?",
#                            stream=False, max_new_tokens=200)
# print(reply)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


analysisWe need to interpret the question: "How many rs are in the word 'strawberry'?" The word 'strawberry' spelled s t r a w b e r r y. Count the letter 'r'. There are two 'r's: one after t, and then two consecutive r's before y. Actually 'strawberry' has 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. So there are 3 r's? Wait: Let's write: s t r a w b e r r y. That's 10 letters. The 'r's: at position 3, 8, 9. So 3 r's. But check: 'strawberry' spelled s t r a w b e r r y. Yes, there are 3 r's. So answer: 3. But maybe trick: The word 'st


In [4]:
# msgs = [
#     {"role": "system", "content": "You are a concise assistant."},
#     {"role": "user", "content": "How many rs are in the word 'strawberry'?"},
# ]
# reply = answer_this_prompt(messages=msgs, stream=True, temperature=0.0, max_new_tokens=50)

analysisWe need to answer: "How many rs are in the word 'strawberry'?" The word 'strawberry' spelled s t r a w b e r r y. Count 'r's: there are two '

In [2]:
bn_path = "./nets/collection/"
from bni_netica.bni_netica import *
from bni_netica.bni_netica import Net

Loading Netica


In [4]:
CancerNeapolitanNet = Net(bn_path+"Cancer Neapolitan.neta")
ChestClinicNet = Net(bn_path+"ChestClinic.neta")
ClassifierNet = Net(bn_path+"Classifier.neta")
CoronaryRiskNet = Net(bn_path+"Coronary Risk.neta")
FireNet = Net(bn_path+"Fire.neta")
MendelGeneticsNet = Net(bn_path+"Mendel Genetics.neta")
RatsNet = Net(bn_path+"Rats.neta")
WetGrassNet = Net(bn_path+"Wet Grass.neta")
RatsNoisyOr = Net(bn_path+"Rats_NoisyOr.dne")
Derm = Net(bn_path+"Derm 7.9 A.dne")

In [1]:
from pydantic import BaseModel
import outlines
from outlines.models.transformers import Transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

class Query(BaseModel):
    fromNode: str
    toNode: str

MODEL_ID = "openai/gpt-oss-20b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",      
    torch_dtype="auto",
    bos_token_id=tokenizer.bos_token_id
)

Fetching 40 files:   0%|          | 0/40 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
ol_model = Transformers(model, tokenizer)

gen = outlines.generate.json(ol_model, Query)
prompt = "In this Bayesian Network, is Paris connected to Berlin? What are the two nodes mentioned?"

result = gen(prompt)
print(result)
print(result.model_dump())

fromNode='Germany' toNode='Paris1'
{'fromNode': 'Germany', 'toNode': 'Paris1'}


In [6]:
bn_path = "./nets/collection/"
from bni_netica.bni_netica import *
from bni_netica.bni_netica import Net

CancerNeapolitanNet = Net(bn_path+"Cancer Neapolitan.neta")
ChestClinicNet = Net(bn_path+"ChestClinic.neta")
ClassifierNet = Net(bn_path+"Classifier.neta")
CoronaryRiskNet = Net(bn_path+"Coronary Risk.neta")
FireNet = Net(bn_path+"Fire.neta")
MendelGeneticsNet = Net(bn_path+"Mendel Genetics.neta")
RatsNet = Net(bn_path+"Rats.neta")
WetGrassNet = Net(bn_path+"Wet Grass.neta")
RatsNoisyOr = Net(bn_path+"Rats_NoisyOr.dne")
Derm = Net(bn_path+"Derm 7.9 A.dne")

Loading Netica


In [7]:
BN = ""

for node in ChestClinicNet.nodes():
	# print(f"{node.name()} -> {[child.name() for child in node.children()]}")
	BN += f"{node.name()} -> {[child.name() for child in node.children()]}\n"
	
print(BN)

VisitAsia -> ['Tuberculosis']
Tuberculosis -> ['TbOrCa']
Smoking -> ['Cancer', 'Bronchitis']
Cancer -> ['TbOrCa']
TbOrCa -> ['XRay', 'Dyspnea']
XRay -> []
Bronchitis -> ['Dyspnea']
Dyspnea -> []



In [9]:
PROMPT = """In this Bayesian Networks: {BN}, is {fromNode} connected to {toNode}?"""
fromNode = "Smoking"
toNode = "Cancer"
input = PROMPT.format(BN=BN, fromNode=fromNode, toNode=toNode) 

In [10]:
ans = gen(input)
print(ans)
print(ans.fromNode)
print(ans.toNode)

fromNode='Smoking' toNode='Cancer'
Smoking
Cancer


In [14]:
def isConnected(net, fromNode, toNode):
  relatedNodes = net.node(fromNode).getRelated("d_connected")
  for node in relatedNodes:
    if node.name() == toNode:
      return True
  return False

import random 
def pickTwoRandomNodes(net):
    nodes = net.nodes()
    if len(nodes) < 2:
        return None, None
    node1, node2 = random.sample(nodes, 2)
    return node1.name(), node2.name()

def printNet(net):
    for node in net.nodes():
        print(f"{node.name()} -> {[child.name() for child in node.children()]}")

def correctIdentification(net, fromNode, toNode):
    BN = ""
    
    for node in net.nodes():
        BN += f"{node.name()} -> {[child.name() for child in node.children()]}\n"

    input = PROMPT.format(BN=BN, fromNode=fromNode, toNode=toNode)
    ans = gen(input)
    queryFromNode = ans.fromNode
    queryToNode = ans.toNode

    return queryFromNode == fromNode and queryToNode == toNode, queryFromNode, queryToNode

In [15]:
listOfNets = [CancerNeapolitanNet, ChestClinicNet, ClassifierNet, CoronaryRiskNet, FireNet, MendelGeneticsNet, RatsNet, WetGrassNet, RatsNoisyOr, Derm]
total = 0
correct = 0

for net in listOfNets:
    for _ in range(10):
      total += 1
      fromNode, toNode = pickTwoRandomNodes(net)
      if fromNode and toNode:
          correctIdentified, queryFromNode, queryToNode = correctIdentification(net, fromNode, toNode)
          if correctIdentified:
            correct += 1
          else:
            print(f"Incorrect identification for {net.name()}")
            printNet(net)
            print()
            print("Expected:", fromNode, "->", toNode)
            print("Reality:", queryFromNode, "->", queryToNode)
            print("----------------------------------------------------")

print(f"Total: {total}, Correct: {correct}, Accuracy: {correct/total:.2%}")

Incorrect identification for Cancer_Neapolitan
A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

Expected: E -> C
Reality: C -> E
----------------------------------------------------
Incorrect identification for Cancer_Neapolitan
A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

Expected: C -> E
Reality: C -> Z
----------------------------------------------------
Incorrect identification for Cancer_Neapolitan
A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

Expected: E -> A
Reality: A -> E
----------------------------------------------------
Incorrect identification for Cancer_Neapolitan
A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

Expected: E -> D
Reality: E -> J
----------------------------------------------------
Incorrect identification for Cancer_Neapolitan
A -> ['B', 'C']
B -> ['D']
C -> ['D', 'E']
D -> []
E -> []

Expected: B -> A
Reality: B -> Cinicio-B
----------------------------------------------------
Incorrect identification fo