# Making a tree from a list of heirarchal instructions
## Overview
This program takes a file consisting of several heirarchical lists of instructions (e.g. bullet points) and creates a JSON formatted decision tree from it.
## Tools
Pydantic and instructor are used to get properly formatted responses from the LLM. Gemma3 is the current model being used. It is hosted locally using Ollama.

In [42]:
## Imports
from __future__ import annotations
from typing import List, Optional
from enum import Enum
from pydantic import BaseModel, Field
import instructor
from openai import OpenAI

In [43]:
## Loading LLM
MODEL='gemma3'
client = instructor.from_openai(
    OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",  # required, but unused
    ),
    mode=instructor.Mode.JSON_O1,
)

def complete(content: str, response_model):
    return client.chat.completions.create(
    model=MODEL,
    messages=[
        {
            "role": "user",
            "content": content,
        }
    ],
    response_model=response_model,
)

# Extracting bullet points
1. The markdown file containing the instructions is loaded
1. The chunk_instructions function is called to break up the file into lists of lines
1. Lines between ones that contain a starter substring (e.g. "Challenge") and blank lines are separated into sublists

In [44]:
## Loading instructions
INSTRUCTION_PATH: str = 'bullet_list_instructions.md'
instructions: str = open(INSTRUCTION_PATH, 'r', encoding='utf-8').read()

In [45]:
## Breaks bullet list of instructions into sublists
## Sublists are all lines between a beginner string and a newline
## A new sublist is created when a begginer string is found, even if there is no newline
def chunk_instructions(instructions: str, beginners: list[str]) -> list[list[str]]:
    lines: list[str] = instructions.split("\n")
    line_count: int = len(lines)

    sublists: list[list[str]] = []
    current_sublist: list[str] = []
    
    for i in range(line_count):
        line: str = lines[i]

        is_beginning: bool = any(beginner in line for beginner in beginners)
        is_end: bool = (line == "") or (i == line_count)
        
        if is_beginning or is_end:
            if current_sublist:
                sublists.append(current_sublist)
            current_sublist = [line] if is_beginning else []
        elif current_sublist:
            current_sublist.append(line)
    return sublists

instruction_set: list[list[str]] = chunk_instructions(instructions, ["Challenge:", "Preparation for community-based visit"])

# Tree Generation
1. Instructor is used to get data about a specific bullet point using the NodeData pydantic class
1. This class contains fields with prompts designed to get specific information from the bullet
1. The create_tree function recursively creates a tree, finding child bullets using the get_bullet_indices_under_line function

In [54]:
class ConditionClassification(BaseModel):
    reasoning: str = Field(..., description="Explain in detail why, or why not, the given line is an if-statement.")
    predicate: Optional[str] = Field(..., description="The condition that the if-statement depends on, as determed by your reasoning.")
    is_conditional: bool

class VariableExtractionResponse(BaseModel):
    reasoning: str = Field(..., description="Determine what variables are referenced in the given line. Be as specific as possible and explain your reasoning in detail.")
    variables: List[str] = Field(..., description="Based on your reasoning, make a list of the variable(s) in the line. Give the variables descriptive names.")


class LineData(BaseModel):
    condition_classification: ConditionClassification
    variable_extraction: VariableExtractionResponse


class Node(BaseModel):
    node_type: str
    desc: str
    variables: List[str]
    condition: Optional[str]
    children: list[Node]


complete("Line: \"\"\"if yes\"\"\"", ConditionClassification).model_dump_json()

'{"reasoning":"The line \\"if yes\\" is a basic conditional statement. It represents a simple condition that checks if the word \\"yes\\" is true.  This is a fundamental form of an if-statement.","predicate":"yes","is_conditional":true}'

In [47]:
def strip_bullet(bullet: str) -> str:
    ascii_bullet = bullet.encode('ascii', 'ignore').decode()
    start = 0
    for i in range(len(ascii_bullet)):
        if ascii_bullet[i] == "#":
            continue
        start = i
        break
    return ascii_bullet[start:].strip()


def get_level(bullet: str) -> int:
    level = 0
    for char in bullet:
        if char == "#":
            level += 1
        else:
            break
    return level


def get_bullet_indices_under_line(bullets: list[str], line_index: int) -> list[int]:
    if line_index + 1 == len(bullets):
        return []
    indices: list[int] = []
    base_level: int = get_level(bullets[line_index])
    for i in range(line_index + 1, len(bullets)):
        level: int = get_level(bullets[i])
        if level == base_level + 1:
            indices.append(i)
        elif level <= base_level:
            break
    return indices

def create_tree(bullets: list[str], i: int = 0) -> Node:
    stripped_bullet = strip_bullet(bullets[i])
    line_data = complete(f"Line: \"\"\"{stripped_bullet}\"\"\"", LineData)
    node_type: str = "condition" if line_data.condition_classification.is_conditional else "instruction"

    children: list[Node] = []
    for j in get_bullet_indices_under_line(bullets, i):
        node = create_tree(bullets, j)
        if node_type == "instruction" and node.node_type in ["condition", "retrieval"]:
            node_type = "retrieval"
        children.append(node)
    node = Node(node_type=node_type, desc=stripped_bullet, variables=line_data.variable_extraction.variables, condition=line_data.condition_classification.predicate, children=children)

    return node


In [55]:
## Generates trees 1 through 4 as a test
trees: list[Node] = list(map(create_tree, instruction_set[1:5]))

In [None]:
## Converts trees to JSON
output_file = open('output.json', 'w')
json_string: str = "{\"trees\" : ["
for i in range(len(trees)):
    json_string += trees[i].model_dump_json()
    if i != len(trees) - 1:
        json_string += ",\n"

json_string += "]}"
output_file.write(json_string)
output_file.close()
json_string

'{"trees" : [{"node_type":"condition","desc":"Challenge: Client is sad, despondent, or depressed","variables":["client_emotional_state"],"condition":null,"children":[{"node_type":"retrieval","desc":"Conduct Columbia Suicide Severity Rating Scale","variables":["CSSRS"],"condition":null,"children":[{"node_type":"condition","desc":"If positive score: Call Supervisor and consult about next steps","variables":["score","supervisor"],"condition":"positive score","children":[]},{"node_type":"condition","desc":"If negative score:","variables":["score","negative_score"],"condition":null,"children":[{"node_type":"instruction","desc":"Complete Safety Plan (CL 122)","variables":[],"condition":null,"children":[]},{"node_type":"instruction","desc":"Identify plan for client to see provider","variables":["client","provider"],"condition":null,"children":[]},{"node_type":"instruction","desc":"Follow-up with Supervisor and review plan","variables":["supervisor","plan"],"condition":null,"children":[]}]},{"

# Traverser

In [None]:
## Checks if trees have mislabeled nodes
def find_mislabeled(node: Node):
    if node.node_type == "condition" and node.condition == None:
        print(node.desc)
    for child in node.children:
        find_mislabeled(child)

for i in range(len(trees)):
    print(i)
    find_mislabeled(trees[i])


0
Challenge: Client is sad, despondent, or depressed
If negative score:
1
2
If client denies ideation, plan and/or intent:
3
If no symptoms fall in red zone


In [None]:
class Validation(BaseModel):
    is_condition_true: str = Field("Given this condition and information, is the condition true?")


def traverse_tree(node: Node, info: str) -> str:
    instructions: str = ""
    
    if node.node_type == "instruction":
        instructions += node.desc
    
    for child in node.children:
        if child.condition == None or complete(f"Condition: \"\"\"{child.condition}\"\"\"\nInformation: \"\"\"{info}\"\"\"", Validation).is_condition_true:
            instructions += traverse_tree(child, info) + "\n"
    
    return instructions

traverse_tree(trees[2], "Patient is suicidal and violent. They have a negative score. They are an immediate risk to themselves and others.")

"\n\nIf imminent safety risk to client: Call 911 and then call Supervisor\nDeescalate clientTake a calm approach, appear to be in control, lower voice, be a good listener and empathic, avoid an authoritative stance, allow the client to save face, avoid personalizing a client's behavior, provide adequate space between you and the client, never promise what you can't deliver, when in doubt - get out and shout. For additional training in de-escalation, see X\n\nOnce client is calm, complete Safety Plan (CL 122)\n\n\n"