# Making a tree from a list of hierarchical instructions
## Overview
This program takes a file consisting of several hierarchical lists of instructions (e.g. bullet points) and creates a JSON-formatted decision tree from it.
## Tools
Pydantic and instructor are used to get properly formatted responses from the LLM. Gemma3 is the current model being used. It is hosted locally using Ollama.

In [36]:
## Imports
from __future__ import annotations
from typing import List, Optional
from enum import Enum
from pydantic import BaseModel, Field
import instructor
from openai import OpenAI

In [46]:
## Loading LLM
# Name of the model requested on every chat completion (see `complete`).
MODEL = "gemma3"

# The OpenAI client is pointed at a local endpoint (localhost:11434) and then
# wrapped by instructor so that completions can be requested with a
# `response_model`.
# NOTE(review): JSON_O1 looks like the mode aimed at OpenAI o1-style models --
# confirm it is the intended mode for gemma3.
client = instructor.from_openai(
    OpenAI(
        api_key="ollama",  # required, but unused
        base_url="http://localhost:11434/v1",
    ),
    mode=instructor.Mode.JSON_O1,
)

## TODO look into using system role
def complete(content: str, response_model):
    """Ask the model a single question and get a structured answer back.

    Args:
        content: Text sent as the one and only ``user`` message.
        response_model: Pydantic model class handed to the instructor-patched
            client as ``response_model``.

    Returns:
        Whatever ``client.chat.completions.create`` returns; elsewhere in this
        file the result is used as an instance of ``response_model``.

    No system message is sent and no temperature is set here.
    """
    user_message = {"role": "user", "content": content}
    return client.chat.completions.create(
        model=MODEL,
        messages=[user_message],
        response_model=response_model,
    )

# Extracting bullet points
1. The markdown file containing the instructions is loaded
1. The chunk_instructions function is called to break up the file into lists of lines
1. Each run of lines that starts at a line containing a starter substring (e.g. "Challenge") and ends at the next blank line becomes its own sublist

In [38]:
## Loading instructions
# Path of the markdown file holding the bullet-list instructions.
INSTRUCTION_PATH: str = 'bullet_list_instructions.md'

# Read the whole file inside a context manager so the handle is closed as soon
# as the text is in memory (the bare open(...).read() left it open).
with open(INSTRUCTION_PATH, 'r', encoding='utf-8') as instruction_file:
    instructions: str = instruction_file.read()

In [39]:
## Breaks bullet list of instructions into sublists
## Sublists are all lines between a beginner string and a blank line
## A new sublist is created when a beginner string is found, even if there is no blank line
def chunk_instructions(instructions: str, beginners: list[str]) -> list[list[str]]:
    """Split instruction text into sublists of lines.

    A sublist starts at a line containing any of the ``beginners`` substrings
    and runs until the next blank line, the next beginner line, or the end of
    the text. Lines that appear before the first beginner line, or after a
    blank line and before the next beginner line, are discarded.

    Args:
        instructions: Full text to split; lines are separated by ``"\\n"``.
        beginners: Substrings that mark the first line of a sublist.

    Returns:
        The sublists in order of appearance. Each sublist's first element is
        the beginner line itself.
    """
    sublists: list[list[str]] = []
    current_sublist: list[str] = []

    for line in instructions.split("\n"):
        is_beginning: bool = any(beginner in line for beginner in beginners)

        if is_beginning or line == "":
            # Close whatever was being collected before starting over.
            if current_sublist:
                sublists.append(current_sublist)
            current_sublist = [line] if is_beginning else []
        elif current_sublist:
            # Only collect lines once a beginner line has opened a sublist.
            current_sublist.append(line)

    # Flush the trailing sublist: text that does not end with a blank line
    # would otherwise silently lose its last chunk.
    if current_sublist:
        sublists.append(current_sublist)
    return sublists

# Chunk the loaded file; a sublist opens at every line containing one of these
# two starter substrings.
instruction_set: list[list[str]] = chunk_instructions(
    instructions,
    beginners=["Challenge:", "Preparation for community-based visit"],
)

# Tree Generation
1. Instructor is used to get data about a specific bullet point using the NodeData pydantic class
1. This class contains fields with prompts designed to get specific information from the bullet
1. The create_tree function recursively creates a tree, finding child bullets using the get_bullet_indices_under_line function

In [40]:
## Node class definitions
# NodeType = Enum('NodeType', [('CONDITION', 1), ('RETRIEVAL', 2), ('INSTRUCTION', 3)])

# IS_CONDITIONAL_FIELD_DESCRIPTION: str = """You are a language expert. Your task is to determine whether the following sentence is a conditional or unconditional statement.

# A conditional statement expresses a condition and its consequence, often using words like if, unless, provided that, or in case.
# An unconditional statement does not express such a condition-consequence structure.

# Return only a boolean response.

# Examples:

#     Input: "If the coffee is hot, cool it down."
#     Output: true

#     Input: "If the coffee is hot and the tea is cold, drink the tea."
#     Output: true

#     Input: "If yes: do something."
#     Output: true

#     Input: "Is the coffee hot?"
#     Output: false

#     Input: "The coffee is hot."
#     Output: false

# Now classify the provided sentence:"""

# FEATURE_FIELD_DESCRIPTION: str = """You are a data analysis expert. Your task is to extract the dataset feature or features mentioned in the input sentence. Assume the dataset contains any necessary information.

#     Features may be stated explicitly (e.g., “patient age”) or implicitly (e.g., “Is the patient over 50?” implies “age of patient”).

#     If multiple features are mentioned, list them all, separated by commas.

#     If no features can be determined from the sentence, respond with: "N/A"

#     Do not include extra commentary. Only output the features or "N/A".

# Examples:

#     Input: "Perform an analysis of the patient's seizure history."
#     Output: "Patient's seizure history"

#     Input: "Value is greater than 4."
#     Output: "N/A"

#     Input: "Number of seizures is greater than 4."
#     Output: "Number of seizures"

#     Input: "Locate the patient's address and determine their age."
#     Output: "Patient address, patient age"

#     Input: "Is the patient over 50? If yes, and the patient is male, send them to ward 1."
#     Output: "Age of patient, gender of patient"

# Now analyze the provided sentence:"""

# CONDITION_FIELD_DESCRIPTION = """You are a language analysis expert. Your task is to extract the condition from a conditional statement.

#     The condition is the part of the sentence that specifies the requirement for something else to happen.

#     Only include the exact words from the original sentence — do not paraphrase or reword.

#     If the sentence is not a conditional statement, return only: "N/A"

#     Do not include any extra explanation or commentary — only return the extracted condition or "N/A".

# Examples:

#     Input: "If it is the day and there are no clouds, it is sunny."
#     Output: "it is the day and there are no clouds"

#     Input: "The sun is bright today."
#     Output: "N/A"

#     Input: "Unless the battery is full, keep charging it."
#     Output: "the battery is full"

#     Input: "When the temperature drops below zero, the pipes may freeze."
#     Output: "the temperature drops below zero"
    
#     Now analyze the provided sentence:"""


# Short prompt texts for per-field extraction. In the code visible here they
# are referenced only by the commented-out NodeData model.
IS_CONDITIONAL_FIELD_DESCRIPTION = (
    'Determine whether or not the given line contains a condition. '
    'The phrases "if", "if not" and other conditional language is an indication the line is conditional.'
)
CONDITION_FIELD_DESCRIPTION = (
    'If the line contains a conditional statement, find the specific condition. '
    'State the condition exactly as stated in the line. '
    'If the line does not contain a condition, say "N/A".'
)
FEATURE_FIELD_DESCRIPTION = (
    'List the variables that are directly or indirectly mentioned in the given line. '
    'Keep responses and concise as possible. '
    'If no such variables exist, say "N/A".'
)


# class NodeData(BaseModel):
#     is_conditional: bool = Field(description=IS_CONDITIONAL_FIELD_DESCRIPTION, default_factory=bool)
#     feature: str = Field(default=FEATURE_FIELD_DESCRIPTION)
#     condition: str = Field(default=CONDITION_FIELD_DESCRIPTION)




class ConditionClassification(BaseModel):
    # Structured answer to "is this line an if-statement, and on what condition?".
    # Documented with comments rather than a docstring on purpose.
    # NOTE(review): pydantic is expected to copy a class docstring into the
    # JSON schema, which would change what the model is shown -- confirm
    # before adding one.
    # NOTE(review): the field order (reasoning, predicate, is_conditional)
    # presumably lets the model reason before giving its verdict -- confirm
    # before reordering.

    # Free-text explanation produced by the model.
    reasoning: str = Field(..., description="Determine if the given line is an if-statement. Explain your reasoning in detail. If it is an if-statement, clearly state the condition. Conditions can be as simple as \"yes\" or \"no\".")
    # Extracted condition text; may be None. create_tree stores it as Node.condition.
    predicate: Optional[str] = Field(..., description="The condition that the if-statement depends on, as determed by your reasoning.")
    # Final verdict; create_tree uses it to choose "condition" vs "instruction".
    is_conditional: bool

class VariableExtractionResponse(BaseModel):
    # Structured answer listing the variables a line refers to.
    # Documented with comments rather than a docstring on purpose.
    # NOTE(review): pydantic is expected to copy a class docstring into the
    # JSON schema, which would change what the model is shown -- confirm
    # before adding one.

    # Free-text explanation produced by the model.
    reasoning: str = Field(..., description="Determine what variables are referenced in the given line. Be as specific as possible and explain your reasoning in detail.")
    # Variable names chosen by the model; create_tree stores them as Node.variables.
    variables: List[str] = Field(..., description="Based on your reasoning, make a list of the variable(s) in the line. Give the variables descriptive names.")


class LineData(BaseModel):
    # Combined response model requested by create_tree for each bullet, so one
    # completion yields both the classification and the variable list.

    # Whether the line is conditional, and on what predicate.
    condition_classification: ConditionClassification
    # Variables referenced by the line.
    variable_extraction: VariableExtractionResponse


class Node(BaseModel):
    # One node of the generated decision tree; serialised with model_dump_json.

    # create_tree assigns one of "condition", "instruction" or "retrieval".
    node_type: str
    # Bullet text after strip_bullet has removed the leading '#' markers.
    desc: str
    # Variable names the model extracted for this line.
    variables: List[str]
    # Predicate the model extracted; None when it reported no condition.
    condition: Optional[str]
    # Subtrees built from the bullets one level deeper than this one.
    children: list[Node]


# complete("If X is true, go home.", ConditionClassification).model_dump_json()
# Smoke test: run the combined LineData extraction on a minimal conditional
# line and show the raw JSON the model produced.
complete('Line: """If yes"""', LineData).model_dump_json()
# complete("Hello", VariableExtractionResponse).model_dump_json()

'{"condition_classification":{"reasoning":"The line \\"If yes\\" is a simple conditional statement. It checks if a condition is true. In this case, the condition is implicitly \\"yes\\".","predicate":"yes","is_conditional":true},"variable_extraction":{"reasoning":"The line contains a single conditional element. The condition is represented by the word \'yes\'.","variables":["yes"]}}'

In [41]:
def strip_bullet(bullet: str) -> str:
    """Return the text of a bullet line without its leading ``#`` markers.

    Non-ASCII characters are dropped first (for example a bullet glyph), then
    every leading ``#`` is removed and the surrounding whitespace is stripped.

    Args:
        bullet: Raw line, where the number of leading ``#`` encodes its depth.

    Returns:
        The cleaned text. A line consisting only of ``#`` characters yields an
        empty string; the previous index scan returned such a line unchanged.
    """
    ascii_bullet = bullet.encode('ascii', 'ignore').decode()
    return ascii_bullet.lstrip("#").strip()


def get_level(bullet: str) -> int:
    """Return how many ``#`` characters the line starts with.

    Only an unbroken run at the very start of the string is counted; a line
    that begins with any other character (including a space) has level 0.
    """
    without_markers = bullet.lstrip("#")
    return len(bullet) - len(without_markers)


def get_bullet_indices_under_line(bullets: list[str], line_index: int) -> list[int]:
    """Return the indices of the direct children of ``bullets[line_index]``.

    A direct child is a later line exactly one level deeper (per ``get_level``).
    The scan stops at the first later line whose level is not deeper than the
    parent's. Lines more than one level deeper are skipped without being
    reported.
    """
    last_index = len(bullets) - 1
    if line_index == last_index:
        # Nothing follows the parent, so it cannot have children.
        return []

    parent_level: int = get_level(bullets[line_index])
    child_level: int = parent_level + 1
    child_indices: list[int] = []

    for candidate in range(line_index + 1, len(bullets)):
        depth: int = get_level(bullets[candidate])
        if depth <= parent_level:
            # Reached a sibling or an ancestor's sibling: the subtree is over.
            break
        if depth == child_level:
            child_indices.append(candidate)
    return child_indices

def create_tree(bullets: list[str], i: int = 0) -> Node:
    """Recursively build the Node for ``bullets[i]`` and everything beneath it.

    The bullet text is sent to the model once (via ``complete`` with the
    ``LineData`` response model) to classify it and extract its variables.
    Children are found with ``get_bullet_indices_under_line`` and built by
    recursion.

    Node type rules:
        * "condition" when the model reports the line as conditional;
        * otherwise "instruction", promoted to "retrieval" as soon as one
          child turns out to be a "condition" or "retrieval" node.

    Args:
        bullets: Lines of one instruction chunk.
        i: Index of the line to build the node for.

    Returns:
        The populated Node, with ``condition`` set to the model's predicate.
    """
    description = strip_bullet(bullets[i])
    analysis = complete(f"Line: \"\"\"{description}\"\"\"", LineData)
    classification = analysis.condition_classification

    if classification.is_conditional:
        kind: str = "condition"
    else:
        kind = "instruction"

    subtrees: list[Node] = []
    for child_index in get_bullet_indices_under_line(bullets, i):
        subtree = create_tree(bullets, child_index)
        subtrees.append(subtree)
        # Promote a plain instruction once it is known to sit above a
        # condition or another retrieval node.
        if kind == "instruction" and subtree.node_type in ("condition", "retrieval"):
            kind = "retrieval"

    return Node(
        node_type=kind,
        desc=description,
        variables=analysis.variable_extraction.variables,
        condition=classification.predicate,
        children=subtrees,
    )


In [49]:
# Build one tree per chunk, for chunks 1 through 4 only; create_tree starts at
# its default index 0, i.e. each chunk's first line.
trees: list[Node] = [create_tree(chunk) for chunk in instruction_set[1:5]]

In [None]:
# Serialise every tree and wrap them in a {"trees": [...]} object.
# str.join puts the separator only between items; the previous loop compared
# the index against len(trees), which is never equal, so it also emitted a
# comma after the last tree and produced invalid JSON.
json_string: str = "{\"trees\" : [" + ",\n".join(tree.model_dump_json() for tree in trees) + "]}"

# The context manager closes the file even if the write fails.
with open('output.json', 'w', encoding='utf-8') as output_file:
    output_file.write(json_string)

# Last expression of the cell, so the notebook displays the serialised result.
json_string

'{"trees" : [{"node_type":"retrieval","desc":"Preparation for community-based visit","variables":["community_visit","preparation"],"condition":null,"children":[{"node_type":"retrieval","desc":"Pre-screening","variables":[],"condition":null,"children":[{"node_type":"instruction","desc":"Review crisis call sheet","variables":["crisis_call_sheet"],"condition":null,"children":[]},{"node_type":"instruction","desc":"Review client report card, bio, and last 30 days of notes","variables":["client_report_card","client_bio","last_30_days_notes"],"condition":null,"children":[]},{"node_type":"instruction","desc":"Determine clients understanding of reason for visit","variables":[],"condition":null,"children":[]},{"node_type":"condition","desc":"Ensure emergency contact has been outreached","variables":["emergency_contact"],"condition":"emergency contact has been outreached","children":[]}]},{"node_type":"retrieval","desc":"Screening","variables":[],"condition":null,"children":[{"node_type":"retriev

# Traverser

In [54]:
def validate_tree(node: Node) -> bool:
    """Check that every "condition" node in the tree has a condition.

    The tree is walked depth-first. At the first "condition" node whose
    ``condition`` is None, its description is printed and the walk stops.

    Args:
        node: Root of the (sub)tree to check.

    Returns:
        True when no offending node was found, False otherwise.
    """
    if node.node_type == "condition" and node.condition is None:
        print(node.desc)
        return False
    # all() short-circuits, so later siblings are not visited (or printed)
    # once one subtree has failed.
    return all(validate_tree(child) for child in node.children)

# Report, one line per tree, whether it passed validation; validate_tree
# itself prints the description of the first offending node beforehand.
for tree in trees:
    tree_is_valid = validate_tree(tree)
    print(tree_is_valid)




True
Conduct Mini Mental Status (?)
False
True
Call Supervisor and consult about next steps
False


In [None]:
class Validation(BaseModel):
    # Response model used by traverse_tree to ask whether a node's condition
    # holds for the supplied information.
    #
    # The field is a required bool with the question as its description.
    # Before, the question was passed as Field's first positional argument
    # (the default value) and the field was typed str, so the model never saw
    # the question as a description and any non-empty answer, "false"
    # included, counted as truthy in traverse_tree.
    is_condition_true: bool = Field(..., description="Given this condition and information, is the condition true?")


def traverse_tree(node: Node, info: str) -> str:
    """Collect the instructions that apply to ``info`` by walking the tree.

    The node's own description is included only when it is an "instruction"
    node. A child is descended into when it has no condition, or when the
    model (asked through ``complete`` with the ``Validation`` response model)
    reports its condition as true for ``info``.

    Args:
        node: Root of the (sub)tree to walk.
        info: Free-text description of the situation to test conditions against.

    Returns:
        The collected descriptions; each visited child's output is followed by
        a newline.
    """
    # Gather pieces and join once. This also avoids shadowing the
    # module-level ``instructions`` variable, as the old local name did.
    collected: list[str] = []

    if node.node_type == "instruction":
        collected.append(node.desc)

    for child in node.children:
        # The model is only consulted for children that carry a condition.
        if child.condition is None or complete(f"Condition: \"\"\"{child.condition}\"\"\"\nInformation: \"\"\"{info}\"\"\"", Validation).is_condition_true:
            collected.append(traverse_tree(child, info) + "\n")

    return "".join(collected)

# Example run: walk the third generated tree against a sample case description.
traverse_tree(
    trees[2],
    "Patient is suicidal and violent. They have a negative score. They are an immediate risk to themselves and others.",
)

'If safety issue identified, call Supervisor and consult about next steps\n\n\n'