<a href="https://colab.research.google.com/github/louisdennington-design/decision-tree-dissertation/blob/main/the_state_manager.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Current issue needing attention: the list of fields for the structured guideline JSON and the patient schema is currently repeated in several places, both here and in llm_makes_json. Every copy has to be updated in parallel, which is error-prone. Could one list of keys be compiled and shared across the following?



*   llm_makes_json -> def construct_prompt()
*   llm_makes_json -> def orchestrate_create_json()
*   the_state_manager -> def extract_patient_facts()



In [1]:
# Mount Google Drive (forcing a remount) at /content/drive

from google.colab import drive

drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [2]:
# Import packages

!pip install -q streamlit
!pip install -q streamlit-chat

import os
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
from streamlit_chat import message

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m77.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m139.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h



In [3]:
# Global parameters

MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# Directory on the mounted Drive that holds the JSON inputs
LOAD_PATH = "/content/drive/My Drive/Colab Notebooks/Dissertation/JSON"
LOAD_GUIDELINE_STRUCTURED = os.path.join(LOAD_PATH, "guideline_structured.json")
LOAD_PATIENT_SCHEMA = os.path.join(LOAD_PATH, "patient_schema.json")

# Output directory. This has to be a directory and must not be the schema file
# itself: os.makedirs() on ".../patient_schema.json" either fails because the
# file already exists or creates a directory where LOAD_PATIENT_SCHEMA is
# expected to be a file.
SAVE_PATH = LOAD_PATH
os.makedirs(SAVE_PATH, exist_ok=True)
SAVE_FILE = os.path.join(SAVE_PATH, "DEFINE") # DEFINE BEFORE RUNNING

In [None]:
# Load LLM: the tokenizer and the model are both fetched by MODEL_NAME

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")

In [None]:
# Load JSON

def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return its parsed content.

    Raises FileNotFoundError, with the offending path in the message,
    when the file does not exist.
    """
    try:
        json_file = open(file_path, "r", encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(f'JSON file not found: {file_path}')

    with json_file:
        parsed = json.load(json_file)
    return parsed

# Structured guideline: load it, then show its Python type, its length and its content
guideline_structured = load_json(LOAD_GUIDELINE_STRUCTURED)

print(type(guideline_structured))
print(len(guideline_structured))
# NOTE(review): this prints the entire structure, which may be very long — consider a preview
print(guideline_structured)

# Patient schema: load it, then show its Python type, its length and its first element
patient_schema = load_json(LOAD_PATIENT_SCHEMA)

print(type(patient_schema))
print(len(patient_schema))
# NOTE(review): indexing with [0] assumes the schema file holds a list — confirm
print(patient_schema[0])

In [None]:
# Create record of exact guideline version extracted with metadata
# In case of future runs with different guidelines and structure,
# so that problems with a run can be traced precisely. Metadata to record:
# - guideline name
# - html
# - scrape date

In [None]:
# Save local copy of JSON with metadata
# - protect the file somehow from being overwritten or edited?

In [None]:
# Create an index or summary for the JSON to enable more reliable searching by the state manager?

In [None]:
#### NOTE
"""
If the question being asked tessellates with a particular section (e.g., a medication)
it may be worth flagging other areas (e.g., carer support.. physical health...)
The model may need to supply back to the user a list of "have you also considered..." making use of these headings
to prevent a narrow focus on the main content of the question
and ensure that other recommendation sections are also being considered
"""

In [None]:
def get_user_message():
    """
    Uses a UI like Streamlit or an API to send and receive messages

    Draws the page title and a text box. When the text box holds a value, the
    text is passed to `api_calling`, leading newlines are stripped from the
    reply, and both sides of the exchange are appended to the history lists
    kept in st.session_state ('user_input' and 'openai_response').

    Returns nothing; the exchange is only recorded in st.session_state.
    """

    # From https://www.geeksforgeeks.org/python/create-a-chatbot-with-openai-and-streamlit-in-python/
    st.title("NICE GUIDELINE CHATBOT")

    # Create the two history lists the first time the function runs in a session
    if 'user_input' not in st.session_state:
        st.session_state['user_input'] = []

    if 'openai_response' not in st.session_state:
        st.session_state['openai_response'] = []

    def get_text():
        input_text = st.text_input("Enter your question and patient information here:", key="input")
        return input_text

    user_input = get_text()

    if user_input:
        # NOTE(review): api_calling is not defined anywhere in this notebook (it comes
        # from the tutorial linked above); call_llm may be the intended function — confirm.
        output = api_calling(user_input)
        output = output.lstrip("\n")

        # Store the exchange. The two appends were previously swapped, so the
        # question ended up in 'openai_response' and the reply in 'user_input'.
        st.session_state.user_input.append(user_input)
        st.session_state.openai_response.append(output)


In [None]:
def call_llm(prompt):
    """
    Single entry point for sending text to the LLM.

    The content of `prompt` is decided by the calling function; this function
    tokenises it, generates at most 500 new tokens and returns the newly
    generated text (without the prompt) as a string.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # do_sample=False: deterministic decoding without random sampling
    # (if removed, reinstate temperature / top_p / top_k)
    generated = model.generate(**encoded, max_new_tokens=500, do_sample=False)

    # Drop the prompt tokens so that only the continuation is decoded
    prompt_length = encoded["input_ids"].shape[1]
    continuation = generated[:, prompt_length:]
    decoded = tokenizer.batch_decode(continuation, skip_special_tokens=True)

    return decoded[0]

In [7]:
def extract_patient_facts(user_input):
    """
    Use LLM to decompose message into fields captured by JSON-keys

    Parameters
    ----------
    user_input : str
        Free-text message describing the patient and the question.

    Returns
    -------
    str
        The raw text generated by the LLM. The prompt asks for JSON only, but
        the text is returned as-is: it is neither parsed nor validated here.
    """

    # At some point need to decide whether this should output patient_facts variable
    # ... or store the output in a predefined and loaded patient_schema.json

    # Prompt fixes relative to the earlier version:
    # - age 12 used to fall between 'child' (< 12) and 'young_person' (13-17)
    #   and was therefore labelled 'adult'; 'child' now covers age <= 12
    # - the preference rule now names the same key ('patient_preferences')
    #   that the key list requires
    # - the null rule refers to the user input, which is what is being parsed
    prompt = f"""You are extracting structured information from USER INPUT.

                RULES:
                - output must be valid JSON only (no markdown)
                - do not invent or infer clinical information; use only what is present in the user input
                - For ALL clinical descriptor fields (e.g. phase, severity, medication): populate a value ONLY if it is explicitly stated in the user input. Do NOT infer information that is not directly stated. If not explicit, use null.
                - 'age' must be an integer
                - 'age_group' must be assigned using these rules:
                    - if age <= 12: 'child'
                    - if age >= 13 and age <= 17: 'young_person'
                    - if age >= 65: 'older_adult'
                    - otherwise: 'adult'
                - 'gender' must be one of: ['male', 'female', 'other', null]
                - Extract 'urgency' as 'True' if the text includes 'urgent', 'urgently', 'immediate' or 'immediately', otherwise 'False'
                - 'manic_episode_history' must be one of: ['none', 'one', 'multiple', null]
                - 'current_manic_phase' must be one of: ['mania', 'hypomania', 'bipolar_depression', 'mixed', 'rapid_cycling', 'euthymic', null]
                - 'mania_severity' must be one of: ['mild', 'moderate', 'severe', null]
                - 'current_psychosis' must be: ['present', 'absent', null]
                - 'diagnoses' must be one or more comorbid mental health diagnoses. If more than one diagnosis is mentioned, record all as a list of strings.
                - 'current_medication' must be a medication name or null. If more than one medication is mentioned, record all as a list of strings.
                - 'medication_adherence' must be one of: ['good', 'poor', null]
                - 'physical_health_longterm' must be the name of a physical disease diagnosis that affects a person for more than six months. If more than one diagnosis is mentioned, record all as a list of strings.
                - 'physical_health_recent' must be the name of a transient disease (less than six months) or physical health event from the last six months. If more than one diagnosis is mentioned, record all as a list of strings.
                - 'risk' must be one of: ['self_harm', 'risk_to_others', null]
                - 'psychological_therapy' must be one of: ['offered', null]
                - 'care_coordination' must be one of: ['current', 'offered', null]
                - 'patient_preferences' is a string describing what the patient has requested or expressed a preference for. If no preference is stated, record null.
                - you MUST use 'null' if the information for any field is not explicit in the user input
                - if there is more than one value for any field, retain all as a list of strings

                Produce JSON with exactly these keys:
                - age
                - age_group
                - gender
                - urgency
                - manic_episode_history
                - current_manic_phase
                - mania_severity
                - current_psychosis
                - diagnoses
                - current_medication
                - medication_adherence
                - physical_health_longterm
                - physical_health_recent
                - risk
                - psychological_therapy
                - care_coordination
                - patient_preferences

                USER INPUT: {user_input}
                """

    # call_llm applies the same settings this function used to duplicate
    # (max_new_tokens=500, do_sample=False), so generation is kept in one place
    return call_llm(prompt)

In [9]:
# Test of conversion to patient facts
# Case 1: 23-year-old woman, suspected bipolar disorder, inconsistent lithium use.
# NOTE(review): the recorded output below wraps the JSON in prose and a markdown
# fence and has no 'age_group' key, although the prompt asks for JSON only with
# that key included.

user_input_test = "The patient is a 23-year-old woman with suspected bipolar disorder - though we are still waiting for the duty psychiatrist to confirm this - as well as previous diagnoses of borderline personality disorder and ADHD. She is currently under the local secondary care team after an admission eight months ago, when she was presenting as aggressive and with grandiose ideas about being a religious prophet. She was prescribed lithium but, according to the team, she has not been taking it consistently because she doesn't like the drowsiness. We're concerned about her because, recently, she has presented as more irritable again and once threatened our deputy manager with kidnap, saying that she could tell he was possessed by a demon. She says she hasn't been sleeping for one week. The GP reported that she smelt strongly of alcohol when seen there. The patient says she doesn't want to go back to hospital. Should we considering increasing her lithium?"

# Returns the raw LLM text (a string), not a parsed dict
patient_facts_test = extract_patient_facts(user_input_test)

print(patient_facts_test)

 Based on the provided user input, here is the extracted structured information in JSON format:

```json
{
    "age": 23,
    "gender": "female",
    "urgency": true,
    "manic_episode_history": "multiple",
    "current_manic_phase": "mania",
    "mania_severity": null,
    "current_psychosis": "present",
    "diagnoses": ["bipolar disorder", "borderline personality disorder", "ADHD"],
    "current_medication": ["lithium"],
    "medication_adherence": "poor",
    "physical_health_longterm": null,
    "physical_health_recent": ["alcohol use"],
    "risk": "risk_to_others",
    "psychological_therapy": null,
    "care_coordination": null,
    "patient_preferences": "doesn't want to go back to hospital"
}
``` 

This JSON structure adheres to the rules specified and captures all the relevant information from the user input.


In [10]:
# Test of conversion to patient facts
# Case 2: 38-year-old man, bipolar II disorder, sertraline without a mood stabiliser.
# Overwrites user_input_test / patient_facts_test from the previous test cell.
user_input_test = "The patient is a 38-year-old man with a confirmed diagnosis of bipolar II disorder, managed in primary care with intermittent secondary care input. He has a history of recurrent depressive episodes and brief hypomanic periods but no previous psychosis or admissions. He is currently prescribed sertraline by his GP, which was restarted six months ago following a prolonged low mood, reduced motivation, and social withdrawal. He reports feeling flat but less suicidal since restarting the antidepressant. Over the past three weeks, however, he has been sleeping only four hours a night, feels more restless, and has started several unrealistic business ideas, though he denies feeling euphoric. There is no mood stabiliser in place. Is continuing antidepressant monotherapy appropriate?"

# Returns the raw LLM text (a string), not a parsed dict
patient_facts_test = extract_patient_facts(user_input_test)

print(patient_facts_test)

 Based on the provided user input, here is the extracted structured information in JSON format:

```json
{
    "age": 38,
    "gender": "male",
    "urgency": false,
    "manic_episode_history": "multiple",
    "current_manic_phase": null,
    "mania_severity": null,
    "current_psychosis": "absent",
    "diagnoses": ["bipolar II disorder", "depressive episodes"],
    "current_medication": ["sertraline"],
    "medication_adherence": null,
    "physical_health_longterm": null,
    "physical_health_recent": null,
    "risk": null,
    "psychological_therapy": null,
    "care_coordination": null,
    "patient_preferences": null
}
``` 

Note: The input does not provide specific details about the patient's current manic phase, mania severity, physical health conditions, recent events, risk factors, psychological therapy preferences, or care coordination. Therefore, those fields are set to `null`. The patient's history of recurrent depressive episodes and brief hypomanic periods is noted un

In [11]:
# Test of conversion to patient facts
# Case 3: 57-year-old woman, bipolar I disorder, euthymic on lithium, declining renal function.
# Overwrites user_input_test / patient_facts_test from the previous test cell.
# NOTE(review): the recorded output below repeats the JSON object and stops mid-key.
user_input_test = "The patient is a 57-year-old woman with a long-standing diagnosis of bipolar I disorder, currently euthymic and living independently. She has been stable for several years on lithium, which has previously been highly effective in preventing relapse. Recent blood tests, however, show a decline in renal function, and her GP is concerned about the long-term safety of continuing lithium. The patient is anxious about medication changes, as her last manic episode prior to lithium resulted in significant financial losses and hospitalisation. She denies current mood symptoms and is adherent with treatment. Is continuing lithium appropriate and what alternative maintenance strategies would be recommended given emerging physical health risks?"

# Returns the raw LLM text (a string), not a parsed dict
patient_facts_test = extract_patient_facts(user_input_test)

print(patient_facts_test)

 ```json
{
    "age": 57,
    "gender": "female",
    "urgency": false,
    "manic_episode_history": "multiple",
    "current_manic_phase": "euthymic",
    "mania_severity": null,
    "current_psychosis": "absent",
    "diagnoses": ["bipolar I disorder"],
    "current_medication": ["lithium"],
    "medication_adherence": "good",
    "physical_health_longterm": ["renal function decline"],
    "physical_health_recent": null,
    "risk": null,
    "psychological_therapy": null,
    "care_coordination": null,
    "patient_preferences": null
}
``` ```json
{
    "age": 57,
    "gender": "female",
    "urgency": false,
    "manic_episode_history": "multiple",
    "current_manic_phase": "euthymic",
    "mania_severity": null,
    "current_psychosis": "absent",
    "diagnoses": ["bipolar I disorder"],
    "current_medication": ["lithium"],
    "medication_adherence": "good",
    "physical_health_longterm": ["renal function decline"],
    "physical_health_recent": null,
    "risk": null,
    "ps

In [5]:
def select_relevant_recommendations(patient_facts, guideline_structured, return_recommendations=False):
    """
    Based on matches between decomposed user input and guideline_structured.json

    Every guideline entry is compared field by field with the patient facts.
    Only fields present in both and populated (not None) on both sides count.

    Parameters
    ----------
    patient_facts : dict
        Field name -> value for the patient. The raw LLM text must be parsed
        into a dict before it is passed in.
    guideline_structured : list of dict
        One dict per guideline entry, sharing field names with patient_facts.
    return_recommendations : bool, default False
        When True, a third element is returned: one record per guideline entry
        that produced at least one match, holding 'heading_context',
        'original_recommendation_number' and 'original_recommendation_text'
        (None for any of these that the entry lacks).

    Returns
    -------
    tuple
        (matching_constrained_keys, matching_unconstrained_keys), plus the
        list of recommendation records when return_recommendations is True.
        - matching_constrained_keys: one field name per (entry, field) pair
          whose values are equal
        - matching_unconstrained_keys: one field name per (entry, field) pair
          populated on both sides but with different values

    Raises
    ------
    TypeError
        If patient_facts is non-empty and not a dict.
    """

    matching_constrained_keys = []
    matching_unconstrained_keys = []
    relevant_recommendations = []

    def _result():
        if return_recommendations:
            return matching_constrained_keys, matching_unconstrained_keys, relevant_recommendations
        return matching_constrained_keys, matching_unconstrained_keys

    # Nothing to compare
    if not patient_facts or not guideline_structured:
        return _result()

    if not isinstance(patient_facts, dict):
        raise TypeError("patient_facts must be a dict of field name -> value; "
                        "parse the LLM output into a dict first")

    for entity in guideline_structured:
        entity_matched = False

        for key, patient_value in patient_facts.items():
            if key not in entity:
                continue

            entity_value = entity[key]

            # A field that is null on either side carries no information, so
            # null == null must not make a recommendation look relevant
            if patient_value is None or entity_value is None:
                continue

            if patient_value == entity_value:
                # For boolean fields or those with constrained values
                matching_constrained_keys.append(key)
            else:
                # For free text fields where the keys match but not necessarily the value
                matching_unconstrained_keys.append(key)

            entity_matched = True

        if entity_matched:
            # The metadata lives on the guideline entry itself, not on the field value
            relevant_recommendations.append({
                "heading_context": entity.get("heading_context"),
                "original_recommendation_number": entity.get("original_recommendation_number"),
                "original_recommendation_text": entity.get("original_recommendation_text"),
            })

    return _result()

In [6]:
# Two hand-written patient-style records used to try out a field comparison
json_1 = dict(
    age=57,
    gender="female",
    urgency=False,
    manic_episode_history="multiple",
    current_manic_phase="euthymic",
    mania_severity="null",
    current_psychosis="absent",
    diagnoses=["bipolar I disorder"],
    current_medication=["lithium"],
    medication_adherence="good",
    physical_health_longterm=["renal function decline"],
    physical_health_recent="null",
    risk="null",
    psychological_therapy="null",
    care_coordination="null",
    patient_preferences="null",
)

json_2 = dict(
    age=57,
    gender="male",
    urgency=False,
    manic_episode_history="single",
    current_manic_phase="mania",
    mania_severity="null",
    deciduous=False,
    diagnoses=["bipolar I disorder"],
    current_medication=["valproate"],
    medication_adherence="good",
    physical_health_longterm=["renal function decline"],
    physical_health_recent="null",
    risk="self_harm",
    psychological_therapy="null",
    care_coordination="null",
    patient_preferences="likes holidays",
)

# Report whether the 'urgency' field agrees between the two records
print("Match" if json_1["urgency"] == json_2["urgency"] else "No match")

Match


In [None]:
# Draft of the field-matching loop; select_relevant_recommendations holds the fuller version.
# NOTE(review): this cell expects `patient_facts` to be a dict and `guideline_structured`
# to be a single guideline entry (dict) — neither is prepared by this cell; confirm before running.
matching_keys = []

for key in patient_facts:

    # Only fields that exist on both sides can be compared
    if key not in guideline_structured:
        continue

    patient_facts_key = patient_facts[key]
    guideline_structured_key = guideline_structured[key]

    if patient_facts_key == guideline_structured_key:
        matching_keys.append((key))

    # TODO: the draft ended here with an unfinished `if guideline_structured` condition

In [None]:
# Test

# The function needs the structured guideline as its second argument; without it
# the call fails with TypeError before any matching happens.
# NOTE(review): `patient_facts` has to be a parsed dict and is not assigned by any
# earlier cell in this notebook — confirm where it should come from.
select_relevant_recommendations(patient_facts, guideline_structured)

In [None]:
def decide_next_question_or_answer():
    """
    Placeholder: no logic implemented yet, so calling it returns None.
    Presumably meant to choose between asking a further question and giving an answer — confirm.

    - must record history of decisions taken for audit
    """


In [None]:
def generate_question():
    """
    Placeholder: no logic implemented yet, so calling it returns None.

    The docstring doubles as the function body; a def followed only by a
    comment is a syntax error.
    """
    # Needed as separate step?

In [None]:
def update_patient_dict():
    """
    Placeholder: no logic implemented yet, so calling it returns None.

    Planned contents:
    - what is known about the "patient" object
    - history of questions asked and user answers, or is this in the UI?
    """