Preprocessing

In [1]:
!pip3 install -qU bs4 tiktoken openai langchain langchain-community pinecone pypdf tqdm dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment before reading them.
load_dotenv()

# API credentials; each is None when the corresponding variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Directory scanned for the PDF reference documents.
pdf_folder_path = "references/" #clinical document location

In [3]:
# Document loaders live in the langchain-community package (installed above);
# the `langchain.document_loaders` path is only a deprecated re-export.
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load every PDF found under pdf_folder_path into a list of documents.
loader = PyPDFDirectoryLoader(pdf_folder_path)
dataset = loader.load()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)


In [4]:
# One record per loaded document: its source reference and its extracted text.
# NOTE(review): the 'rtdocs/' -> 'https://' rewrite only changes sources containing
# 'rtdocs/'; with pdf_folder_path = "references/" it is probably a no-op — confirm.
data = [
    {
        'reference': doc.metadata['source'].replace('rtdocs/', 'https://'),
        'text': doc.page_content,
    }
    for doc in dataset
]

In [5]:
import tiktoken

tokenizer = tiktoken.get_encoding('cl100k_base')


def tiktoken_len(text):
    """Return the number of cl100k_base tokens that `text` encodes to.

    Used as the length function of the text splitter, so chunk sizes are
    measured in tokens rather than characters. An empty `disallowed_special`
    is passed through to the tokenizer's `encode` call.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

In [6]:
# RecursiveCharacterTextSplitter is published in the standalone langchain-text-splitters
# package; `langchain.text_splitter` is a legacy import path. Prefer the new location and
# fall back to the old one so the cell works on either LangChain generation.
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split on paragraph breaks first, then lines, then words, then characters.
# Sizes are measured with tiktoken_len, i.e. in tokens.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""]
)

In [7]:
from uuid import NAMESPACE_URL, uuid4, uuid5
from tqdm.auto import tqdm

# Split every record into token-bounded chunks and attach the metadata stored in Pinecone.
#
# IDs are derived deterministically (uuid5) from the reference, the chunk index and the
# chunk text instead of being random. With random uuid4 ids every re-run of the notebook
# upserts a brand-new copy of each chunk into the existing index (the index stats in this
# notebook grow from 1336 to 1776 vectors after one upsert run); with stable ids a re-run
# overwrites the same vectors instead.
chunks = []
seen_ids = set()

for record in tqdm(data):
    texts = text_splitter.split_text(record['text'])
    for i, text in enumerate(texts):
        chunk_id = str(uuid5(NAMESPACE_URL, f"{record['reference']}\n{i}\n{text}"))
        # Identical id means identical reference, chunk index and text, so the
        # payload would be identical too: keep only the first occurrence.
        if chunk_id in seen_ids:
            continue
        seen_ids.add(chunk_id)
        chunks.append({
            'id': chunk_id,
            'text': text,
            'chunk': i,
            'reference': record['reference']
        })

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 278/278 [00:00<00:00, 756.41it/s] 


Embedding Model

In [8]:
import openai

# OpenAI embedding model name, used both when embedding the document chunks and when
# embedding the clinical query.
# NOTE(review): the Pinecone index in this notebook is created with dimension=1536, so the
# vectors returned by this model must have that size — confirm if the model is changed.
embed_model = "text-embedding-3-small"

Vector Storage

In [9]:
from pinecone import Pinecone
from pinecone import ServerlessSpec

# Pinecone client authenticated with the key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)  #Pinecone API
index_name = "preopai-index-py"

# Create the dense, cosine-metric serverless index only when it does not exist yet.
index_exists = pc.has_index(index_name)
if not index_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        vector_type="dense",
        dimension=1536,
        metric="cosine",
        spec=serverless_spec,
    )

# Handle used for upserts and queries; the trailing expression displays the current stats.
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 1336}},
 'total_vector_count': 1336,
 'vector_type': 'dense'}

In [10]:
from tqdm.auto import tqdm
import datetime
from time import sleep
from openai import APIConnectionError, InternalServerError, OpenAI, RateLimitError
client = OpenAI(api_key=openai_api_key)

batch_size = 100
max_embed_attempts = 6


def embed_with_retry(texts, max_attempts=max_embed_attempts, base_delay=5):
    """Embed `texts` with `embed_model`, retrying a bounded number of times.

    Only transient failures (rate limit, connection/timeout, server error) are
    retried. Any other exception, such as an invalid API key or a malformed
    request, propagates immediately instead of being swallowed and retried forever.

    Args:
        texts: list of strings to embed in a single request.
        max_attempts: total number of attempts before giving up.
        base_delay: seconds slept after the first failure; the wait grows
            linearly with the attempt number.

    Returns:
        The embeddings response returned by `client.embeddings.create`.

    Raises:
        The last transient error once `max_attempts` attempts have failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return client.embeddings.create(input=texts, model=embed_model)
        except (RateLimitError, APIConnectionError, InternalServerError):
            if attempt == max_attempts:
                raise
            sleep(base_delay * attempt)


for i in tqdm(range(0, len(chunks), batch_size)):
    # Slicing past the end of the list is safe, so the last batch may be shorter.
    batch = chunks[i:i + batch_size]
    ids_batch = [x['id'] for x in batch]
    res = embed_with_retry([x['text'] for x in batch])
    embeds = [record.embedding for record in res.data]
    # Metadata stored next to each vector (the id is passed separately).
    meta_batch = [{
        'text': x['text'],
        'chunk': x['chunk'],
        'reference': x['reference']
    } for x in batch]
    to_upsert = list(zip(ids_batch, embeds, meta_batch))
    index.upsert(vectors=to_upsert)

100%|██████████| 5/5 [00:20<00:00,  4.15s/it]


Retrieval Agent

In [11]:
from pinecone import Pinecone

# Reconnect to the already-created index for the retrieval section
# (presumably so this section can be run without the indexing cells — confirm).
index_name = "preopai-index-py"
pc = Pinecone(api_key=pinecone_api_key)  #Pinecone API

index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 1776}},
 'total_vector_count': 1776,
 'vector_type': 'dense'}

In [12]:
from openai import OpenAI
# Pass the key read from the environment explicitly, consistent with how the client
# is constructed in the embedding/upsert cell.
client = OpenAI(api_key=openai_api_key)

# Clinical summary used as the retrieval query and as the user message for the LLM.
# Written as a triple-quoted string so every line of the summary stays on its own line:
# backslash continuations inside a normal string literal join lines with no separator,
# which glued fields together (e.g. "Plt 392INR PT APTT normal", "Glu 13ECG NSRCXR NAD").
query = """
38/Chinese/Female
Allergy to aspirin, paracetamol, penicillin - rashes and itchiness
ExSmoker—smoked 10 years ago/Occasional Drinker
LMP: last month
Wt 94.7 Ht 166.3 BMI 34.2 BP 127/81 HR 88 SpO2 100% on RA
Coming in for BILATERAL REVISION FESS, REVISION SEPTOPLASTY, ADENOIDECTOMY, AND BILATERAL INFERIOR TURBINOPLASTIES/SEVERE OSA ON CPAP
=== PAST MEDICAL HISTORY ===
1. Severe OSA on CPAP—AHI 58—CPAP settings: AutoCPAP (4–15) cmH2O, without humidifier/Chinstrap
2. Right persistent Sinusitis
3. Allergic rhinitis
4. Adenoid hypertrophy
5. High BMI
6. Asthma—f/u GP, last seen 3 months ago for attack—on PRN ventolin—Does not use ventolin at all—No previous admissions/ intubations for asthma
7. Diabetes—HbA1C 9.4%, Last seen outpatient doctor >1 year ago.
No history of HTN/ HLD/ IHD/ CVA
=== SURGICAL HISTORY===
Tonsillectomy > 10 years ago mild PONV
===Investigations===
Hb 13.0 TW 4 Plt 392
INR PT APTT normal
Na 134 K3.4 Cr 77 Glu 13
ECG NSR
CXR NAD
=== MEDICATIONS===
Ventolin PRN
LMP; Last menstrual period, Wt; Weight
""".strip() #clinical query

# Embed the clinical query with the same model that was used for the document chunks.
query_embedding = client.embeddings.create(model=embed_model, input=[query])
xq = query_embedding.data[0].embedding

# Fetch the 10 closest chunks, including their stored metadata.
res = index.query(
    vector=xq,
    top_k=10,
    include_metadata=True,
    namespace="__default__",
)

Response Generation

In [13]:
# Text of every retrieved match, in the order the index returned them.
contexts = [match['metadata']['text'] for match in res['matches']]

# Prompt layout: the contexts separated by '---' rules, a '-----' rule, then the clinical query.
context_block = "\n\n---\n\n".join(contexts)
augmented_query = f"{context_block}\n\n-----\n\n{query}"

In [14]:
# Show the full user message (retrieved contexts followed by the clinical query) for inspection.
print(augmented_query)

3rd Edition, January 2014  12 
Surgical Preadmission Guide 
Does your patient require a Surgical Preadmission Assessment? 
Use the following tool to ascertain if your patient requires a surgical preadmission assessment 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
The Surgical Preadmission Assessment information provided by the Surgical CSU Heads of Unit – See Appendix 3 – Surgical CSU Heads of Units
Minor surgery 
Breast/Gen Surg 2 eg. E/O Breast Lump, node biopsy  
Colorectal eg. EUA fistulas, haemorrhoids 
ENT eg. Tonsillectomy, EUA nose/ears, panendoscopy 
Neuro eg. CTR 
Orthopaedics eg. Arthroscopy, r/o metal 
Plastics eg. E/O BCC/skin lesions 
Urology eg. Rigid cysto 
Vascular eg. Varicose veins 
Upper GI eg. Laparoscopy, lap band port revision. 
 
 
 
Unstable medical conditions for minor surgery 
may require surgical pre admission for patient 
optimisation 
 
(If unsure, please liaise with the ARNs) 
 
 
 
 
 
 
Proceed with planned surgery 
 
 
Intermediate Surg

LLM Integration (o3-mini)

In [15]:
from openai import OpenAI
client = OpenAI(api_key=openai_api_key)

# System prompt assembled from adjacent string literals with explicit newlines.
# Backslash continuations inside one literal embedded the source indentation in the prompt
# and ran the numbered list together on a single line.
# NOTE(review): the original prompt read "explain the reasoning for your" with the noun
# missing before "if you are uncertain"; "instructions" is assumed here — confirm wording.
system_prompt = (
    "You are the anesthesiologist seeing this patient in the preoperative clinic 2 weeks before the date of operation. "
    "The patients have already taken their routine preoperative investigations and the findings are listed within the clinical summary.\n"
    "Your role is to evaluate the clinical summary and give the preoperative anesthesia instructions for the following patient targeted to your fellow medical colleagues. "
    "You are to follow strictly the guidelines.\n"
    "Your instructions should consist of the following components:\n"
    "1. Provide a traffic light status for the surgery. If there is a risk and the patient needs to be seen by a Doctor or a Nurse, its red, if further tests are required, its yellow; if the patient is healthy and ready for surgery, its green.\n"
    "2. Fasting instructions - list instructions based on the number of hours before the time of the listed surgery\n"
    "3. Suitability for preoperative carbohydrate loading — yes/no.\n"
    "4. Medication instructions — name each medication and give the instructions for the day of the operation and days leading up to the operation as required.\n"
    "5. Any instructions for the healthcare team—for example, preoperative blood group matching, arranging for preoperative dialysis, or standby post-operative high dependency/ICU beds.\n"
    "6. Provide the RCRI, ASA, DASI and STOP-BANG scores for the patient. If you cannot calculate it, provide the extra information you need to calculate it.\n"
    "Your instructions are the final instructions, explain the reasoning for your instructions; "
    "if you are uncertain, explain what further information you require.\n"
    "If the medical condition is already optimized, there is no need to offer further optimization. "
    "If there are no relevant instructions in any of the above categories, leave it blank and write NA"
)

# The user message is the retrieved guideline context followed by the clinical summary.
response = client.chat.completions.create(
    model="o3-mini",
    messages=[
        {"role": "system", "content": system_prompt},  #System Prompt
        {"role": "user", "content": augmented_query},
    ],
)

In [16]:
# Print the text of the first completion choice returned by the chat model.
print(response.choices[0].message.content)

Below is the final pre‐operative anesthesia plan for this 38‐year‐old female scheduled for bilateral revision endoscopic sinus surgery (revision FESS, revision septoplasty, adenoidectomy, and bilateral inferior turbinoplasties):

──────────────────────────────
1. Traffic Light Status  
 • RED – This patient has significant risk factors (severe OSA on CPAP and poorly controlled diabetes with an HbA1C of 9.4%) that warrant an in-person anesthesia pre‐admission assessment and possible further optimization before proceeding with surgery.

──────────────────────────────
2. Fasting Instructions  
 • Clear fluids: stop 2 hours before scheduled surgery  
 • Light solids (toast, clear soup, etc.): stop 6 hours before surgery  
 • Fatty or heavy meals: ideally stop 8 hours pre‐op

──────────────────────────────
3. Suitability for Preoperative Carbohydrate Loading  
 • NO – Given her uncontrolled diabetes, carbohydrate loading is not recommended.

──────────────────────────────
4. Medication Inst