In [1]:
import json

def get_dataset(dataset_string):
    """Load a JSON dataset from disk.

    Args:
        dataset_string: Path of the JSON file to read.

    Returns:
        The deserialized top-level JSON value stored in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Pin the encoding: without it, open() falls back to the locale default,
    # which can mis-decode non-ASCII text in the records on some platforms.
    with open(dataset_string, 'r', encoding='utf-8') as file:
        return json.load(file)

In [2]:
from approaches.approach2 import approach2
from llms.llm_interaction import GroqClient

# Load the second patient-records dataset; the path is relative to the working directory.
patient_records2 = get_dataset('datasets/patient_records2.json')
# Model client shared by the later cells (passed to approach2 and used via .generate()).
# NOTE(review): the recorded output of this cell is an ImportError raised by the
# `approach2` import above, so in that run these two assignments were never reached.
# Later cells depend on both names — confirm the notebook survives Restart & Run All.
llm_client = GroqClient(model="llama-3.3-70b-versatile")

ImportError: cannot import name 'approach2' from 'approaches.approach2' (/Users/nathan/Documents/aidb/finalproject/src/approaches/approach2.py)

In [8]:
# System prompt for the tuple-extraction run: the model is told to reply with a Python
# list of (<semantic_symbol>, explanation) tuples that ast.literal_eval() can parse.
# NOTE(review): `system_prompt` is assigned again in the annotation cell further down with
# different text, so the value held by the kernel depends on which cell ran last.
system_prompt = """
        You are a helpful AI assistant that strictly outputs a python list of tuples, where each tuple is (<semantic_symbol>, explanation) in the order they appear in the patient record.
        The output should be parseable with ast.literal_eval().
        """
# Extraction prompt template handed to approach2 as `extraction_prompt_template`.
# {regex} and {record_text} are placeholders — presumably filled in by approach2; TODO confirm.
# The "\n\n" below is an escape sequence in a non-raw string, so it adds two real newlines
# before "Patient Record:".
# NOTE(review): the template describes the explanation both as "the actual text in which
# they appear" and as "a brief description of where/how the symbol appears" — these can
# conflict; confirm which one is intended.
prompt = """
            Given the following patient record, extract the following semantic symbols if they exist: {regex}. 
            Return a machine parseable python list of tuples, where each tuple is (<semantic_symbol>, explanation) in the order they appear in the patient record. Only include semantic symbols that are explicitly represented in the patient record, i.e. their explanation should be the actual text in which they appear. 
            IMPORTANT: Only return the list, nothing else.
            Make sure the order of the list reflects the order in which the semantic symbols appear in the patient record, not the order in which they are listed in the regex.
            The explanation should be a brief description of where/how the symbol appears in the text.
            \n\nPatient Record: {record_text}
            """
# Run approach2 on a one-record slice (index 15 only), using the prompts defined in this cell.
results = approach2(
    patient_records2[15:16],
    llm_client,
    verbose=True,
    order_sensitive=True,
    system_prompt=system_prompt,
    extraction_prompt_template=prompt,
)

Processing 1 records...

Patient Record:
ADMISSION DIAGNOSIS 
The patient is a sixty-year-old retired mechanic who presented to the emergency department with symptoms of acute kidney impairment secondary to obstructive nephropathy due to kidney stones. 

HISTORY OF PRESENT ILLNESS
The patient reported experiencing severe abdominal pain radiating to his back accompanied by nausea and vomiting for approximately two days. He admitted to taking over-the-counter anti-inflammatory medications, including acetaminophen to help manage symptoms but with little relief.

PAST MEDICAL HISTORY
He has hypertension for which he is treated with losartan and atorvastatin for elevated cholesterol. His history of recurrent kidney stones necessitates periodic follow-up and monitoring.

SOCIAL HISTORY
As an avid smoker for nearly four decades and an active coffee drinker, there was concern about potential effects of caffeine and nicotine. The patient lives alone in his house but relies heavily on his family

## Annotation approach: tag the record first, then extract the tagged symbols

In [4]:
# Template for the follow-up model call that turns an already-annotated record into a
# Python list of ("tag", "text") tuples. {annotated_record} is the only placeholder and
# is filled with str.format() where the template is used.
# The template allows for nested tags and asks for each one as a separate entry.
annotation_extraction_prompt = """
Extract all text enclosed in semantic tags from the annotated medical record below. 

Return only the extracted content as a valid Python list of tuples in the format: [("tag", "text"), ...].

Tags may be nested. In such cases, include all relevant entries separately, even if they overlap.

Example:
Input: <patient>The <vaccine>flu vaccine</vaccine> was given.</patient>
Output: [("vaccine", "flu vaccine"), ("patient", "The flu vaccine was given.")]

Annotated Medical Record:
{annotated_record}
"""



In [7]:
from approaches.helper_methods import extract_symbols_from_annotated_record, extract_list_from_model_output

# System prompt for the annotation run: the model wraps matching spans in
# <semantic_symbol> ... </semantic_symbol> tags and returns the whole record otherwise unchanged.
# NOTE(review): this rebinds `system_prompt`, which the tuple-extraction cell also defines
# with different text; the value in the kernel depends on cell execution order.
system_prompt = """
You are an expert medical annotation assistant. Given a patient record, you identify and annotate full contextual text spans corresponding exactly to provided semantic symbols.

Annotate by surrounding each identified context with tags as:
<semantic_symbol> identified context text </semantic_symbol>

Only annotate contexts representing the listed semantic symbols. Return the entire annotated patient record unaltered, preserving all original formatting and punctuation.
"""

# User prompt template for the annotation run. {regex} and {record_text} are filled with
# str.format() from the record's 's_regex' and 'record' fields further down in this cell.
# The guidelines forbid nested or overlapping tags and require the original text to be
# preserved exactly apart from the inserted tags.
# NOTE(review): this rebinds `prompt`, which the tuple-extraction cell also defines with
# different text; the value in the kernel depends on cell execution order.
prompt = """
Task:
Annotate the patient record by identifying entire contextual spans that correspond to the provided semantic symbols. Each semantic symbol should encompass the full relevant phrase or sentence, not just an isolated word.

Guidelines:
- Each semantic symbol must be annotated using tags in this format:
  <semantic_symbol>full contextual text span</semantic_symbol>
- Annotate only if the patient record explicitly contains contexts that match the provided semantic symbols.
- Preserve the exact original formatting, punctuation, capitalization, and spacing.
- Do not modify the original record's text other than inserting annotations.
- Make sure to close a tag before starting a new one. Do not have nested/overlapping tags.
- Do not add any additional text or tags outside of the original record.

Input:
Semantic Symbols:
{regex}

Patient Record:
{record_text}

Output:
Return only the fully annotated patient record as a single continuous string.
"""

# Work on a single record (index 15) so both extraction paths can be compared on the same text.
record = patient_records2[15]

# Step 1: ask the model to return the record with semantic-symbol tags inserted.
# NOTE(review): max_tokens=1000 may cut off the annotated copy of a long record — confirm
# the limit is large enough for the records in this dataset.
annotation_request = prompt.format(regex=record['s_regex'], record_text=record['record'])
res = llm_client.generate(annotation_request, system_prompt=system_prompt, max_tokens=1000)
print(f'regular patient record: {record["record"]}')
print(f'semantic regex: {record["s_regex"]}')
print(f'match? {record["match"]}')
print(f"Annotated Patient Record:\n{res}\n")

# Step 2a: pull (symbol, text) pairs out of the annotated record with the project helper.
extracted_symbols1 = extract_symbols_from_annotated_record(res)
# Labels name the variable being shown so the two extraction paths can be told apart in
# the output (previously all four lines were labelled "extracted_symbols").
print(f'extracted_symbols1: {extracted_symbols1}')
print(f'extracted_symbols1 (symbols only): {[symbol for symbol, _ in extracted_symbols1]}')

# Step 2b: second model call that extracts the tagged spans, then parse its reply.
# The raw reply gets its own name instead of being overwritten by the parsed list, so
# `extracted_symbols2` only ever holds the parsed result.
extraction_reply = llm_client.generate(annotation_extraction_prompt.format(annotated_record=res))
extracted_symbols2 = extract_list_from_model_output(extraction_reply)
print(f'extracted_symbols2: {extracted_symbols2}')
print(f'extracted_symbols2 (symbols only): {[symbol for symbol, _ in extracted_symbols2]}')


regular patient record: ADMISSION DIAGNOSIS 
The patient is a sixty-year-old retired mechanic who presented to the emergency department with symptoms of acute kidney impairment secondary to obstructive nephropathy due to kidney stones. 

HISTORY OF PRESENT ILLNESS
The patient reported experiencing severe abdominal pain radiating to his back accompanied by nausea and vomiting for approximately two days. He admitted to taking over-the-counter anti-inflammatory medications, including acetaminophen to help manage symptoms but with little relief.

PAST MEDICAL HISTORY
He has hypertension for which he is treated with losartan and atorvastatin for elevated cholesterol. His history of recurrent kidney stones necessitates periodic follow-up and monitoring.

SOCIAL HISTORY
As an avid smoker for nearly four decades and an active coffee drinker, there was concern about potential effects of caffeine and nicotine. The patient lives alone in his house but relies heavily on his family for support. Fol