In [1]:
### PFX prototyping

In [1]:
import pandas as pd
import os
import textstat
from openai import OpenAI
import json
import re
import requests
from dotenv import load_dotenv
load_dotenv()

from pfx_prompts import baseline_fewshot_prompt,example,baseline_fewshot_icd10_labeling_prompt,icd10_example,baseline_zeroshot_prompt,zeroshot_prompt_reflexion_reading_level_error


In [2]:
# Backend switch: when True, chat requests are POSTed to SIIM_MD_AI_URL with `requests`;
# when False, they go through the OpenAI client (CLIENT) instead.
USE_MD_AI = True

In [3]:
# OpenAI backend settings. The key is read from the environment
# (os.getenv returns None when OPENAI_API_KEY is not set).
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
CLIENT = OpenAI(api_key=OPENAI_API_KEY)
# Model name passed to CLIENT.chat.completions.create when USE_MD_AI is False.
OPENAI_MODEL = "gpt-4o"

In [16]:
# SIIM MD.ai backend settings, read from the environment.
SIIM_MD_AI_API_TOKEN = os.getenv('SIIM_MD_AI_API_TOKEN')
SIIM_MD_AI_URL = os.getenv('SIIM_MD_AI_URL')
# Headers sent with every requests.post call to SIIM_MD_AI_URL.
# NOTE(review): if SIIM_MD_AI_API_TOKEN is unset, the f-string below produces the
# literal text "None" as the header value — confirm the environment is configured.
MD_AI_HEADERS = {
    'Content-Type': 'application/json',
    'x-access-token': f"{SIIM_MD_AI_API_TOKEN}"
}
# Model identifier placed in the "model" field of each request body.
SIIM_MD_AI_MODEL = 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'

In [17]:
# Reading-level labels returned by map_reading_level, listed from the band with the
# lowest Flesch reading-ease scores (hardest text) to the band with the highest.
PROFESSIONAL = "Professional"
COLLEGE_GRADUATE = "College graduate"
COLLEGE = "College"
TENTH_TO_TWELTH_GRADE = "10th to 12th grade"
EIGTH_TO_NINTH_GRADE = "8th to 9th grade"
SEVENTH_GRADE = "7th grade"
SIXTH_GRADE = "6th grade"
FIFTH_GRADE = "5th grade"
# Label returned by map_reading_level when a score matches no band.
N_A = "N/A"


In [18]:
# Few-shot examples: each row is later rendered into prompt text with
# example.format(**row) / icd10_example.format(**row).
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [19]:
df_fewshot.head()


Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,PFx,PFx_ICD10_code
0,Head,Cerebral cavernous malformation,I67.1,A small cavernous malformation of the brain is...,I67.1
1,Head,Infundibulum of cerebral artery,Q28.2,An infundibulum detected on an MRI of the brai...,Q28.2
2,Neck,Tornwaldt cyst,J39.2,"A Tornwaldt cyst, often detected incidentally ...",J39.2
3,Head,Fluid in the right petrous apex,H70.8,A small amount of incidentally found fluid in ...,H70.8
4,Abdomen,Spinal hemangioma,D18.09,"A spinal hemangioma, often found incidentally ...",D18.09


In [20]:
# Evaluation findings: the generation loops read row['Incidental_Finding'] from this frame.
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [21]:
df_eval.head()

Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,Unnamed: 3
0,Head,White matter lesions,R90.82,
1,Head,Arachnoid cyst,Q04.3,
2,Head,Pituitary microadenoma,D35.2,
3,Head,Pineal cyst,Q04.6,
4,Head,Chiari I malformation,Q07.0,


In [22]:
class DictToObj:
    """Expose the keys of a dictionary as instance attributes.

    Nested dictionaries, and dictionaries found directly inside list values,
    are wrapped in ``DictToObj`` as well; every other value is stored as-is.
    """

    def __init__(self, dictionary):
        for name, raw in dictionary.items():
            if isinstance(raw, dict):
                # Nested mapping -> nested attribute object.
                converted = DictToObj(raw)
            elif isinstance(raw, list):
                # Only dict elements are wrapped; other elements pass through untouched.
                converted = [
                    DictToObj(element) if isinstance(element, dict) else element
                    for element in raw
                ]
            else:
                converted = raw
            setattr(self, name, converted)

    def __repr__(self):
        # Render the attribute mapping itself; nested objects render the same way.
        return str(vars(self))

In [23]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    """Translate a Flesch reading-ease score into a reading-level label.

    Parameters
    ----------
    flesch_reading_ease : float
        Flesch reading-ease score; lower means harder text.

    Returns
    -------
    str
        One of the module-level reading-level labels. Scores below 10 map to
        PROFESSIONAL and scores of 90 or more map to FIFTH_GRADE (the formula
        can produce values of 100 and above for very simple text, which
        previously fell through to N_A). N_A is returned only when the score
        matches no band at all, e.g. NaN.
    """
    # (exclusive upper bound, label) pairs, checked from hardest to easiest band.
    upper_bounds = (
        (10.0, PROFESSIONAL),
        (30.0, COLLEGE_GRADUATE),
        (50.0, COLLEGE),
        (60.0, TENTH_TO_TWELTH_GRADE),
        (70.0, EIGTH_TO_NINTH_GRADE),
        (80.0, SEVENTH_GRADE),
        (90.0, SIXTH_GRADE),
    )
    for upper_bound, label in upper_bounds:
        if flesch_reading_ease < upper_bound:
            return label
    # The easiest band is open-ended: 100.0 and higher are still "5th grade".
    if flesch_reading_ease >= 90.0:
        return FIFTH_GRADE
    # Every comparison is False for NaN, so it ends up here.
    return N_A

In [24]:
def extract_json(openai_response):
    """Pull the JSON object out of a ```-fenced block in a chat completion choice.

    Parameters
    ----------
    openai_response : object or None
        A chat "choice"; its text is read from ``openai_response.message.content``.

    Returns
    -------
    dict or None
        ``None`` when ``openai_response`` is falsy. Otherwise the first JSON object
        that can be decoded from a fenced block, or an empty ``dict`` (after
        printing a message) when no fenced block holds a decodable object.
    """
    if not openai_response:
        return None

    # Treat a missing message body like an empty one instead of crashing in `re`.
    content = openai_response.message.content or ""
    decoder = json.JSONDecoder()

    for fenced in re.findall(r'```(.*?)```', content, re.DOTALL):
        # Newlines are stripped before parsing (as before), because raw line
        # breaks inside string values are not valid JSON.
        candidate = fenced.replace('\n', '')
        start = candidate.find('{')
        if start == -1:
            continue
        try:
            # raw_decode parses one complete JSON value and ignores whatever
            # follows it, so nested objects and "}" inside strings are kept
            # intact (the old lazy regex cut them short).
            json_object, _ = decoder.raw_decode(candidate[start:])
        except json.JSONDecodeError as err:
            print("Could not decode JSON object: %s" % err)
            continue
        return json_object

    print("No JSON object found.")
    return dict()

In [25]:
def label_icd10s(pfx_outputs_json):
    """Ask the configured LLM backend for an ICD-10 code for each PFx text.

    Parameters
    ----------
    pfx_outputs_json : iterable of dict
        Parsed generation outputs; the text to label is read from each item's
        ``'PFx'`` key.

    Returns
    -------
    list
        One entry per input item, in order: the result of ``extract_json`` on the
        labeling response, or ``None`` for an item that has no ``'PFx'`` text.
        (Previously a single bad item aborted the whole batch and returned
        ``None``, which broke the callers that iterate over the result.)

    Raises
    ------
    requests.HTTPError
        When the MD.ai endpoint answers with an HTTP error status.
    """
    # The few-shot examples are the same for every request, so build them once.
    pfx_icd10_fewshot_examples = "".join(
        icd10_example.format(**row) for _, row in df_fewshot.iterrows()
    )
    system_message = {"role": "system", "content": "You are an ICD10 medical coder for incidental findings." }

    pfx_icd10_codes = []
    for pfx_output in pfx_outputs_json:
        try:
            pfx_text = pfx_output['PFx']
        except (KeyError, TypeError):
            # Item without PFx text (e.g. an empty dict or None from a failed parse):
            # report it and keep a placeholder so results stay aligned with inputs.
            print("ERROR: %s" % (pfx_output,))
            pfx_icd10_codes.append(None)
            continue

        prompt = baseline_fewshot_icd10_labeling_prompt.format(
            examples=pfx_icd10_fewshot_examples,
            PFx=pfx_text,
        )
        messages = [system_message, {"role": "user", "content": prompt}]

        if USE_MD_AI:
            json_body = {
                "model":SIIM_MD_AI_MODEL,
                "temperature":0.0,
                "stream":False,
                "messages":messages,
            }
            http_response = requests.post(SIIM_MD_AI_URL,json=json_body,headers=MD_AI_HEADERS)
            # Surface HTTP failures directly instead of failing later on a
            # payload that has no "response" field.
            http_response.raise_for_status()
            pfx_icd10_response = DictToObj(http_response.json()).response
        else:
            pfx_icd10_response = CLIENT.chat.completions.create(
                model=OPENAI_MODEL,
                temperature=0.0,
                messages=messages,
                stream=False,
            )
        pfx_icd10_codes.append(pfx_icd10_response.choices[0])

    # extract_json maps the None placeholders to None.
    return [extract_json(choice) for choice in pfx_icd10_codes]

In [26]:
# Zero-shot generation: one PFx per evaluation finding at the target reading level.
# Only the first evaluation row is processed (df_eval.iloc[:1]).
pfx_zeroshot_outputs = []
for i,row in df_eval.iloc[:1].iterrows():
    prompt = baseline_zeroshot_prompt.format(Incidental_Finding=row['Incidental_Finding'],
                                             Reading_Level=TENTH_TO_TWELTH_GRADE)
    messages = [
        {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
        {"role": "user", "content": prompt }
    ]
    if USE_MD_AI:
        json_body = {
            "model":SIIM_MD_AI_MODEL,
            "temperature":0.0,
            "stream":False,
            "messages":messages,
        }
        http_response = requests.post(SIIM_MD_AI_URL,json=json_body,headers=MD_AI_HEADERS)
        # Show the raw payload first so it is visible even when the status check fails.
        print(http_response.content)
        # Stop on HTTP errors rather than failing later on a payload without "response".
        http_response.raise_for_status()
        pfx_response = DictToObj(http_response.json()).response
    else:
        pfx_response = CLIENT.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.0,
            messages=messages,
            stream=False,
        )

    pfx_zeroshot_outputs.append(pfx_response.choices[0])

b'{"response":{"id":"8b6c87551db6a3db-ORD","object":"chat.completion","created":1724263294,"model":"meta-llama/meta-llama-3.1-405b-instruct-turbo","prompt":[],"choices":[{"finish_reason":"eos","seed":5692012445694864000,"logprobs":null,"index":0,"message":{"role":"assistant","content":"Here is the rephrased and explained medical terminology in a patient-friendly format:\\n\\n```\\n{\\n  \\"finding\\": \\"White matter lesions\\",\\n  \\"ICD10_code\\": \\"G37.9\\",\\n  \\"PFx\\": \\"We found some small areas of damage in the white matter of your brain. White matter is made up of nerve fibers that help different parts of your brain communicate with each other. These damaged areas, also called lesions, can be caused by a variety of things such as aging, high blood pressure, or other medical conditions. They can also be a sign of a condition that affects the brain, such as multiple sclerosis. It\'s not uncommon for people to have these types of lesions as they get older, and they may not ca

In [27]:
# Parse the fenced JSON out of every zero-shot completion, keeping input order.
pfx_zeroshot_outputs_json = [extract_json(choice) for choice in pfx_zeroshot_outputs]

In [28]:
pfx_zeroshot_outputs_json

[{'finding': 'White matter lesions',
  'ICD10_code': 'G37.9',
  'PFx': "We found some small areas of damage in the white matter of your brain. White matter is made up of nerve fibers that help different parts of your brain communicate with each other. These damaged areas, also called lesions, can be caused by a variety of things such as aging, high blood pressure, or other medical conditions. They can also be a sign of a condition that affects the brain, such as multiple sclerosis. It's not uncommon for people to have these types of lesions as they get older, and they may not cause any symptoms. However, it's good that we know about them so we can keep an eye on them in the future.",
  'PFx_ICD10_code': 'G37.9'}]

In [29]:
#pfx_zeroshot_output_df = pd.DataFrame(pfx_zeroshot_outputs_json)
#pfx_zeroshot_output_df.head()

In [30]:
# One row per parsed zero-shot output; columns come from the keys of the parsed JSON.
pfx_zeroshot_output_df = pd.DataFrame(pfx_zeroshot_outputs_json)
# Independent ICD-10 labeling of each generated PFx text (one LLM call per item).
pfx_zeroshot_outputs_icd10_labels = label_icd10s(pfx_zeroshot_outputs_json)

In [31]:
# Keep the first value of each label dict; use "" where labeling produced nothing.
pfx_zeroshot_output_df['_0_agent_icd10_codes'] = [
    list(labels.values())[0] if labels else ""
    for labels in pfx_zeroshot_outputs_icd10_labels
]

In [32]:
# Compare the code in the generated JSON with the labeling agent's code for the same PFx.
pfx_zeroshot_output_df["_0_icd10_matches"]= pfx_zeroshot_output_df.ICD10_code == pfx_zeroshot_output_df._0_agent_icd10_codes
# Flesch reading-ease score of each PFx text, computed by textstat.
pfx_zeroshot_output_df["_0_flesch"] = pfx_zeroshot_output_df['PFx'].apply(textstat.flesch_reading_ease)
# Turn the numeric score into a reading-level label; depends on the _0_flesch column above.
pfx_zeroshot_output_df["_0_reading_level"] = pfx_zeroshot_output_df['_0_flesch'].apply(map_reading_level)
# True when the label equals the reading level requested in the generation prompt.
pfx_zeroshot_output_df["_0_reading_level_matches"] = pfx_zeroshot_output_df['_0_reading_level'] == TENTH_TO_TWELTH_GRADE

In [33]:
pfx_zeroshot_output_df.head()


Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,_0_agent_icd10_codes,_0_icd10_matches,_0_flesch,_0_reading_level,_0_reading_level_matches
0,White matter lesions,G37.9,We found some small areas of damage in the whi...,G37.9,G37.4,False,77.37,7th grade,False


In [34]:
# ICD-10 agreement rate: share of rows where the code in the generated JSON equals the
# labeling agent's code. The labeling agent is whichever backend label_icd10s used
# (SIIM_MD_AI_MODEL when USE_MD_AI is True, otherwise OPENAI_MODEL), so this measures
# agreement between model calls rather than correctness against a reference code.
sum(pfx_zeroshot_output_df["_0_icd10_matches"])/len(pfx_zeroshot_output_df.index)

0.0

In [35]:
pfx_zeroshot_output_df[pfx_zeroshot_output_df._0_reading_level != TENTH_TO_TWELTH_GRADE].count()

finding                     1
ICD10_code                  1
PFx                         1
PFx_ICD10_code              1
_0_agent_icd10_codes        1
_0_icd10_matches            1
_0_flesch                   1
_0_reading_level            1
_0_reading_level_matches    1
dtype: int64

In [None]:
# Reflexion pass: for each first-attempt PFx whose measured reading level missed the
# target, send the attempt back to the model and ask for a retry. Rows that already
# matched get a None placeholder. Only the first row is processed (.iloc[:1]).
pfx_zeroshot_outputs_w_reflexion = []
for i,row in pfx_zeroshot_output_df.iloc[:1].iterrows():
    if row['_0_reading_level'] != TENTH_TO_TWELTH_GRADE:
        # The generated JSON stores the finding under the 'finding' key.
        prompt = zeroshot_prompt_reflexion_reading_level_error.format(
            Incidental_Finding=row['finding'],
            Reading_Level=TENTH_TO_TWELTH_GRADE,
            ICD10_code=row['ICD10_code'],
            _0_reading_level=row['_0_reading_level'],
            PFx=row['PFx'],
            PFx_ICD10_code=row['PFx_ICD10_code']
        )
        messages = [
            {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
            {"role": "user", "content": prompt }
        ]

        if USE_MD_AI:
            json_body = {
                "model":SIIM_MD_AI_MODEL,
                "temperature":0.0,
                "stream":False,
                "messages":messages,
            }
            http_response = requests.post(SIIM_MD_AI_URL,json=json_body,headers=MD_AI_HEADERS)
            # Stop on HTTP errors rather than failing later on a payload without "response".
            http_response.raise_for_status()
            pfx_response = DictToObj(http_response.json()).response
        else:
            pfx_response = CLIENT.chat.completions.create(
                model=OPENAI_MODEL,
                temperature=0.0,
                messages=messages,
                stream=False,
            )

        pfx_zeroshot_outputs_w_reflexion.append(pfx_response.choices[0])
    else:
        pfx_zeroshot_outputs_w_reflexion.append(None)

In [37]:
# Parse only the rows that were actually retried; None placeholders are dropped.
pfx_zeroshot_outputs_w_reflexion_json = [
    extract_json(choice)
    for choice in pfx_zeroshot_outputs_w_reflexion
    if choice
]

In [44]:
pfx_zeroshot_outputs_w_reflexion

[]

In [38]:
# One row per parsed reflexion output; the frame has no columns when the list is empty.
pfx_zeroshot_outputs_w_reflexion_df = pd.DataFrame(pfx_zeroshot_outputs_w_reflexion_json)
# Independent ICD-10 labeling of each retried PFx text (one LLM call per item).
pfx_zeroshot_outputs_w_reflexion_icd10_labels = label_icd10s(pfx_zeroshot_outputs_w_reflexion_json)

In [40]:
pfx_zeroshot_outputs_w_reflexion_df.head()


Unnamed: 0,_1_agent_icd10_codes


In [39]:
# Second-attempt metrics, mirroring the _0_* columns computed for the first attempt.
# NOTE: this cell needs at least one parsed reflexion output. With an empty frame there
# is no 'ICD10_code' column and the attribute access below raises AttributeError, so
# run the reflexion generation cell first.
# Keep the first value of each label dict; use "" where labeling produced nothing.
pfx_zeroshot_outputs_w_reflexion_df['_1_agent_icd10_codes'] = list(map(lambda x: list(x.values())[0] if x else "", pfx_zeroshot_outputs_w_reflexion_icd10_labels))
# Compare the code in the generated JSON with the labeling agent's code.
pfx_zeroshot_outputs_w_reflexion_df["_1_icd10_matches"]= pfx_zeroshot_outputs_w_reflexion_df.ICD10_code == pfx_zeroshot_outputs_w_reflexion_df._1_agent_icd10_codes
# Flesch reading-ease score of each retried PFx text, computed by textstat.
pfx_zeroshot_outputs_w_reflexion_df["_1_flesch"] = pfx_zeroshot_outputs_w_reflexion_df['PFx'].apply(textstat.flesch_reading_ease)
# Turn the numeric score into a reading-level label; depends on the _1_flesch column above.
pfx_zeroshot_outputs_w_reflexion_df["_1_reading_level"] = pfx_zeroshot_outputs_w_reflexion_df['_1_flesch'].apply(map_reading_level)
# True when the retry landed on the requested reading level.
pfx_zeroshot_outputs_w_reflexion_df["_1_reading_level_matches"] = pfx_zeroshot_outputs_w_reflexion_df['_1_reading_level'] == TENTH_TO_TWELTH_GRADE
pfx_zeroshot_outputs_w_reflexion_df.head()


AttributeError: 'DataFrame' object has no attribute 'ICD10_code'

In [None]:
pfx_zeroshot_outputs_w_reflexion_df[pfx_zeroshot_outputs_w_reflexion_df._1_reading_level != TENTH_TO_TWELTH_GRADE].count()

In [None]:
sum(pfx_zeroshot_outputs_w_reflexion_df["_1_icd10_matches"])/len(pfx_zeroshot_outputs_w_reflexion_df.index)

In [None]:
pfx_zeroshot_outputs_w_reflexion_df.to_csv('./pfx_zeroshot_outputs_w_reflexion.csv')

In [None]:
# Few-shot generation: same task as the zero-shot run, but the prompt also carries
# every row of df_fewshot rendered as an example.
# Only the first evaluation row is processed (df_eval.iloc[:1]).
pfx_fewshot_examples = "".join(example.format(**row) for _, row in df_fewshot.iterrows())

pfx_fewshot_outputs = []
for i,row in df_eval.iloc[:1].iterrows():
    prompt = baseline_fewshot_prompt.format(examples=pfx_fewshot_examples,
                                            Incidental_Finding=row['Incidental_Finding'],
                                            Reading_Level=TENTH_TO_TWELTH_GRADE)
    messages = [
        {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
        {"role": "user", "content": prompt }
    ]
    if USE_MD_AI:
        json_body = {
            "model":SIIM_MD_AI_MODEL,
            "temperature":0.0,
            "stream":False,
            "messages":messages,
        }
        http_response = requests.post(SIIM_MD_AI_URL,json=json_body,headers=MD_AI_HEADERS)
        # Stop on HTTP errors rather than failing later on a payload without "response".
        http_response.raise_for_status()
        pfx_response = DictToObj(http_response.json()).response
    else:
        pfx_response = CLIENT.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.0,
            messages=messages,
            stream=False,
        )

    pfx_fewshot_outputs.append(pfx_response.choices[0])

In [None]:
# Parse the fenced JSON out of every few-shot completion, keeping input order.
pfx_fewshot_outputs_json = [extract_json(choice) for choice in pfx_fewshot_outputs]
# Independent ICD-10 labeling of each generated PFx text (one LLM call per item).
pfx_fewshot_outputs_icd10_labels = label_icd10s(pfx_fewshot_outputs_json)

In [None]:
# One row per parsed few-shot output; columns come from the keys of the parsed JSON.
pfx_fewshot_output_df = pd.DataFrame(pfx_fewshot_outputs_json)
# Keep the first value of each label dict; use "" where labeling produced nothing.
pfx_fewshot_output_df['_0_agent_icd10_codes'] = list(map(lambda x: list(x.values())[0] if x else "",pfx_fewshot_outputs_icd10_labels))
# Compare the code in the generated JSON with the labeling agent's code.
pfx_fewshot_output_df["_0_icd10_matches"]= pfx_fewshot_output_df.ICD10_code == pfx_fewshot_output_df._0_agent_icd10_codes
# Flesch reading-ease score of each PFx text, computed by textstat.
pfx_fewshot_output_df["_0_flesch"] = pfx_fewshot_output_df['PFx'].apply(textstat.flesch_reading_ease)
# Turn the numeric score into a reading-level label; depends on the _0_flesch column above.
pfx_fewshot_output_df["_0_reading_level"] = pfx_fewshot_output_df['_0_flesch'].apply(map_reading_level)

In [None]:
sum(pfx_fewshot_output_df["_0_icd10_matches"])/len(pfx_fewshot_output_df.index)

In [None]:
pfx_fewshot_output_df.head()

In [None]:
pfx_fewshot_output_df[pfx_fewshot_output_df._0_reading_level != TENTH_TO_TWELTH_GRADE].count()

In [None]:
pfx_fewshot_output_df[pfx_fewshot_output_df._0_flesch < 60].count()