In [1]:
### PFX prototyping

In [1]:
import pandas as pd
import os
from readability import Readability
import textstat
from openai import OpenAI
import json
import re
from pfx_prompts import baseline_fewshot_prompt,example,baseline_fewshot_icd10_labeling_prompt,icd10_example,baseline_zeroshot_prompt,zeroshot_prompt_reflexion_reading_level_error
client = OpenAI()

In [2]:
# Reading-level labels. map_reading_level returns one of these for a Flesch
# reading-ease score, and TENTH_TO_TWELTH_GRADE is also passed to the prompts
# as the target level.
# NOTE: the identifiers TENTH_TO_TWELTH_GRADE and EIGTH_TO_NINTH_GRADE are
# misspelled ("TWELFTH", "EIGHTH") but are referenced throughout the notebook,
# so the names are kept as-is.
PROFESSIONAL = "Professional"
COLLEGE_GRADUATE = "College graduate"
COLLEGE = "College"
TENTH_TO_TWELTH_GRADE = "10th to 12th grade"
EIGTH_TO_NINTH_GRADE = "8th to 9th grade"
SEVENTH_GRADE = "7th grade"
SIXTH_GRADE = "6th grade"
FIFTH_GRADE = "5th grade"
N_A = "N/A"  # fallback label when a score matches none of the bands


In [3]:
# Worked examples (finding, ICD-10 code, reference PFx) that are formatted into
# the few-shot prompts further down.
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [4]:
df_fewshot.head()


Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,PFx,PFx_ICD10_code
0,Head,Cerebral cavernous malformation,I67.1,A small cavernous malformation of the brain is...,I67.1
1,Head,Infundibulum of cerebral artery,Q28.2,An infundibulum detected on an MRI of the brai...,Q28.2
2,Neck,Tornwaldt cyst,J39.2,"A Tornwaldt cyst, often detected incidentally ...",J39.2
3,Head,Fluid in the right petrous apex,H70.8,A small amount of incidentally found fluid in ...,H70.8
4,Abdomen,Spinal hemangioma,D18.09,"A spinal hemangioma, often found incidentally ...",D18.09


In [5]:
# Incidental findings to explain; the generation cells iterate over this frame
# row by row and read its Incidental_Finding column.
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [6]:
df_eval.head()

Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,Unnamed: 3
0,Head,White matter lesions,R90.82,
1,Head,Arachnoid cyst,Q04.3,
2,Head,Pituitary microadenoma,D35.2,
3,Head,Pineal cyst,Q04.6,
4,Head,Chiari I malformation,Q07.0,


In [7]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    """Map a Flesch reading-ease score to its reading-level label.

    Parameters
    ----------
    flesch_reading_ease : float
        Score as produced by ``textstat.flesch_reading_ease``.

    Returns
    -------
    str
        One of the reading-level constants. Scores below 10 (including
        negative scores) map to PROFESSIONAL. The top band is closed at 100,
        so a score of exactly 100.0 is FIFTH_GRADE, matching the
        "100.00-90.00" row of the table linked above. Scores above 100 and
        NaN return N_A.
    """
    # (exclusive upper bound, label) pairs in ascending order; the first band
    # whose bound exceeds the score wins.
    bands = (
        (10.0, PROFESSIONAL),
        (30.0, COLLEGE_GRADUATE),
        (50.0, COLLEGE),
        (60.0, TENTH_TO_TWELTH_GRADE),
        (70.0, EIGTH_TO_NINTH_GRADE),
        (80.0, SEVENTH_GRADE),
        (90.0, SIXTH_GRADE),
    )
    for upper_bound, label in bands:
        if flesch_reading_ease < upper_bound:
            return label
    # Inclusive upper edge: previously 100.0 itself fell through to N_A.
    if flesch_reading_ease <= 100.0:
        return FIFTH_GRADE
    # NaN fails every comparison above and also ends up here.
    return N_A

In [8]:
def extract_json(openai_response):
    """Extract the JSON object from a fenced code block in a chat completion choice.

    Parameters
    ----------
    openai_response : object or None
        A completion choice exposing ``.message.content``, or a falsy
        placeholder for a skipped row.

    Returns
    -------
    dict or None
        ``None`` when ``openai_response`` is falsy. Otherwise the last JSON
        object that decodes successfully between the first and last triple
        backtick fences, or an empty dict when there is no fenced block or no
        decodable object in it (a message is printed in both cases).
    """
    if not openai_response:
        return None

    # content may be None; treat that the same as "no fenced block".
    content = openai_response.message.content or ""
    fenced = re.search(r'```(.*)```', content, re.DOTALL)
    if fenced is None:
        print("No JSON object found.")
        return dict()

    # Raw newlines are not legal inside JSON strings, so strip them before
    # decoding (same normalisation the notebook has always applied).
    candidate = fenced.group(1).replace('\n', '')

    # Decode with a real JSON parser instead of a brace regex, so nested
    # objects and "}" characters inside string values are handled. Scanning
    # continues past each decoded object so that the last top-level object wins.
    decoder = json.JSONDecoder()
    json_object = None
    saw_brace = False
    start = candidate.find('{')
    while start != -1:
        saw_brace = True
        try:
            json_object, consumed = decoder.raw_decode(candidate[start:])
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next one.
            start = candidate.find('{', start + 1)
        else:
            start = candidate.find('{', start + consumed)

    if json_object is None:
        if saw_brace:
            # Malformed output should not abort a whole batch of parsed replies.
            print("Malformed JSON object.")
        else:
            print("No JSON object found.")
        return dict()
    return json_object

In [9]:
def label_icd10s(pfx_outputs_json, model="gpt-4o"):
    """Ask the ICD-10 labeling agent to code each generated PFx text.

    Parameters
    ----------
    pfx_outputs_json : list
        Parsed generation outputs; each usable entry is a dict with a 'PFx' key.
    model : str, optional
        Chat model used for labeling. Defaults to "gpt-4o".

    Returns
    -------
    list
        One entry per input, in the same order: the result of ``extract_json``
        on the labeling reply, or ``None`` for inputs that had no 'PFx' text
        (those inputs are printed and no API call is made for them).
    """
    pfx_icd10_fewshot_examples = "".join(
        icd10_example.format(**row) for _, row in df_fewshot.iterrows()
    )

    pfx_icd10_codes = []
    for pfx_output in pfx_outputs_json:
        try:
            pfx_text = pfx_output['PFx']
        except (KeyError, TypeError):
            # Entry is None or lacks 'PFx'. Previously execution fell through
            # and sent the previous item's prompt (or hit an unbound name);
            # record a placeholder instead so results stay aligned with inputs.
            print(pfx_output)
            pfx_icd10_codes.append(None)
            continue

        prompt = baseline_fewshot_icd10_labeling_prompt.format(
            examples=pfx_icd10_fewshot_examples,
            PFx=pfx_text,
        )
        pfx_icd10_response = client.chat.completions.create(
            model=model,
            temperature=0.0,
            messages=[
                {"role": "system", "content": "You are an ICD10 medical coder for incidental findings." },
                {"role": "user", "content": prompt }
            ],
            stream=False,
        )
        pfx_icd10_codes.append(pfx_icd10_response.choices[0])
    return list(map(extract_json, pfx_icd10_codes))

In [10]:
# Zero-shot pass: request one gpt-4o explanation per finding in df_eval, asking
# for the 10th-12th grade reading level. The raw completion choices are kept so
# they can be parsed in a later cell.
pfx_zeroshot_outputs = []
for _, finding_row in df_eval.iterrows():
    zeroshot_messages = [
        {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
        {
            "role": "user",
            "content": baseline_zeroshot_prompt.format(
                Incidental_Finding=finding_row['Incidental_Finding'],
                Reading_Level=TENTH_TO_TWELTH_GRADE,
            ),
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.0,
        messages=zeroshot_messages,
        stream=False,
    )
    pfx_zeroshot_outputs.append(completion.choices[0])

In [11]:
# Parse every raw zero-shot completion into a dict via extract_json.
pfx_zeroshot_outputs_json = [extract_json(choice) for choice in pfx_zeroshot_outputs]


In [12]:
# One row per parsed zero-shot output; the columns are the keys of the parsed JSON.
pfx_zeroshot_output_df = pd.DataFrame(pfx_zeroshot_outputs_json)
# Second model pass: have the ICD-10 labeling agent code each generated PFx text.
pfx_zeroshot_outputs_icd10_labels = label_icd10s(pfx_zeroshot_outputs_json)

In [13]:
# Take the first value of each labeling result; empty or missing results become "".
pfx_zeroshot_output_df['_0_agent_icd10_codes'] = [
    next(iter(label.values())) if label else ""
    for label in pfx_zeroshot_outputs_icd10_labels
]

In [14]:
# Per-row metrics for the zero-shot pass ("_0_" prefix): whether the code
# reported by the generator equals the labeling agent's code, the Flesch
# reading-ease score of the PFx text, its reading-level label, and whether
# that label is the 10th-12th grade target.
pfx_zeroshot_output_df["_0_icd10_matches"] = pfx_zeroshot_output_df["ICD10_code"].eq(
    pfx_zeroshot_output_df["_0_agent_icd10_codes"]
)
pfx_zeroshot_output_df["_0_flesch"] = pfx_zeroshot_output_df["PFx"].map(
    textstat.flesch_reading_ease
)
pfx_zeroshot_output_df["_0_reading_level"] = pfx_zeroshot_output_df["_0_flesch"].map(
    map_reading_level
)
pfx_zeroshot_output_df["_0_reading_level_matches"] = pfx_zeroshot_output_df["_0_reading_level"].eq(
    TENTH_TO_TWELTH_GRADE
)

In [15]:
pfx_zeroshot_output_df.head()


Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,_0_agent_icd10_codes,_0_icd10_matches,_0_flesch,_0_reading_level,_0_reading_level_matches
0,White matter lesions,R90.82,White matter lesions are small areas of damage...,R90.82,R90.82,True,50.16,10th to 12th grade,True
1,Arachnoid cyst,G93.0,An arachnoid cyst is a fluid-filled sac that f...,G93.0,Q04.6,False,67.59,8th to 9th grade,False
2,Pituitary microadenoma,D35.2,"A pituitary microadenoma is a small, non-cance...",D35.2,D35.2,True,50.87,10th to 12th grade,True
3,Pineal cyst,Q04.6,"A pineal cyst is a small, fluid-filled sac loc...",Q04.6,Q04.8,False,67.79,8th to 9th grade,False
4,Chiari I malformation,Q07.0,Chiari I malformation is a condition where the...,Q07.0,Q07.0,True,72.56,7th grade,False


In [16]:
# ICD-10 agreement rate for the zero-shot pass: the share of rows where the code
# reported in the generated JSON (ICD10_code) equals the code the gpt-4o labeling
# agent assigned to the PFx text. Both codes come from the model, so this is
# "accuracy" only according to GPT-4o, not against a reference label.
sum(pfx_zeroshot_output_df["_0_icd10_matches"])/len(pfx_zeroshot_output_df.index)

0.84

In [17]:
# Non-null count per column for zero-shot rows whose reading level is not the
# 10th-12th grade target (i.e. the rows the reflexion pass will retry).
pfx_zeroshot_output_df[pfx_zeroshot_output_df._0_reading_level != TENTH_TO_TWELTH_GRADE].count()

finding                     19
ICD10_code                  19
PFx                         19
PFx_ICD10_code              19
_0_agent_icd10_codes        19
_0_icd10_matches            19
_0_flesch                   19
_0_reading_level            19
_0_reading_level_matches    19
dtype: int64

In [18]:
# Reflexion pass: for every zero-shot row that missed the 10th-12th grade
# target, send the previous answer and its measured reading level back to the
# model and ask it to try again.
pfx_zeroshot_outputs_w_reflexion = []
for _, row in pfx_zeroshot_output_df.iterrows():
    if row['_0_reading_level'] == TENTH_TO_TWELTH_GRADE:
        # Already on target: the None placeholder keeps this list aligned
        # row-for-row with pfx_zeroshot_output_df.
        pfx_zeroshot_outputs_w_reflexion.append(None)
        continue

    prompt = zeroshot_prompt_reflexion_reading_level_error.format(
        Incidental_Finding=row['finding'],
        Reading_Level=TENTH_TO_TWELTH_GRADE,
        ICD10_code=row['ICD10_code'],
        _0_reading_level=row['_0_reading_level'],
        PFx=row['PFx'],
        PFx_ICD10_code=row['PFx_ICD10_code'])
    pfx_response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.0,
        messages=[
            {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
            {"role": "user", "content": prompt }
        ],
        stream=False,
    )
    pfx_zeroshot_outputs_w_reflexion.append(pfx_response.choices[0])

In [19]:
# Skip the None placeholders (rows that were not retried) and parse the rest.
pfx_zeroshot_outputs_w_reflexion_json = [
    extract_json(choice)
    for choice in pfx_zeroshot_outputs_w_reflexion
    if choice
]

In [20]:
# Frame holding only the retried explanations (one row per reflexion reply), so
# its index does not line up with pfx_zeroshot_output_df.
pfx_zeroshot_outputs_w_reflexion_df = pd.DataFrame(pfx_zeroshot_outputs_w_reflexion_json)
# Have the ICD-10 labeling agent code each retried PFx text.
pfx_zeroshot_outputs_w_reflexion_icd10_labels = label_icd10s(pfx_zeroshot_outputs_w_reflexion_json)

In [22]:
# Per-row metrics for the retried explanations, computed like the first pass
# but stored under "_1_" column prefixes.
pfx_zeroshot_outputs_w_reflexion_df['_1_agent_icd10_codes'] = [
    next(iter(label.values())) if label else ""
    for label in pfx_zeroshot_outputs_w_reflexion_icd10_labels
]
pfx_zeroshot_outputs_w_reflexion_df["_1_icd10_matches"] = pfx_zeroshot_outputs_w_reflexion_df["ICD10_code"].eq(
    pfx_zeroshot_outputs_w_reflexion_df["_1_agent_icd10_codes"]
)
pfx_zeroshot_outputs_w_reflexion_df["_1_flesch"] = pfx_zeroshot_outputs_w_reflexion_df["PFx"].map(
    textstat.flesch_reading_ease
)
pfx_zeroshot_outputs_w_reflexion_df["_1_reading_level"] = pfx_zeroshot_outputs_w_reflexion_df["_1_flesch"].map(
    map_reading_level
)
pfx_zeroshot_outputs_w_reflexion_df["_1_reading_level_matches"] = pfx_zeroshot_outputs_w_reflexion_df["_1_reading_level"].eq(
    TENTH_TO_TWELTH_GRADE
)
pfx_zeroshot_outputs_w_reflexion_df.head()


Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,_1_agent_icd10_codes,_1_icd10_matches,_1_flesch,_1_reading_level,_1_reading_level_matches
0,Arachnoid cyst,G93.0,An arachnoid cyst is a fluid-filled sac that d...,G93.0,G93.0,True,49.65,College,False
1,Pineal cyst,Q04.6,"A pineal cyst is a small, fluid-filled sac tha...",Q04.6,Q04.8,False,53.0,10th to 12th grade,True
2,Chiari I malformation,Q07.0,Chiari I malformation is a structural defect i...,Q07.0,Q07.0,True,61.26,8th to 9th grade,False
3,Thyroid nodule,E04.1,"A thyroid nodule is a small, abnormal growth w...",R22.1,E04.1,True,42.92,College,False
4,Cervical lymphadenopathy,R59.0,Cervical lymphadenopathy refers to the swellin...,R59.0,R59.0,True,44.75,College,False


In [23]:
# Non-null count per column for retried rows that still miss the 10th-12th
# grade target after the reflexion pass.
pfx_zeroshot_outputs_w_reflexion_df[pfx_zeroshot_outputs_w_reflexion_df._1_reading_level != TENTH_TO_TWELTH_GRADE].count()

finding                     15
ICD10_code                  15
PFx                         15
PFx_ICD10_code              15
_1_agent_icd10_codes        15
_1_icd10_matches            15
_1_flesch                   15
_1_reading_level            15
_1_reading_level_matches    15
dtype: int64

In [24]:
# ICD-10 agreement rate among the retried rows only: generator-reported code
# versus the labeling agent's code for the rewritten PFx.
sum(pfx_zeroshot_outputs_w_reflexion_df["_1_icd10_matches"])/len(pfx_zeroshot_outputs_w_reflexion_df.index)

0.8421052631578947

In [26]:
# Few-shot pass: render every df_fewshot row through the `example` template,
# concatenate the results, and include them in each generation prompt.
pfx_fewshot_examples = "".join(
    example.format(**shot) for _, shot in df_fewshot.iterrows()
)

# One gpt-4o explanation per finding in df_eval, again targeting the
# 10th-12th grade reading level; raw completion choices are kept for parsing.
pfx_fewshot_outputs = []
for _, finding_row in df_eval.iterrows():
    fewshot_messages = [
        {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
        {
            "role": "user",
            "content": baseline_fewshot_prompt.format(
                examples=pfx_fewshot_examples,
                Incidental_Finding=finding_row['Incidental_Finding'],
                Reading_Level=TENTH_TO_TWELTH_GRADE,
            ),
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.0,
        messages=fewshot_messages,
        stream=False,
    )
    pfx_fewshot_outputs.append(completion.choices[0])

In [27]:
# Parse the raw few-shot completions, then have the ICD-10 labeling agent code
# each generated PFx text.
pfx_fewshot_outputs_json = [extract_json(choice) for choice in pfx_fewshot_outputs]
pfx_fewshot_outputs_icd10_labels = label_icd10s(pfx_fewshot_outputs_json)

In [28]:
# Few-shot results frame plus per-row metrics: labeling agent's code, whether it
# equals the generator-reported code, Flesch reading-ease score, and the
# reading-level label derived from that score.
pfx_fewshot_output_df = pd.DataFrame(pfx_fewshot_outputs_json)
pfx_fewshot_output_df['_0_agent_icd10_codes'] = [
    next(iter(label.values())) if label else ""
    for label in pfx_fewshot_outputs_icd10_labels
]
pfx_fewshot_output_df["_0_icd10_matches"] = pfx_fewshot_output_df["ICD10_code"].eq(
    pfx_fewshot_output_df["_0_agent_icd10_codes"]
)
pfx_fewshot_output_df["_0_flesch"] = pfx_fewshot_output_df["PFx"].map(
    textstat.flesch_reading_ease
)
pfx_fewshot_output_df["_0_reading_level"] = pfx_fewshot_output_df["_0_flesch"].map(
    map_reading_level
)

In [29]:
# ICD-10 agreement rate for the few-shot pass: generator-reported code versus
# the labeling agent's code for the same PFx text.
sum(pfx_fewshot_output_df["_0_icd10_matches"])/len(pfx_fewshot_output_df.index)

0.8

In [30]:
pfx_fewshot_output_df.head()

Unnamed: 0,Incidental_Finding,ICD10_code,PFx,PFx_ICD10_code,_0_agent_icd10_codes,_0_icd10_matches,_0_flesch,_0_reading_level
0,White matter lesions,R90.82,"White matter lesions, often found incidentally...",R90.82,R90.82,True,39.37,College
1,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.3,Q04.6,False,55.03,10th to 12th grade
2,Pituitary microadenoma,D35.2,"A pituitary microadenoma is a small, benign tu...",D35.2,D35.2,True,30.2,College
3,Pineal cyst,Q04.6,A pineal cyst is a fluid-filled sac located in...,Q04.6,Q04.8,False,50.87,10th to 12th grade
4,Chiari I malformation,Q07.0,A Chiari I malformation is a condition where t...,Q07.0,Q07.0,True,30.7,College


In [31]:
# Non-null count per column for few-shot rows whose reading level is not the
# 10th-12th grade target.
pfx_fewshot_output_df[pfx_fewshot_output_df._0_reading_level != TENTH_TO_TWELTH_GRADE].count()

Incidental_Finding      18
ICD10_code              18
PFx                     18
PFx_ICD10_code          18
_0_agent_icd10_codes    18
_0_icd10_matches        18
_0_flesch               18
_0_reading_level        18
dtype: int64

In [34]:
# Non-null count per column for few-shot rows scoring below 50, the lower edge
# of the 10th-12th grade band in map_reading_level; these rows read as College
# level or harder.
pfx_fewshot_output_df[pfx_fewshot_output_df._0_flesch < 50].count()

Incidental_Finding      16
ICD10_code              16
PFx                     16
PFx_ICD10_code          16
_0_agent_icd10_codes    16
_0_icd10_matches        16
_0_flesch               16
_0_reading_level        16
dtype: int64