In [1]:
### PFX prototyping

In [1]:
import pandas as pd
import os
from readability import Readability
import textstat
from openai import OpenAI
import json
import re
from pfx_prompts import baseline_fewshot_prompt,example,baseline_fewshot_icd10_labeling_prompt,icd10_example,baseline_zeroshot_prompt
client = OpenAI()

In [2]:
# reading levels
PROFESSIONAL = "Professional"
COLLEGE_GRADUATE = "College graduate"
COLLEGE = "College"
TENTH_TO_TWELTH_GRADE = "10th to 12th grade"
EIGTH_TO_NINTH_GRADE = "8th to 9th grade"
SEVENTH_GRADE = "7th grade"
SIXTH_GRADE = "6th grade"
FIFTH_GRADE = "5th grade"
N_A = "N/A"


In [3]:
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [4]:
df_fewshot.head()


Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,PFx,PFx_ICD10_code
0,Head,Cerebral cavernous malformation,I67.1,A small cavernous malformation of the brain is...,I67.1
1,Head,Infundibulum of cerebral artery,Q28.2,An infundibulum detected on an MRI of the brai...,Q28.2
2,Neck,Tornwaldt cyst,J39.2,"A Tornwaldt cyst, often detected incidentally ...",J39.2
3,Head,Fluid in the right petrous apex,H70.8,A small amount of incidentally found fluid in ...,H70.8
4,Abdomen,Spinal hemangioma,D18.09,"A spinal hemangioma, often found incidentally ...",D18.09


In [5]:
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [6]:
df_eval.head()

Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,Unnamed: 3
0,Head,White matter lesions,R90.82,
1,Head,Arachnoid cyst,Q04.3,
2,Head,Pituitary microadenoma,D35.2,
3,Head,Pineal cyst,Q04.6,
4,Head,Chiari I malformation,Q07.0,


In [7]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    if flesch_reading_ease < 10:
        return PROFESSIONAL
    elif 10.0 <= flesch_reading_ease < 30.0:
        return COLLEGE_GRADUATE
    elif 30.0 <= flesch_reading_ease < 50.0:
        return COLLEGE
    elif 50.0 <= flesch_reading_ease < 60.0:
        return TENTH_TO_TWELTH_GRADE
    elif 60.0 <= flesch_reading_ease < 70.0:
        return EIGTH_TO_NINTH_GRADE
    elif 70.0 <= flesch_reading_ease < 80.0:
        return SEVENTH_GRADE
    elif 80.0 <= flesch_reading_ease < 90.0:
        return SIXTH_GRADE
    elif 90.0 <= flesch_reading_ease < 100.0:
        return FIFTH_GRADE 
    else:
        return N_A

In [8]:
def extract_json(openai_response):
    json_match = re.search(r'```.*(\{.*?\}).*```', openai_response.message.content, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
        # Convert the extracted string to a JSON object
        json_object = json.loads(json_str.replace('\n',''))
        #print(json_object)
        return json_object
    else:
        print("No JSON object found.") 
        return dict()

In [9]:
def label_icd10s(pfx_outputs_json):
    pfx_icd10_fewshot_examples = ""
    for i,row in df_fewshot.iterrows():
        pfx_icd10_fewshot_examples += icd10_example.format(**row)
    
    pfx_icd10_codes = []
    for pfx_output in pfx_outputs_json:
        try:
            prompt = baseline_fewshot_icd10_labeling_prompt.format(examples=pfx_icd10_fewshot_examples,PFx=pfx_output['PFx'])
        except:
            print(pfx_output)
        pfx_icd10_response = client.chat.completions.create(
                #model="gpt-4o-mini",
                model="gpt-4o",
                temperature=0.0,
                messages=[
                    {"role": "system", "content": "You are an ICD10 medical coder for incidental findings." },
                    {"role": "user", "content": prompt }
                ],
                stream=False,
            )
        pfx_icd10_codes.append(pfx_icd10_response.choices[0])
    return list(map(extract_json,pfx_icd10_codes))

In [10]:
pfx_zeroshot_outputs = []
for i,row in df_eval.iloc[:].iterrows():
    #print(baseline_fewshot_prompt.format(examples=pfx_fewshot_examples,Incidental_Finding=row['Incidental_Finding']))
    prompt = baseline_zeroshot_prompt.format(Incidental_Finding=row['Incidental_Finding'],Reading_Level=TENTH_TO_TWELTH_GRADE)
    pfx_response = client.chat.completions.create(
        #model="gpt-4o-mini",
        model="gpt-4o",
        temperature=0.0,
        messages=[
            {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
            {"role": "user", "content": prompt }
        ],
        stream=False,
    )
    
    pfx_zeroshot_outputs.append(pfx_response.choices[0])

In [11]:
pfx_zeroshot_outputs_json = list(map(extract_json,pfx_zeroshot_outputs))
#pfx_zeroshot_outputs_json


In [12]:
pfx_zeroshot_output_df = pd.DataFrame(pfx_zeroshot_outputs_json)
pfx_zeroshot_outputs_icd10_labels = label_icd10s(pfx_zeroshot_outputs_json)

In [13]:
pfx_zeroshot_output_df['_0_agent_icd10_codes'] = list(map(lambda x: list(x.values())[0] if x else "", pfx_zeroshot_outputs_icd10_labels))

In [14]:
pfx_zeroshot_output_df["_0_icd10_matches"]= pfx_zeroshot_output_df.ICD10_code == pfx_zeroshot_output_df._0_agent_icd10_codes
pfx_zeroshot_output_df["_0_flesch"] = pfx_zeroshot_output_df['PFx'].apply(textstat.flesch_reading_ease)
pfx_zeroshot_output_df["_0_reading_level"] = pfx_zeroshot_output_df['0_flesch'].apply(map_reading_level)
pfx_zeroshot_output_df["_0_reading_level_matches"] = pfx_zeroshot_output_df['0_reading_level'] == TENTH_TO_TWELTH_GRADE

KeyError: '0_flesch'

In [None]:
pfx_zeroshot_output_df.head()


In [None]:
#icd10 accuracy -- at least according to GPT4o
sum(pfx_zeroshot_output_df["_0_icd10_matches"])/len(pfx_zeroshot_output_df.index)

In [None]:
pfx_fewshot_examples = ""
for i,row in df_fewshot.iterrows():
    pfx_fewshot_examples += example.format(**row)

pfx_fewshot_outputs = []
for i,row in df_eval.iloc[:].iterrows():
    #print(baseline_fewshot_prompt.format(examples=pfx_fewshot_examples,Incidental_Finding=row['Incidental_Finding']))
    prompt = baseline_fewshot_prompt.format(examples=pfx_fewshot_examples,Incidental_Finding=row['Incidental_Finding'],Reading_Level=TENTH_TO_TWELTH_GRADE)
    pfx_response = client.chat.completions.create(
        #model="gpt-4o-mini",
        model="gpt-4o",
        temperature=0.0,
        messages=[
            {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient." },
            {"role": "user", "content": prompt }
        ],
        stream=False,
    )
    
    pfx_fewshot_outputs.append(pfx_response.choices[0])

In [None]:
pfx_fewshot_outputs_json = list(map(extract_json,pfx_fewshot_outputs))
pfx_fewshot_outputs_icd10_labels = label_icd10s(pfx_fewshot_outputs_json)

In [None]:
pfx_fewshot_output_df = pd.DataFrame(pfx_fewshot_outputs_json)
pfx_fewshot_output_df['_0_agent_icd10_codes'] = list(map(lambda x: list(x.values())[0] if x else "",pfx_fewshot_outputs_icd10_labels))
pfx_fewshot_output_df["_0_icd10_matches"]= pfx_fewshot_output_df.ICD10_code == pfx_fewshot_output_df._0_agent_icd10_codes
pfx_fewshot_output_df["_0_flesch"] = pfx_fewshot_output_df['PFx'].apply(textstat.flesch_reading_ease)
pfx_fewshot_output_df["_0_reading_level"] = pfx_fewshot_output_df['0_flesch'].apply(map_reading_level)

In [None]:
sum(pfx_fewshot_output_df["_0_icd10_matches"])/len(pfx_fewshot_output_df.index)

In [None]:
pfx_fewshot_output_df.head()