In [1]:
### PFx prototyping: few-shot generation of PFx text for incidental findings, then ICD-10 labeling of that text

In [2]:
import pandas as pd
import os
import readability
from openai import OpenAI
import json
import re
from pfx_fewshots import baseline_fewshot_prompt,example,baseline_fewshot_icd10_labeling_prompt,icd10_example
# Create the OpenAI client with default settings. No credentials are passed here, so
# they are presumably picked up from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()
# Move the working directory up one level so the relative CSV paths read below resolve
# against the parent directory.
# NOTE(review): expanduser() has no effect on "../" (there is no "~" to expand), and
# re-running this cell moves up one more level each time.
os.chdir(os.path.expanduser("../"))

In [3]:
# Few-shot examples; each row is later expanded into the prompt templates via
# str.format(**row), so the CSV column names must match the template field names.
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [4]:
# Preview the first few-shot rows.
df_fewshot.head()


Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,PFx,PFx_ICD10_code
0,Head,Cerebral cavernous malformation,I67.1,A small cavernous malformation of the brain is...,I67.1
1,Head,Infundibulum of cerebral artery,Q28.2,An infundibulum detected on an MRI of the brai...,Q28.2
2,Neck,Tornwaldt cyst,J39.2,"A Tornwaldt cyst, often detected incidentally ...",J39.2
3,Head,Fluid in the right petrous apex,H70.8,A small amount of incidentally found fluid in ...,H70.8
4,Abdomen,Spinal hemangioma,D18.09,"A spinal hemangioma, often found incidentally ...",D18.09


In [7]:
# Evaluation findings; only the 'Incidental_Finding' column is fed into the generation prompt below.
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [8]:
# Preview the first evaluation rows.
df_eval.head()

Unnamed: 0,Body_Part,Incidental_Finding,ICD10_code,Unnamed: 3
0,Head,White matter lesions,R90.82,
1,Head,Arachnoid cyst,Q04.3,
2,Head,Pituitary microadenoma,D35.2,
3,Head,Pineal cyst,Q04.6,
4,Head,Chiari I malformation,Q07.0,


In [9]:
# Render the first few-shot row through the ICD-10 example template to check the
# formatting; the row's columns are passed to str.format as named fields.
print(icd10_example.format(**df_fewshot.iloc[0]))

<PFx>
A small cavernous malformation of the brain is a collection of abnormal blood vessels that is typically benign and often discovered incidentally when imaging is done. Patients should understand that these small malformations usually do not cause symptoms and usually do not require treatment. However, it's important to have periodic follow-ups with a neurologist to monitor for any changes, as there is a slight risk of bleeding which could lead to symptoms like headaches or seizures. If symptoms develop or if there are concerns about the malformation's behavior, the neurologist may discuss management options, which could range from observation to, in rare cases, surgical intervention.
</PFx>
<PFx_ICD10_code>
```{"PFx_ICD10_code":"I67.1"}```
</PFx_ICD10_code>




In [12]:
def extract_json(openai_response):
    """Return the first JSON object found in a chat completion choice.

    The model is expected to answer with a JSON object, normally wrapped in a
    ``` fenced block (optionally tagged ```json). Text inside fenced blocks is
    searched first; if no fence yields an object, the whole message is searched,
    so unfenced or unterminated-fence answers are still parsed.

    Args:
        openai_response: Object exposing ``.message.content`` (e.g. one element
            of ``response.choices``). ``content`` may be ``None``.

    Returns:
        dict: The decoded JSON object, including any nested objects. An empty
        dict is returned (and "No JSON object found." is printed) when the
        content is missing or contains no decodable JSON object.
    """
    content = openai_response.message.content
    if isinstance(content, str):
        decoder = json.JSONDecoder()
        # Fenced bodies take priority; the raw message is the last resort.
        candidates = re.findall(r'```(.*?)```', content, re.DOTALL) + [content]
        for text in candidates:
            start = text.find('{')
            while start != -1:
                try:
                    # raw_decode parses one complete JSON value starting at
                    # `start` and ignores trailing text, so nested braces are
                    # handled by the JSON parser rather than by a regex.
                    json_object, _end = decoder.raw_decode(text, start)
                    return json_object
                except json.JSONDecodeError:
                    # Not valid JSON from this brace; try the next one.
                    start = text.find('{', start + 1)
    print("No JSON object found.")
    return dict()

In [13]:
# Concatenate every few-shot row, rendered through the `example` template, into one
# block of text that is shared by all prompts below.
pfx_fewshot_examples = "".join(
    example.format(**row) for i, row in df_fewshot.iterrows()
)

# One chat completion per evaluation finding; only the first choice of each
# response is kept, in the same order as the rows of df_eval.
pfx_outputs = []
for i, row in df_eval.iterrows():
    prompt = baseline_fewshot_prompt.format(
        examples=pfx_fewshot_examples,
        Incidental_Finding=row['Incidental_Finding'],
    )
    pfx_response = client.chat.completions.create(
        model="gpt-4o",  # the original cell had "gpt-4o-mini" commented out as an alternative
        temperature=0.0,
        stream=False,
        messages=[
            {
                "role": "system",
                "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient.",
            },
            {"role": "user", "content": prompt},
        ],
    )
    pfx_outputs.append(pfx_response.choices[0])

In [14]:
# Build the ICD-10 few-shot block in this cell before printing it. It was previously
# only assigned in a later cell, so this print raised NameError on a fresh kernel
# run from top to bottom.
pfx_icd10_fewshot_examples = "".join(
    icd10_example.format(**row) for i, row in df_fewshot.iterrows()
)
print(pfx_icd10_fewshot_examples)

<PFx>
A small cavernous malformation of the brain is a collection of abnormal blood vessels that is typically benign and often discovered incidentally when imaging is done. Patients should understand that these small malformations usually do not cause symptoms and usually do not require treatment. However, it's important to have periodic follow-ups with a neurologist to monitor for any changes, as there is a slight risk of bleeding which could lead to symptoms like headaches or seizures. If symptoms develop or if there are concerns about the malformation's behavior, the neurologist may discuss management options, which could range from observation to, in rare cases, surgical intervention.
</PFx>
<PFx_ICD10_code>
```{"PFx_ICD10_code":"I67.1"}```
</PFx_ICD10_code>

<PFx>
An infundibulum detected on an MRI of the brain is a benign and typically incidental finding characterized by a funnel-shaped dilation at the origin of a cerebral artery. Patients should be reassured that an infundibulum

In [15]:
# Parse the JSON payload out of every generation response, keeping the original order.
pfx_outputs_json = [extract_json(choice) for choice in pfx_outputs]

In [16]:
# Inspect the first parsed generation result.
pfx_outputs_json[0]

{'finding': 'White matter lesions',
 'ICD10_code': 'R90.82',
 'PFx': "White matter lesions, often found incidentally on an MRI of the brain, are small areas of damage or changes in the brain's white matter. Patients should know that these lesions are common, especially as people age, and they often do not cause any symptoms. While they can be associated with conditions like migraines, high blood pressure, or small vessel disease, they usually do not require treatment unless they are linked to specific symptoms or conditions. It's important to discuss the findings with your doctor to understand their significance in your particular case.",
 'PFx_ICD10_code': 'R90.82'}

In [17]:
# Few-shot block for the labeling prompt: every few-shot row rendered through the
# `icd10_example` template and concatenated.
pfx_icd10_fewshot_examples = "".join(
    icd10_example.format(**row) for i, row in df_fewshot.iterrows()
)

# Second pass: ask the model to assign an ICD-10 code to each generated PFx text.
# Only the first choice of each response is kept, in the order of pfx_outputs_json.
pfx_icd10_codes = []
for pfx_output in pfx_outputs_json:
    prompt = baseline_fewshot_icd10_labeling_prompt.format(
        examples=pfx_icd10_fewshot_examples,
        PFx=pfx_output['PFx'],
    )
    pfx_icd10_response = client.chat.completions.create(
        model="gpt-4o",  # the original cell had "gpt-4o-mini" commented out as an alternative
        temperature=0.0,
        stream=False,
        messages=[
            {"role": "system", "content": "You are an ICD10 medical coder for incidental findings."},
            {"role": "user", "content": prompt},
        ],
    )
    pfx_icd10_codes.append(pfx_icd10_response.choices[0])
    
    


In [18]:
# Dump the raw choices to inspect exactly what the labeling model returned.
# NOTE(review): this prints the entire list; consider a slice to keep the output small.
print(pfx_icd10_codes)

[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{"ICD10_code":"R90.82"}\n```', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{"PFx_ICD10_code":"Q04.6"}\n```', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{"ICD10_code":"D35.2"}\n```', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{"ICD10_code":"D33.1"}\n```', role='assistant', function_call=None, tool_calls=None, refusal=None)), Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{"PFx_ICD10_code":"Q07.0"}\n```', role='assistant', function_call=None, tool_calls=No

In [19]:
# Parse the JSON payload out of every labeling response, keeping the original order.
pfx_icd10_codes_json = [extract_json(choice) for choice in pfx_icd10_codes]

In [20]:
# Display the parsed labeling results. Note in the output that the key is not
# consistent across responses ('ICD10_code' vs 'PFx_ICD10_code').
pfx_icd10_codes_json

[{'ICD10_code': 'R90.82'},
 {'PFx_ICD10_code': 'Q04.6'},
 {'ICD10_code': 'D35.2'},
 {'ICD10_code': 'D33.1'},
 {'PFx_ICD10_code': 'Q07.0'},
 {'ICD10_code': 'E04.1'},
 {'ICD10_code': 'R59.0'},
 {'PFx_ICD10_code': 'K11.8'},
 {'ICD10_code': 'I65.2'},
 {'ICD10_code': 'M50.20'},
 {'PFx_ICD10_code': 'R91.1'},
 {'ICD10_code': 'R59.0'},
 {'ICD10_code': 'K44.9'},
 {'ICD10_code': 'I25.10'},
 {'ICD10_code': 'I31.3'},
 {'ICD10_code': 'K76.89'},
 {'PFx_ICD10_code': 'N28.1'},
 {'ICD10_code': 'D35.00'},
 {'PFx_ICD10_code': 'K86.2'},
 {'PFx_ICD10_code': 'R16.1'},
 {'ICD10_code': 'N83.0'},
 {'ICD10_code': 'D25.9'},
 {'ICD10_code': 'N40.1'},
 {'ICD10_code': 'R59.0'},
 {'ICD10_code': 'N32.3'}]

In [22]:
# One row per parsed generation result; the columns come from the JSON keys
# emitted by the generation model.
output_df = pd.DataFrame(pfx_outputs_json)

In [33]:
# The labeling model is not consistent about the key it returns ('ICD10_code' vs
# 'PFx_ICD10_code'), so take the first value of each parsed object whatever its key.
# An empty dict (extract_json found nothing) becomes None instead of raising
# IndexError, which keeps the column aligned row-for-row with output_df.
output_df['agent_icd10_codes'] = [
    next(iter(parsed.values()), None) for parsed in pfx_icd10_codes_json
]

In [34]:
# Preview the generation results alongside the codes assigned by the labeling pass.
output_df.head()

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,agent_icd10_codes
0,White matter lesions,R90.82,"White matter lesions, often found incidentally...",R90.82,R90.82
1,Arachnoid cyst,Q04.6,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,Q04.6
2,Pituitary microadenoma,D35.2,"A pituitary microadenoma is a small, benign tu...",D35.2,D35.2
3,Pineal cyst,Q04.6,"A pineal cyst, often detected incidentally on ...",Q04.6,D33.1
4,Chiari I malformation,Q07.0,A Chiari I malformation is a condition where t...,Q07.0,Q07.0


In [36]:
# Scratch sanity check of the pairwise-equality idiom on literal lists before
# applying it to the DataFrame columns.
list(map(lambda x: x[0]==x[1],zip([1,1], [1,2])))

[True, False]

In [37]:
# Row-by-row flag: does the code emitted with the generated text equal the code
# assigned by the labeling pass?
[
    generated == labeled
    for generated, labeled in zip(output_df['ICD10_code'], output_df['agent_icd10_codes'])
]

[True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True]

In [38]:
# Fraction of rows where the two codes agree. Computed from output_df directly
# rather than from IPython's `_` (the last displayed output), which silently
# changes whenever any other cell is run in between and breaks on a re-run.
# Note: output_df['ICD10_code'] is the code emitted by the generation model, not the
# reference column of df_eval, so this measures agreement between the two model passes.
icd10_matches = [
    generated == labeled
    for generated, labeled in zip(output_df['ICD10_code'], output_df['agent_icd10_codes'])
]
sum(icd10_matches) / len(icd10_matches)

0.84