In [None]:
import pandas as pd
from tqdm import tqdm
import os
import requests
import json
from pprint import pprint

# API configuration.
# The subscription key and endpoint URL are read from the environment so that
# secrets are never saved inside the notebook. The empty-string fallbacks are
# the same values the notebook previously hard-coded, and either name can still
# be overridden by assigning to it after this cell.
APIM_KEY = os.environ.get("APIM_KEY", "")
# Model identifier sent in the "model" field of every request payload.
MODEL = 'gpt-4.1'

# Chat-completions endpoint that the request loop POSTs to.
url = os.environ.get("APIM_URL", "")

In [None]:
# Path of the input Excel workbook (loaded with pd.read_excel further down).
dataset_path = ""

# Path the annotated DataFrame is written to (via DataFrame.to_excel in the last cell).
export_path = ""


# functions

In [None]:
def parse_findings(report):
    """Return the findings section of a radiology report, or None.

    The section starts at a findings header and ends just before the next
    impression header (the impression header itself is excluded). Header
    spellings are tried in this order:

        1. 'FINDINGS:' ... 'IMPRESSION:'
        2. 'FINDINGS'  ... 'IMPRESSION'
        3. 'Findings:' ... 'Impression:'

    The end header is searched for only *after* the start header, so an
    earlier mention of "IMPRESSION" (e.g. a quoted prior study) does not hide
    a valid section. If one spelling does not yield a section, the remaining
    spellings are still tried.

    Parameters
    ----------
    report : str
        Full report text. Any non-string value yields None.

    Returns
    -------
    str or None
        The substring from the findings header up to the impression header,
        or None when no header pair is found in the right order.
    """
    if not isinstance(report, str):
        return None

    marker_pairs = (
        ('FINDINGS:', 'IMPRESSION:'),
        ('FINDINGS', 'IMPRESSION'),
        ('Findings:', 'Impression:'),
    )

    for start_marker, end_marker in marker_pairs:
        start = report.find(start_marker)
        if start == -1:
            continue
        # Look for the closing header only past the opening header.
        end = report.find(end_marker, start + len(start_marker))
        if end != -1:
            return report[start:end]

    return None

# main code

In [None]:
# Load the report table and show its dimensions and column labels as a sanity check.
df = pd.read_excel(io=dataset_path)
for summary in (df.shape, df.columns):
    print(summary)

In [None]:
# Prompt pieces. The request loop assembles each prompt as
#   intro + blank line + findings text + newline + instruction
# so `intro` announces the report and `instruction` follows it.
intro = 'The following is an ultrasound radiology report:'

# Extraction instructions sent with every report. They list 22 parameters to
# pull from the report, ask for the model's own acute-cholecystitis likelihood
# call, and request a JSON answer using the 23 keys named on the last line.
# NOTE: this text is part of the runtime prompt; editing it may change the
# model's responses.
instruction = """
INSTRUCTIONS:
Extract the following information from the report:
- cystic artery velocity (cm/s). If multiple values are reported, provide the maximum value.
- hepatic artery velocity (cm/s). If multiple values are reported, provide the maximum value.
- hepatic artery resistive index (RI) (no units)
- gallbladder wall thickness (mm). If multiple values are reported, provide the maximum value.
- gallbladder wall thickening ('thickened'/'thickening', 'not thickened'/'no thickening'/'normal', 'equivocal'/'borderline'/'mild'/'minimal'/'slight'/'questionable'/'upper limit of normal'/'top normal', or 'not reported'). Do not infer from the gallbladder wall thickness numerical value.
- gallbladder wall hyperemia (present, absent, equivocal, or not reported)
- pericholecystic fluid (present, absent, or not reported)
- gallbladder sludge / biliary sludge (present, absent, or not reported)
- sonographic Murphy sign (positive, negative, limited evaluation, or not reported). If sonographic Murphy sign is negative but the patient received pain medications (analgesics), report as "limited evaluation".
- gallbladder size (cm). If multiple dimensions are measured, report as x*y*z cm, with x being the largest dimension, followed by y and z.
- gallbladder distention (distended, not distended, equivocal, or not reported). Do not infer from the gallbladder size numerical values.
- gallbladder perforation (present, absent, equivocal, or not reported)
- gallstone/cholelithiasis (present, absent, or not reported)
- gallstone/cholelithiasis size (cm). If multiple values are reported, provide the maximum value.
- gallstone/cholelithiasis number (single, multiple, or not reported)
- gallstone/cholelithiasis location (gallbladder neck, gallbladder body, gallbladder fundus, gallbladder infundibulum, cystic duct, other location, or not reported)
- common bile duct (CBD) size (mm). If multiple values are reported, provide the maximum value.
- CBD stone (present, absent, or not reported)
- CBD stone size (mm). If multiple values are reported, provide the maximum value.
- gallbladder polyp (present, absent, or not reported)
- gallbladder polyp size (mm). If multiple values are reported, provide the maximum value.
- gallbladder polyp number (single, multiple, or not reported)
If any of the above parameters are not available in the report, indicate with “not reported”.
Also provide your interpretation of the likelihood of acute cholecystitis based on the findings in the report (yes, no, or equivocal).
Provide the answer in json format with the following keys: cystic_artery_velocity, hepatic_artery_velocity, hepatic_artery_resistive_index, gallbladder_wall_thickness, gallbladder_wall_thickening, gallbladder_wall_hyperemia, pericholecystic_fluid, gallbladder_sludge, sonographic_murphys_sign, gallbladder_size, gallbladder_distention, gallbladder_perforation, gallstone, gallstone_size, gallstone_number, gallstone_location, cbd_size, cbd_stone, cbd_stone_size, gallbladder_polyp, gallbladder_polyp_size, gallbladder_polyp_number, likelihood_of_acute_cholecystitis. Exclude units from the values in the json response.
"""

In [None]:

# Send the findings section of every report to the chat-completions endpoint
# and store the raw model reply in df['API_response'].
#
# Rows that already hold a reply are skipped, so this cell can be re-run to
# resume after an interruption. Any per-row failure is printed and the loop
# moves on to the next row (best-effort batch processing).

# Seconds to wait for the API; without a timeout a stalled connection would
# block the whole loop indefinitely.
REQUEST_TIMEOUT_S = 120

# Columns that may hold the report text, tried in this order.
REPORT_COLUMNS = ('deidentified_US_report', 'Scrubbed Text')

# The headers are the same for every request, so build them once.
headers = {
    'Ocp-Apim-Subscription-Key': APIM_KEY,
    'Content-Type': 'application/json'
}

# An all-empty 'API_response' column is loaded from Excel as float64; cast it
# to object so string replies can be stored without dtype warnings/errors.
if 'API_response' in df.columns:
    df['API_response'] = df['API_response'].astype(object)

for row_idx in tqdm(df.index):

    try:
        # Resume support: skip rows that were answered on a previous run.
        if 'API_response' in df.columns and pd.notna(df.loc[row_idx, 'API_response']):
            continue

        # Use the first report column that exists and has a value for this row.
        report = None
        for report_col in REPORT_COLUMNS:
            if report_col in df.columns and pd.notna(df.loc[row_idx, report_col]):
                report = df.loc[row_idx, report_col]
                break
        if report is None:
            continue

        findings = parse_findings(report)

        # Fall back to a manually copied findings section when automatic
        # parsing could not locate one.
        if findings is None:
            has_manual_copy = (
                'findings_manually_copied' in df.columns
                and pd.notna(df.loc[row_idx, 'findings_manually_copied'])
            )
            if not has_manual_copy:
                print(f'No findings found for row {row_idx}')
                continue
            # str() guards the concatenation below against non-text cells.
            findings = str(df.loc[row_idx, 'findings_manually_copied'])

        prompt = intro + '\n\n' + findings + '\n' + instruction

        payload = json.dumps({
            "model": MODEL,
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        })

        response = requests.post(url, headers=headers, data=payload, timeout=REQUEST_TIMEOUT_S)
        # Surface HTTP failures with their status and body instead of an
        # opaque KeyError when the error payload has no 'choices' field.
        if not response.ok:
            raise RuntimeError(f'HTTP {response.status_code}: {response.text[:500]}')
        response_dict = response.json()

        df.loc[row_idx, 'API_response'] = response_dict['choices'][0]['message']['content']

    except Exception as e:
        # Deliberately broad: one bad row must not abort the whole batch.
        print(f'Error at row {row_idx}')
        print(e)



In [None]:
# Write the annotated table (including any API_response values) to Excel,
# leaving out the DataFrame index column.
df.to_excel(excel_writer=export_path, index=False)