## 📚 Prerequisites

Ensure that your Azure Services are properly set up, your Conda environment is created, and your environment variables are configured as per the instructions in the [README.md](README.md) file.

In [1]:
import os

# Repository root the notebook should run from. Later cells use paths that are
# relative to the working directory (e.g. "utils/data/cases").
target_directory = r"C:\Users\pablosal\Desktop\gbb-ai-hls-factory-prior-auth"  # change your directory here

# Check with isdir rather than exists: os.chdir raises when the path points at
# a regular file, so a bare existence check does not make the chdir safe.
if os.path.isdir(target_directory):
    # Change the current working directory
    os.chdir(target_directory)
    print(f"Directory changed to {os.getcwd()}")
else:
    print(f"Directory {target_directory} does not exist.")

Directory changed to C:\Users\pablosal\Desktop\gbb-ai-hls-factory-prior-auth


In [2]:
import json
from pathlib import Path
import pandas as pd


def save_dictionary_to_file(dictionary, file_path):
    """Write ``dictionary`` to ``file_path`` as JSON indented by four spaces.

    Args:
        dictionary: JSON-serializable object to persist.
        file_path: Destination path; the file is opened in ``"w"`` mode, so any
            existing content is replaced.
    """
    with open(file_path, "w") as out_handle:
        out_handle.write(json.dumps(dictionary, indent=4))


def load_dictionary_from_file(file_path):
    """Read the JSON document at ``file_path`` and return the parsed object.

    The file is decoded as UTF-8 instead of the platform's locale encoding, so
    non-ASCII text loads the same way on every operating system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)


def build_ground_truth_dataset(data_folder, categories):
    """Scan every case directory under ``data_folder`` into one dictionary.

    Args:
        data_folder: ``pathlib.Path`` whose sub-directories are the case folders.
            Non-directory entries are ignored.
        categories: Category folder names forwarded to ``process_case_folder``.

    Returns:
        The dictionary populated by ``process_case_folder`` for each case
        directory; empty when ``data_folder`` contains no directories.
    """
    file_info_dict = {}
    # iterdir() yields entries in arbitrary order; sorting keeps the key order
    # of the result (and of any JSON written from it) reproducible.
    for case_folder in sorted(data_folder.iterdir()):
        if case_folder.is_dir():
            process_case_folder(case_folder, file_info_dict, categories)
    return file_info_dict


def initialize_file_info_dict(categories):
    """Return a new dict mapping each name in ``categories`` to its own empty list."""
    record = {}
    for category in categories:
        record[category] = []
    return record


def process_category_folder(category_folder, file_info_dict, case_eval_id):
    """Append the path of every ``*.pdf`` directly inside ``category_folder``.

    Paths are appended, as strings, to
    ``file_info_dict[case_eval_id][category_folder.name]``.

    Args:
        category_folder: ``pathlib.Path`` of the folder to scan (not recursive).
        file_info_dict: Dictionary updated in place; it must already hold a
            list under ``[case_eval_id][category_folder.name]``.
        case_eval_id: Key of the record being filled.
    """
    # glob() returns matches in a filesystem-dependent order; sort so the
    # recorded list is the same on every machine.
    for pdf_file in sorted(category_folder.glob("*.pdf")):
        file_info_dict[case_eval_id][category_folder.name].append(str(pdf_file))


def extract_results_data(results_file, file_info_dict, case_eval_id):
    """Copy the evaluation outcome from a results JSON file into the record.

    The file is decoded as UTF-8 rather than the platform's locale encoding so
    that non-ASCII text (for example in ``notes``) is read correctly everywhere.

    Args:
        results_file: Path of the JSON file; its top-level value must be an object.
        file_info_dict: Dictionary updated in place; ``file_info_dict[case_eval_id]``
            must already exist.
        case_eval_id: Key of the record receiving the values.

    The keys ``evaluation_time``, ``decision`` and ``notes`` are written; any
    key missing from the file is stored as ``"N/A"``.
    """
    with open(results_file, "r", encoding="utf-8") as f:
        results_data = json.load(f)

    record = file_info_dict[case_eval_id]
    for field in ("evaluation_time", "decision", "notes"):
        record[field] = results_data.get(field, "N/A")


def process_evaluation_folder(eval_folder, file_info_dict, case_eval_id, categories):
    """Create and fill the record for one evaluation folder.

    A fresh record is stored under ``file_info_dict[case_eval_id]`` with an empty
    list per category and ``"N/A"`` for ``evaluation_time``, ``decision`` and
    ``notes``. Sub-directories of ``eval_folder`` whose name is in ``categories``
    are then scanned by ``process_category_folder``, and if ``results.json``
    exists in ``eval_folder`` it is read by ``extract_results_data``.
    """
    record = file_info_dict[case_eval_id] = initialize_file_info_dict(categories)
    for placeholder_key in ("evaluation_time", "decision", "notes"):
        record[placeholder_key] = "N/A"

    for entry in eval_folder.iterdir():
        if not entry.is_dir():
            continue
        if entry.name in categories:
            process_category_folder(entry, file_info_dict, case_eval_id)

    results_path = eval_folder / "results.json"
    if results_path.exists():
        extract_results_data(results_path, file_info_dict, case_eval_id)


def process_case_folder(case_folder, file_info_dict, categories):
    """Process every evaluation sub-folder of a single case.

    Only sub-directories whose name is purely alphabetic (``str.isalpha``) are
    treated as evaluations. Each one is recorded under the key
    ``"<case folder name>_<evaluation folder name>"``.

    Args:
        case_folder: ``pathlib.Path`` of the case directory.
        file_info_dict: Dictionary updated in place by ``process_evaluation_folder``.
        categories: Category folder names forwarded to ``process_evaluation_folder``.
    """
    case_id = case_folder.name
    # iterdir() order is arbitrary; sort so evaluations are always added in the
    # same order regardless of filesystem.
    for eval_folder in sorted(case_folder.iterdir()):
        if eval_folder.is_dir() and eval_folder.name.isalpha():
            eval_id = eval_folder.name
            case_eval_id = f"{case_id}_{eval_id}"
            process_evaluation_folder(
                eval_folder, file_info_dict, case_eval_id, categories
            )

In [3]:
# Case tree to scan (relative to the current working directory) and the
# category folder names collected for each evaluation.
data_folder = Path("utils/data/cases")
categories = ["doctor_notes", "imaging", "labs", "pa_form", "policies"]

# Walk the case tree and gather the per-evaluation records.
file_info_dict = build_ground_truth_dataset(
    data_folder=data_folder, categories=categories
)

# Write the collected records to ground_truth.json inside the case tree.
save_path = data_folder.joinpath("ground_truth.json")
save_dictionary_to_file(dictionary=file_info_dict, file_path=save_path)

In [4]:
# Read the saved file back so the preview reflects what is actually on disk.
loaded_dict = load_dictionary_from_file(file_path=save_path)

# orient="index": each top-level key becomes a row, each record field a column.
df = pd.DataFrame.from_dict(data=loaded_dict, orient="index")
df.head(n=5)

Unnamed: 0,doctor_notes,imaging,labs,pa_form,policies,evaluation_time,decision,notes
001_a,[utils\data\cases\001\a\doctor_notes\01_a_note...,[utils\data\cases\001\a\imaging\01_a_imaging.pdf],[utils\data\cases\001\a\labs\01_a_labs.pdf],[utils\data\cases\001\a\pa_form\01_a_form.pdf],[],2023-10-01T12:00:00Z,rejected,Manually evaluated by MD based on the policies.
001_b,[utils\data\cases\001\b\doctor_notes\01_b_note...,[utils\data\cases\001\b\imaging\01_b_imaging.pdf],[utils\data\cases\001\b\labs\01_b_labs.pdf],[utils\data\cases\001\b\pa_form\01_b_form.pdf],[],2023-10-01T12:00:00Z,approved,Manually evaluated by MD based on the policies.
002_a,[utils\data\cases\002\a\doctor_notes\002_a (no...,[utils\data\cases\002\a\imaging\002_a (imaging...,[utils\data\cases\002\a\labs\002_a (labs) .pdf],[utils\data\cases\002\a\pa_form\002_a (form).pdf],[],2023-10-01T12:00:00Z,approved,Manually evaluated by MD based on the policies.
002_b,[utils\data\cases\002\b\doctor_notes\002_b (no...,[utils\data\cases\002\b\imaging\002_b (imaging...,[utils\data\cases\002\b\labs\002_b (labs).pdf],[utils\data\cases\002\b\pa_form\002_b (form).pdf],[],2023-10-01T12:00:00Z,approved,Manually evaluated by MD based on the policies.
003_a,[utils\data\cases\003\a\doctor_notes\003_a (no...,[],[utils\data\cases\003\a\labs\003_a (labs).pdf],[utils\data\cases\003\a\pa_form\003_a (form).pdf],[],2023-10-01T12:00:00Z,approved,Manually evaluated by MD based on the policies.
