In [9]:
import tempfile
import json
from azure.ai.evaluation import evaluate
from src.aifoundry.aifoundry_helper import AIFoundryManager
from src.evals.custom.fuzzy_evaluator import FuzzyEvaluator
import os
from azure.ai.projects import AIProjectClient

# Initialize the AI Foundry Manager.
# This will internally initialize the project configuration.
#
# The connection string identifies a specific Azure region / subscription /
# resource group / project, so it should not be baked into the notebook.
# An environment variable takes precedence; the literal below is kept only as
# a fallback so the notebook behaves as before when the variable is unset.
# NOTE(review): the variable name is assumed to match the project's .env
# convention -- confirm against AIFoundryManager's own defaults.
DEFAULT_PROJECT_CONNECTION_STRING = "eastus2.api.azureml.ms;28d2df62-e322-4b25-b581-c43b94bd2607;rg-priorauth-eastus2-hls-autoauth;evaluations"

ai_foundry_manager = AIFoundryManager(
    # `or` (rather than a .get default) also covers a variable that is set but empty.
    project_connection_string=os.environ.get("AZURE_AI_FOUNDRY_CONNECTION_STRING")
    or DEFAULT_PROJECT_CONNECTION_STRING
)

# Create static evaluation data: a single record whose response matches the
# ground truth exactly.
static_data = {
    "query": "patient_info.patient_name",
    "response": "Sarah Sample",
    "ground_truth": "Sarah Sample",
}

# Write the static evaluation record to a temporary JSONL file (one JSON
# object per line). The encoding is pinned to UTF-8 so the file does not
# depend on the platform's default locale encoding.
# delete=False keeps the file on disk after the handle is closed because a
# later cell reads it by path; nothing here removes it, so delete
# `dataset_path` manually once the evaluation is finished.
with tempfile.NamedTemporaryFile(
    mode="w", delete=False, suffix=".jsonl", encoding="utf-8"
) as temp_file:
    temp_file.write(json.dumps(static_data) + "\n")
    dataset_path = temp_file.name
# Leaving the `with` block closes (and therefore flushes) the file.

# Print the contents of the temporary JSONL file.
print("Dataset contents:")
with open(dataset_path, "r", encoding="utf-8") as f:
    print(f.read())

# Expected extraction result, keyed the same way as the dotted "query" path
# used in the dataset record (patient_info -> patient_name).
expected_output = {
    "ocr_ner_results": {
        "patient_info": {
            "patient_name": "Sarah Sample",
        },
    },
}

# Arguments handed to the evaluator.
# NOTE(review): how FuzzyEvaluator interprets these keys is not visible here;
# the threshold is presumably a similarity score on a 0-100 scale -- confirm.
ocr_args = dict(
    uploaded_files="dummy.pdf",  # Dummy file path (won't be used in this static test)
    expected_output=expected_output,
    similarity_threshold=95.0,
)

evaluator = FuzzyEvaluator(args=ocr_args)

2025-02-09 14:16:11,002 - AIFoundryManager - MainProcess - INFO     Configuration validation successful. (aifoundry_helper.py:_validate_configurations:58)
2025-02-09 14:16:11,003 - AIFoundryManager - MainProcess - INFO     AIProjectClient initialized successfully. (aifoundry_helper.py:_initialize_project:101)


Dataset contents:
{"query": "patient_info.patient_name", "response": "Sarah Sample", "ground_truth": "Sarah Sample"}

Templates found: ['evaluator_system_prompt.jinja', 'evaluator_user_prompt.jinja', 'formulator_system_prompt.jinja', 'formulator_user_prompt.jinja', 'ner_clinician_system.jinja', 'ner_clinician_user.jinja', 'ner_patient_system.jinja', 'ner_patient_user.jinja', 'ner_physician_system.jinja', 'ner_physician_user.jinja', 'ner_system_prompt.jinja', 'ner_user_prompt.jinja', 'prior_auth_o1_user_prompt.jinja', 'prior_auth_o1_user_prompt_b.jinja', 'prior_auth_system_prompt.jinja', 'prior_auth_user_prompt.jinja', 'query_classificator_system_prompt.jinja', 'query_classificator_user_prompt.jinja', 'query_expansion_system_prompt.jinja', 'query_expansion_user_prompt.jinja', 'summarize_policy_system.jinja', 'summarize_policy_user.jinja']


In [10]:
# Display the path of the temporary JSONL dataset as the cell's output.
dataset_path

'/var/folders/j8/3l6kwfpn4ll3ztd_k3_n2yj00000gn/T/tmpq3ljgb7a.jsonl'

In [12]:
# Run evaluate() over the temporary JSONL dataset.
# The evaluator instance built earlier is registered under the key
# "OCRNEREvaluator"; the same key is used in evaluator_config so the column
# mapping is applied to that evaluator.
evaluator_name = "OCRNEREvaluator"

# Bind each evaluator input to the dataset column of the same name.
column_mapping = {
    "query": "${data.query}",
    "ground_truth": "${data.ground_truth}",
    "response": "${data.response}",
}

azure_result = evaluate(
    data=dataset_path,  # Path of the temporary JSONL file.
    evaluators={evaluator_name: evaluator},
    evaluator_config={evaluator_name: {"column_mapping": column_mapping}},
    # Project config exposed by the AI Foundry Manager.
    azure_ai_project=ai_foundry_manager.project_config,
)

print("Azure evaluation result:")
print(azure_result)

[2025-02-09 14:19:44 -0700][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run src_evals_custom_ocr_ner_evaluator_ocrnerevaluator_25lqzdxm_20250209_141944_395455, log path: /Users/marcjimz/.promptflow/.runs/src_evals_custom_ocr_ner_evaluator_ocrnerevaluator_25lqzdxm_20250209_141944_395455/logs.txt


Templates found: ['evaluator_system_prompt.jinja', 'evaluator_user_prompt.jinja', 'formulator_system_prompt.jinja', 'formulator_user_prompt.jinja', 'ner_clinician_system.jinja', 'ner_clinician_user.jinja', 'ner_patient_system.jinja', 'ner_patient_user.jinja', 'ner_physician_system.jinja', 'ner_physician_user.jinja', 'ner_system_prompt.jinja', 'ner_user_prompt.jinja', 'prior_auth_o1_user_prompt.jinja', 'prior_auth_o1_user_prompt_b.jinja', 'prior_auth_system_prompt.jinja', 'prior_auth_user_prompt.jinja', 'query_classificator_system_prompt.jinja', 'query_classificator_user_prompt.jinja', 'query_expansion_system_prompt.jinja', 'query_expansion_user_prompt.jinja', 'summarize_policy_system.jinja', 'summarize_policy_user.jinja']
2025-02-09 14:19:44 -0700   29893 execution.bulk     INFO     Current thread is not main thread, skip signal handler registration in BatchEngine.
2025-02-09 14:19:44 -0700   29893 execution.bulk     INFO     The timeout for the batch run is 3600 seconds.
2025-02-09 14