In [61]:
!pip install openai



In [62]:
import os

# Azure OpenAI connection settings, published as environment variables so that
# generate_with_azure() can look them up with os.getenv().
# NOTE(review): every value below just mirrors its variable name in lower case, so
# these look like placeholders — confirm real settings are supplied before running.
os.environ.update({
    'AZURE_OPENAI_API_KEY': 'azure_openai_api_key',
    'AZURE_OPENAI_ENDPOINT': 'azure_openai_endpoint',
    'AZURE_OPENAI_API_VERSION': 'azure_openai_api_version',
    'AZURE_OPENAI_DEPLOYMENT': 'azure_openai_deployment',
})

# Backend switch read by the generation loop: True tries Azure OpenAI first.
USE_AZURE = True

In [63]:
import json
import re
import pandas as pd
from google.colab import files

# Request a file through google.colab's `files.upload()`. The result is kept in
# `uploaded`; the loading cell reads its first key and passes it to pd.read_csv.
uploaded = files.upload()

Saving structured_data_new.csv to structured_data_new (7).csv


In [64]:
# Load the CSV named by the first key of `uploaded`.
csv_name = list(uploaded.keys())[0]
reference_df = pd.read_csv(csv_name)

# Normalise the column headers one step at a time:
# trim whitespace, lower-case, spaces -> underscores, then drop '(', ')' and '%'.
header = reference_df.columns.str.strip()
header = header.str.lower()
header = header.str.replace(' ', '_')
header = header.str.replace(r'[()%]', '', regex=True)
reference_df.columns = header

reference_df.head()

Unnamed: 0,patient_id,name,age,gender,date,encounter_type,chief_complaint,hpi,temperature,pulse_bpm,o2_saturation_,diagnosis,plan,icd-10_code
0,P-1001,Robert Brown,47,Male,02-01-2024,Telemedicine Consultation,skin rash for 4 days,Localized skin rash with mild itching. No feve...,98,76,99,"Dermatitis, unspecified",Start antacids; Advise bland diet; Follow up i...,L30.9
1,P-1002,Sarah Wilson,29,Male,03-01-2024,Telemedicine Consultation,headache and dizziness for 6 days,"Mild to moderate headache without nausea, phot...",98,74,99,"Headache, unspecified",Start NSAIDs; Gentle stretching advised; Follo...,R51.9
2,P-1003,Olivia Clark,26,Male,04-01-2024,Telemedicine Consultation,mild fever and fatigue for 6 days,Acute onset fever with cough and sore throat. ...,100,88,97,Acute upper respiratory infection,Suggest rest and muscle relaxants; Apply warm ...,J06.9
3,P-1004,Michael Johnson,28,Male,05-01-2024,Outpatient Visit,lower back pain for 6 days,Symptoms began suddenly. Hydration helps sligh...,97,94,95,Mild Asthma Exacerbation,Prescribe throat lozenges; Recommend saltwater...,J45.901
4,P-1005,Michael Johnson,23,Female,06-01-2024,Telemedicine Consultation,headache and dizziness for 8 days,"Mild to moderate headache without nausea, phot...",98,74,99,"Headache, unspecified",Start analgesics; Recommend adequate hydration...,R51.9


In [65]:
def extract_json(text):
    """
    Extracts the first JSON object found in a string.

    The whole string is parsed first; if that succeeds the parsed value is
    returned as-is. Otherwise the text is scanned left to right and the first
    ``{`` from which a complete JSON object can be decoded wins. Anything after
    that object (a second object, closing code fences, stray braces) is ignored.

    Args:
        text: Raw model output, possibly with prose or markdown around the JSON.

    Returns:
        The decoded JSON value (a dict whenever the scanning fallback is used).

    Raises:
        ValueError: If ``text`` is None or contains no decodable JSON object.
    """
    if text is None:
        raise ValueError("No JSON object found in model output")

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # A greedy "{.*}" regex would span from the first "{" to the LAST "}", which
    # breaks as soon as the text holds two objects or a trailing brace.
    # raw_decode stops exactly where the first complete object ends.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
            return parsed
        except json.JSONDecodeError:
            # This "{" does not open a valid object; try the next one.
            start = text.find("{", start + 1)

    raise ValueError("No JSON object found in model output")

In [66]:
from openai import AzureOpenAI

def generate_with_azure(patient_name, cc, hpi):
    """
    Ask an Azure OpenAI chat deployment for a clinical note and parse the reply.

    Connection settings are read from the environment variables
    AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_VERSION and
    AZURE_OPENAI_DEPLOYMENT.

    Args:
        patient_name: Patient name interpolated into the prompt.
        cc: Chief complaint text interpolated into the prompt.
        hpi: History of present illness text interpolated into the prompt.

    Returns:
        Whatever extract_json() decodes from the first choice's message content.

    Raises:
        ValueError: If the response carries no message content, or if
            extract_json() finds no JSON object in it.
    """
    client = AzureOpenAI(
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION")
    )

    system_prompt = (
        "You are a clinical documentation assistant.\n"
        "The clinical_note MUST start with the patient's name.\n"
        "Return ONLY valid JSON.\n"
        "The field 'clinical_note' MUST be a single plain-text string.\n"
        "Do NOT nest objects inside clinical_note.\n"
        "Do NOT include chief_complaint or hpi as JSON objects."
    )

    # The key list names clinical_note explicitly so it agrees with the system
    # prompt, and the two instructions are separated by a newline instead of
    # running together as "note.Return".
    user_prompt = (
        f"Patient Name: {patient_name}\n"
        f"Chief Complaint: {cc}\n"
        f"HPI: {hpi}\n\n"
        "Generate a professional clinical note.\n"
        "Return JSON with keys: "
        "clinical_note, assessment, icd10_code, icd10_description."
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    response = client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        messages=messages,
        temperature=0.2
    )

    raw_text = response.choices[0].message.content
    print("RAW AZURE OUTPUT:\n", raw_text)

    # Fail with a clear message rather than passing None on to the JSON parser.
    if raw_text is None:
        raise ValueError("Azure OpenAI returned no message content")

    return extract_json(raw_text)

In [67]:
def generate_with_mock(cc, hpi):
    """
    Keyword-based stand-in for the LLM backend.

    The chief complaint and HPI are joined and lower-cased, then matched
    against fixed keywords: 'chest' first, then 'headache'; anything else gets
    the upper-respiratory-infection record.

    Args:
        cc: Chief complaint text. Non-string values (e.g. None or NaN from an
            empty CSV cell) are treated as empty text instead of raising.
        hpi: History of present illness text; same handling as ``cc``.

    Returns:
        A new dict with keys clinical_note, assessment, icd10_code and
        icd10_description.
    """
    def _as_text(value):
        # Anything that is not a string contributes no keywords.
        return value if isinstance(value, str) else ''

    text = (_as_text(cc) + ' ' + _as_text(hpi)).lower()

    if 'chest' in text:
        return {
            'clinical_note': 'Patient reports chest pain.',
            'assessment': 'Chest pain',
            'icd10_code': 'R07.9',
            'icd10_description': 'Chest pain, unspecified',
        }
    if 'headache' in text:
        return {
            'clinical_note': 'Patient presents with headache.',
            'assessment': 'Headache',
            'icd10_code': 'R51.9',
            'icd10_description': 'Headache, unspecified',
        }
    return {
        'clinical_note': 'Patient presents with URI symptoms.',
        'assessment': 'Upper respiratory infection',
        'icd10_code': 'J06.9',
        'icd10_description': 'Acute upper respiratory infection, unspecified',
    }

In [68]:
import os
# Create the output folder if it does not exist yet.
os.makedirs('outputs', exist_ok=True)

# Write one JSON file per row of reference_df, named after the row's patient_id.
for _, row in reference_df.iterrows():
    try:
        if USE_AZURE:
            data = generate_with_azure(row['name'], row['chief_complaint'], row['hpi'])
            backend = 'azure_openai'
        else:
            # Raising here routes the disabled case through the same fallback path.
            raise Exception('Azure disabled')
    except Exception as e:
        print("Azure OpenAI failed, falling back to mock.")
        print("Error:", e)
        # The fallback must be bound to `data`, because `data` is what gets
        # unpacked into the output below. Under any other name the first failure
        # raises NameError and later failures reuse the previous row's result.
        data = generate_with_mock(row['chief_complaint'], row['hpi'])
        backend = "mock_llm"

    output = {'patient_id': row['patient_id'], **data, 'generation_metadata': {'backend': backend}}

    with open(f"outputs/{row['patient_id']}.json", 'w') as f:
        json.dump(output, f, indent=2)

os.listdir('outputs')

RAW AZURE OUTPUT:
 {
  "clinical_note": "Robert Brown presents with a localized skin rash for 4 days accompanied by mild itching. There are no associated fever or systemic symptoms reported.",
  "assessment": "Likely contact dermatitis or mild allergic reaction.",
  "icd10_code": "L23.9",
  "icd10_description": "Allergic contact dermatitis, unspecified cause"
}
RAW AZURE OUTPUT:
 {
  "clinical_note": "Sarah Wilson presents with a 6-day history of headache and dizziness. The headache is described as mild to moderate in intensity and is not associated with nausea, photophobia, or aura.",
  "assessment": "Likely tension-type headache or vestibular dysfunction. Further evaluation needed to rule out other causes.",
  "icd10_code": "R51.9",
  "icd10_description": "Headache, unspecified"
}
RAW AZURE OUTPUT:
 {
  "clinical_note": "Olivia Clark presents with a mild fever and fatigue for 6 days. She reports an acute onset of fever accompanied by cough and sore throat. There is no shortness of br

['P-1016.json',
 'P-1071.json',
 'P-1052.json',
 'P-1011.json',
 'P-1066.json',
 'P-1084.json',
 'P-1006.json',
 'P-1044.json',
 'P-1041.json',
 'P-1045.json',
 'P-1090.json',
 'P-1076.json',
 'P-1015.json',
 'P-1047.json',
 'P-1051.json',
 'P-1097.json',
 'P-1067.json',
 'P-1070.json',
 'P-1077.json',
 'P-1043.json',
 'P-1023.json',
 'P-1073.json',
 'P-1048.json',
 'P-1072.json',
 'P-1029.json',
 'P-1012.json',
 'P-1089.json',
 'P-1018.json',
 'P-1091.json',
 'P-1038.json',
 'P-1080.json',
 'P-1063.json',
 'P-1009.json',
 'P-1017.json',
 'P-1086.json',
 'P-1036.json',
 'P-1095.json',
 'P-1093.json',
 'P-1022.json',
 'P-1025.json',
 'P-1034.json',
 'P-1065.json',
 'P-1053.json',
 'P-1082.json',
 'P-1062.json',
 'P-1027.json',
 'P-1014.json',
 'P-1010.json',
 'P-1013.json',
 'P-1039.json',
 'P-1058.json',
 'P-1088.json',
 'P-1028.json',
 'P-1057.json',
 'P-1033.json',
 'P-1060.json',
 'P-1075.json',
 'P-1030.json',
 'P-1092.json',
 'P-1026.json',
 'P-1055.json',
 'P-1079.json',
 'P-1099