In [9]:
import os
import backoff
import openai
from openai import AzureOpenAI

# Only transient failures are worth retrying. A 400 BadRequestError or a
# TypeError is deterministic: the identical call fails the identical way, so
# retrying it can never succeed.
error_types = (
    openai.RateLimitError,
    openai.APITimeoutError,
    openai.APIConnectionError,
    openai.InternalServerError,
)

# The Azure client gets its own name so that a later cell rebinding the generic
# name `client` cannot silently redirect Azure calls to a different backend.
azure_client = AzureOpenAI(
    api_version="2023-07-01-preview",
    api_key=os.getenv('OPENAIAZURE_APIKEY'),
    azure_endpoint="https://gpt4v-jb.openai.azure.com",
)
# Alias kept so existing code that refers to `client` after this cell still works.
client = azure_client


@backoff.on_exception(backoff.expo, error_types, max_tries=6)
def completions_with_backoff(**kwargs):
    """Create a chat completion on the Azure client, retrying transient errors.

    All keyword arguments are forwarded unchanged to
    ``azure_client.chat.completions.create``.

    Retries use exponential backoff and stop after 6 attempts, at which point
    the last exception is re-raised to the caller. Errors not listed in
    ``error_types`` propagate immediately.
    """
    return azure_client.chat.completions.create(**kwargs)

def call_gpt_azure(
    system_prompt,
    prompt,
    temperature=0,
    n=1,
    model="JBGPT4TURBO_1106_PREVIEW",
    prompt_price_per_1k=0.03,
    completion_price_per_1k=0.06,
):
    """Run one system+user chat completion through ``completions_with_backoff``.

    Args:
        system_prompt: Content of the ``system`` message.
        prompt: Content of the ``user`` message.
        temperature: Sampling temperature forwarded to the API.
        n: Number of choices requested from the API. Only the first choice is
            returned, so values above 1 are billed but otherwise discarded.
        model: Model/deployment name forwarded to the API.
        prompt_price_per_1k: Price per 1000 prompt tokens used for the
            cost estimate.
        completion_price_per_1k: Price per 1000 completion tokens used for the
            cost estimate.

    Returns:
        A ``(completion, cost)`` tuple: the message content of the first choice
        and the estimated cost computed from the reported token usage.
    """
    # NOTE(review): the default prices are the values that were hard-coded
    # here before; confirm they match the actual pricing of the deployment.
    response = completions_with_backoff(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        n=n,
    )
    usage = response.usage
    cost = (
        usage.completion_tokens * (completion_price_per_1k / 1000)
        + usage.prompt_tokens * (prompt_price_per_1k / 1000)
    )
    completion = response.choices[0].message.content
    return completion, cost

In [10]:
from openai import OpenAI
# Rebinds the module-level name `client`, which an earlier cell also assigns
# (there to an AzureOpenAI client); from here on `client` is this OpenAI client.
# The API key is read from the OPENAI_APIKEY environment variable.
client = OpenAI(api_key=os.getenv('OPENAI_APIKEY'))

def call_gpt(
    system_prompt,
    user_prompt,
    temperature=0,
    max_tokens=512,
    model="gpt-4-0125-preview",
):
    """Run one system+user chat completion on the module-level ``client``.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        model: Model name forwarded to the API (e.g. pass
            ``"gpt-4-1106-preview"`` to use the earlier preview model).

    Returns:
        The message content of the first returned choice.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content

In [11]:
# YAML-style predicate definitions that get embedded verbatim into the task
# description. The literal starts on the assignment line so that the text
# begins with "predicates:" and has no leading or trailing newline.
predicates = """predicates:
admission:
    column: event_type
    value: ADMISSION
    system: boolean
discharge:
    column: event_type
    value: DISCHARGE
    system: boolean
death:
    column: event_type
    value: DEATH
    system: boolean
discharge_or_death:
    type: ANY
    predicates: [discharge, death]
    system: boolean
any:
    type: special"""

In [12]:
# Natural-language cohort request followed by the predicate definitions.
# Adjacent literals are concatenated at compile time; the final f-string piece
# interpolates `predicates` exactly as the single f-string did.
task_description = (
    "The task is to predict in-hospital mortality. "
    "I want to extract a cohort of all patients that were admitted to the hospital. "
    "These patients must have been in the hospital for at least 48 hours "
    "(ie. did not get re-admitted, discharged, or die within the 48 hour window), "
    "and were either discharged or died in the hospital after. "
    "I want to use data from the past 30 days before admission as well as 24 hours into the admission. "
    "The patients should have at least 50 of any type of event in the window leading up to the admission."
    "\n\n"
    "Please use the following predicates:\n"
    f"{predicates}"
)

In [13]:
# System message that sets the assistant's persona for every request.
system_prompt = (
    "You are an expert with electronic health records "
    "and understand the structure of medical time series data."
)

# Few-shot prompt: seven example window configurations, a description of the
# configuration fields, and finally the cohort request interpolated from
# `task_description`. This is an f-string, so any literal braces added to the
# text must be doubled.
#
# The examples are written as valid YAML: keys that belong to a list item
# (`min`, `max`) are aligned with that item's `predicate` key. Look-back
# windows that end at `trigger.start` use a negative duration, matching the
# "negative indicates 'before'" rule stated in the prompt itself.
user_prompt = f"""Your objective is to create a configuration file to represent a patient cohort. The configuration file will be used to query a dataset for valid patients that are in the cohort. You will be provided predicates that you will use to define the cohort for the task. Using these predicates and the cohort description, define windows with constraints that segment the patient time series data.

Here is an example configuration file with windows for a cohort used to predict 24-hour inhospital mortality:
```
windows:
    trigger:
        start: admission
        duration:
        offset:
        end: admission
        excludes:
        includes: 
        - predicate: admission
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    gap:
        start: trigger.end
        duration: 48 hours
        offset:
        end:
        excludes: 
        - predicate: death
        includes: 
        st_inclusive: False
        end_inclusive: True
    target:
        start: gap.end
        duration: 
        offset:
        end: discharge_or_death
        excludes:
        includes: 
        - predicate: discharge_or_death
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
        label: death
    input:
        start:
        duration: -30 days
        offset:
        end: trigger.start
        excludes:  
        includes: 
        st_inclusive: False
        end_inclusive: True
```

Here is an example configuration file with windows for a cohort used to predict abnormal hemoglobin levels:
```
windows:
    trigger:
        start: normal_hemoglobin
        duration:
        offset:
        end: normal_hemoglobin
        excludes:
        includes: 
        - predicate: normal_hemoglobin
          min: 1
        st_inclusive: False
        end_inclusive: True
    gap:
        start: trigger.end
        duration: 24 hours
        offset:
        end:
        excludes:
        includes: 
        st_inclusive: False
        end_inclusive: True
    target:
        start: gap.end
        duration: 7 days
        offset:
        end: 
        excludes:
        includes: 
        - predicate: abnormal_hemoglobin
          min: 1
        st_inclusive: False
        end_inclusive: True
        label: abnormal_hemoglobin
    input:
        start:
        duration: -30 days
        offset:
        end: trigger.start
        excludes:
        includes: 
        - predicate: any
          min: 5
        st_inclusive: False
        end_inclusive: True
```

Here is an example configuration file with windows for a cohort used to extract heart rate outliers:
```
windows:
    trigger:
        start: admission
        duration:
        offset:
        end: admission
        excludes:
        includes: 
        - predicate: admission
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    gap:
        start: trigger.end
        duration: 2 days
        offset:
        end:
        excludes:
        - predicate: admission
        - predicate: discharge
        - predicate: death
        includes: 
        st_inclusive: False
        end_inclusive: True
    target:
        start: gap.end
        duration:
        offset:
        end: discharge_or_death
        excludes:
        includes: 
        - predicate: outlier_heart_rate
          min: 1
          max: 1
        - predicate: discharge_or_death
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
        label: death
```

Here is an example configuration file with windows for a cohort used to predict 48-hour imminent mortality:
```
windows:
    trigger:
        start: any
        duration:
        offset:
        end: any
        excludes:
        includes: 
        - predicate: any
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    target:
        start: trigger.end
        duration: 48 hours
        offset:
        end: 
        excludes:
        includes:
        st_inclusive: False
        end_inclusive: True
        label: death
```

Here is an example configuration file with windows for a cohort used to predict 30-day readmission risk:
```
windows:
    trigger:
        start: admission
        duration:
        offset:
        end: admission
        excludes:
        includes: 
        - predicate: admission
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    input:
        start: trigger.end
        duration:
        offset:
        end: discharge
        excludes:
        includes: 
        - predicate: discharge
          min: 1
          max: 1
        - predicate: any
          min: 5
        st_inclusive: False
        end_inclusive: True
    target:
        start: input.end
        duration: 30 days
        offset:
        end:
        excludes:
        includes: 
        st_inclusive: False
        end_inclusive: True
        label: admission
```

Here is an example configuration file with windows for a cohort used to predict time to weaning from mechanical ventilation:
```
windows:
    trigger:
        start: ventilation_on
        duration:
        offset:
        end: ventilation_on
        excludes:
        includes: 
        - predicate: ventilation_on
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    target:
        start: trigger.end
        duration: 
        offset:
        end: ventilation_off
        excludes:
        includes: 
        - predicate: ventilation_off
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
        label: 
    input:
        start:
        duration: -30 days
        offset:
        end: trigger.start
        excludes:  
        includes: 
        - predicate: any
          min: 100
        st_inclusive: False
        end_inclusive: True
```

Here is an example configuration file with windows for a cohort used to predict long-term incidence of myocardial infarction:
```
windows:
    trigger:
        start: discharge
        duration:
        offset:
        end: discharge
        excludes:
        includes: 
        - predicate: discharge
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
    gap:
        start: trigger.end
        duration: 1 year
        offset:
        end:
        excludes:
        - predicate: myocardial_infarction
        includes:
        st_inclusive: False
        end_inclusive: True
        label: 
    target:
        start: gap.end
        duration: 3 years
        offset:
        end:
        excludes:
        includes: 
        st_inclusive: False
        end_inclusive: True
        label: myocardial_infarction
    input:
        start: admission
        duration:
        offset:
        end: trigger.start
        excludes:  
        includes: 
        - predicate: any
          min: 100
        - predicate: discharge
          min: 1
          max: 1
        st_inclusive: False
        end_inclusive: True
```

Time strictly increases in this context, thus negative indicates 'before' the event and positive indicates 'after' the event.

Each window has a 'name', 'start', 'duration', 'offset', 'end', 'excludes', 'includes', 'st_inclusive', 'end_inclusive'. One of the windows also has a 'label' field. A window can be a time-bound window (defined by a 'start' and a 'duration') or a predicate-bound window (defined by a 'start' and an 'end'), and certain fields can be left blank if they are not applicable.

'trigger', 'gap', 'target', and 'input' are example windows. 'trigger' is often a window that has one event (ie. the same 'start' and 'end' predicate). 'gap' is often a window that excludes certain predicates. 'target' is often a window that has the event of interest and thus has the 'label' field. 'input' is often a window that has the events that are used to predict the event of interest.

Fields:
    - 'start' is the name of the predicate that the window starts from. It can also be the "end" parameter of a previous window.
    - 'duration' is the duration of a time-bounded window and is defined using a number and one of seconds, minutes, hours, or days.
    - 'end' is the name of the predicate that an event-bounded window ends at.
    - 'offset' is the offset from the 'start', defined using a number and one of seconds, minutes, hours, or days.
    - 'excludes' is a list of predicates that the window must not contain and is defined by their names.
    - 'includes' is a list of predicates that the window must contain and is defined by their names and the min and max number of times they need to occur.
    - 'st_inclusive' is a boolean that determines if the predicate at the start of the window should be included within the window.
    - 'end_inclusive' is a boolean that determines if the predicate at the end of the window should be included within the window. 

'label' is reserved for one window and is a predicate name. This field should be used to capture the question that the cohort aims to answer (ie. does the patient die in this window?)

Please only output the configuration yaml.

----------------

Cohort:
{task_description}

----------------

```
<your_windows_configuration_here>
```
"""

In [14]:
# Show the fully rendered prompt (with task_description interpolated) so it can
# be inspected before it is sent. print() is used instead of a bare expression
# so newlines are rendered rather than shown as escaped "\n" in the repr.
print(user_prompt)

Your objective is to create a configuration file to represent a patient cohort. The configuration file will be used to query a dataset for valid patients that are in the cohort. You will be provided predicates that you will use to define the cohort for the task. Using these predicates and the cohort description, define windows with constraints that segment the patient time series data.

Here is an example configuration file with windows for a cohort used to predict 24-hour inhospital mortality:
```
windows:
    trigger:
        start: admission
        duration:
        offset:
        end: admission
        excludes:
        includes: 
        - predicate: admission
            min: 1
            max: 1
        st_inclusive: False
        end_inclusive: True
    gap:
        start: trigger.end
        duration: 48 hours
        offset:
        end:
        excludes: 
        - predicate: death
        includes: 
        st_inclusive: False
        end_inclusive: True
    target:
     

In [15]:
# Send the prompts through call_gpt using its default temperature and
# max_tokens; the returned completion text is stored in `response`.
response = call_gpt(system_prompt, user_prompt)
# Alternative backend: call_gpt_azure returns (completion, cost); the cost is discarded here.
# response, _ = call_gpt_azure(system_prompt, user_prompt)

In [16]:
# Display the raw completion text returned by the model.
print(response)

```yaml
windows:
  trigger:
    start: admission
    duration:
    offset:
    end: admission
    excludes:
    includes: 
      - predicate: admission
        min: 1
        max: 1
    st_inclusive: False
    end_inclusive: True
  gap:
    start: trigger.end
    duration: 48 hours
    offset:
    end:
    excludes: 
      - predicate: admission
      - predicate: discharge
      - predicate: death
    includes: 
    st_inclusive: False
    end_inclusive: True
  target:
    start: gap.end
    duration:
    offset:
    end: discharge_or_death
    excludes:
    includes: 
      - predicate: discharge_or_death
        min: 1
        max: 1
    st_inclusive: False
    end_inclusive: True
    label: death
  input:
    start:
    duration: -30 days
    offset:
    end: trigger.start
    excludes:
    includes: 
      - predicate: any
        min: 50
    st_inclusive: False
    end_inclusive: True
```
