# How many NTL calls will be dropped?

In this notebook, we explore the number of Nurse Triage Line (NTL) calls that _would have been_ sent to Basic Life Support (BLS) had the NTL we are implementing existed in 2016. That is, if there were $R$ nurses available between 7am and 11pm, with $R_\ell$ of them being local, how many calls would have been sent directly to the _status quo_ BLS service instead of being handled by a nurse?

In [None]:
from collections import deque
from datetime import timedelta

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from femsntl.datafiles import EMS_EVENTS_2016

In [None]:
# Load the EMS events from the CSV file located by EMS_EVENTS_2016.
df = pd.read_csv(EMS_EVENTS_2016)

## A little exploration

The NTL will introduce a new coding system to OUC that calls out new, specific dispatch codes which are NTL-eligible. According to our discussions with FEMS, the old `Alpha` and `Omega` codes form a rough superset of all the new NTL-eligible codes. Thus, to compute a conservative estimate of all dropped calls, we can examine only the Alphas and Omegas.

In [None]:
# Keep the time of the call and the dispatch code; mark whether the call is A or O.
def _is_ao(code):
    """Return True when the third character of the code is "A" or "O" (any case).

    The code is converted with ``str`` first; codes shorter than three
    characters are never flagged.
    """
    text = str(code)
    return (len(text) >= 3) and (text[2].upper() in "AO")


# Copy so that adding the flag column does not write into a slice of the raw frame.
df = df[["AD_TS", "TYCOD"]].copy()
df["ao"] = df.TYCOD.apply(_is_ao)

print("Proportion of all calls which are A or O", df.ao.mean())

In [None]:
# Show a handful of raw values so the string format is visible before parsing.
print(df.AD_TS.sample(n=10))


def _parse_ad_ts(raw):
    """Parse a raw AD_TS string after discarding its final two characters."""
    return pd.to_datetime(raw[:-2])


# Convert the timestamp field to an actual timestamp
df["ad_ts"] = df.AD_TS.apply(_parse_ad_ts)

In [None]:
plt.figure(figsize=(10, 8))

# Draw one daily-count line per subset: every call, then only the A/O calls.
for calls, label in ((df, "All calls"), (df[df.ao], "AO calls")):
    per_day = calls.groupby(pd.Grouper(key="ad_ts", freq="D")).size()
    per_day.plot(label=label)

plt.title("Number of calls per day")
plt.xlabel("Time of call")
plt.ylabel("Count of calls")
plt.legend()
plt.show();

In [None]:
print("AO calls per day of week")
# Day-of-week number of each A/O call, tallied and ordered by day number.
ao_weekdays = df.loc[df.ao, "ad_ts"].dt.dayofweek
ao_weekdays.value_counts().sort_index()

In [None]:
print("All calls per day of week")
# Day-of-week number of every call, tallied and ordered by day number.
all_weekdays = df.ad_ts.dt.dayofweek
all_weekdays.value_counts().sort_index()

## Computing dropped calls

In order to compute how many calls actually end up being dropped, we assume that each call takes a fixed number of minutes and that once a call is complete, the nurse is immediately available.

In [None]:
def dropped_timestamps(timestamps, delta=timedelta(minutes=10), num_slots=6):
    """
    Determine which calls would end up dropped if there are a fixed number
    of slots available and every call lasts a fixed length of time.

    Calls are processed in chronological order. A call occupies a slot for
    the half-open interval [start, start + delta): a slot whose call ends at
    exactly the moment the next call arrives counts as free, i.e. a nurse is
    available immediately after finishing a call.

    Arguments:
        timestamps (iterable[datetime]): The call times, in any order
        delta (timedelta): The fixed length of each call
        num_slots (int): The number of slots available for calls

    Returns:
        list[(int, datetime, int)]: For each call, its position in the
            chronologically sorted sequence, its timestamp, and which slot
            answered the call. The slot is the number of calls already in
            progress when the call arrived. If the slot == num_slots, the
            call was dropped. Slots in [0, num_slots) are actual slots.
    """
    timestamps = sorted(timestamps)

    # End times of the calls currently in progress. Arrivals are sorted and
    # delta is constant, so end times are appended in non-decreasing order
    # and the earliest one is always at the left of the deque.
    on_call = deque()
    nurse_idx = []

    for i, next_call in enumerate(timestamps):
        # Remove calls which are now over. `<=` (not `<`) so that a call
        # finishing at this exact instant releases its slot for this caller.
        while on_call and on_call[0] <= next_call:
            on_call.popleft()

        if len(on_call) < num_slots:
            nurse_idx.append((i, next_call, len(on_call)))
            on_call.append(next_call + delta)
        else:
            # Every slot is busy: record the sentinel slot and do not occupy one.
            nurse_idx.append((i, next_call, num_slots))
    return nurse_idx

### Visualize the world where _all_ AO calls are sent to the nurse

In [None]:
# Simulate every A/O call going to the nurse line with the default call
# length and slot count.
nurse_idx = dropped_timestamps(df.loc[df.ao, "ad_ts"])
nurse_df = pd.DataFrame.from_records(nurse_idx, columns=["idx", "timestamp", "nurse"])

# Number of calls per (slot, calendar day), flattened to one row per pair.
calls_by_slot_and_day = nurse_df.groupby(
    ["nurse", pd.Grouper(key="timestamp", freq="D")]
)
ddf = calls_by_slot_and_day.size().rename("num_calls").reset_index()

In [None]:
# One line per slot: weekly call totals on a log-scaled y axis.
for nurse in ddf.nurse.unique():
    slot_calls = ddf[ddf.nurse == nurse]
    weekly_totals = slot_calls.groupby(
        pd.Grouper(key="timestamp", freq="W")
    ).num_calls.sum()
    weekly_totals.plot(label=str(nurse), logy=True)

plt.title("Number of calls per week assigned to each slot")
plt.xlabel("Date of call")
plt.ylabel("Number of calls per Week")
plt.legend();

### Visualize the study universe

In our study, the nurse will be available from 7am until 11pm. Moreover, only a random half of eligible callers will be sent to the nurse.

In [None]:
# Eligible calls: A/O calls whose hour of day is in [7, 23).
call_hour = df.ad_ts.dt.hour
in_service_hours = (call_hour >= 7) & (call_hour < 23)
eligible_df = df[df.ao & in_service_hours]

# Start from an alternating False/True mask, then shuffle it with a fixed
# seed so the treatment assignment is reproducible.
is_treatment = (np.arange(len(eligible_df)) % 2).astype(bool)
r = np.random.RandomState(25)
r.shuffle(is_treatment)

In [None]:
def _plot_drops_by_timeperiod(timestamps, delta_minutes, tp, num_slots=4, ax=None):
    """
    Simulate the call queue and plot, per slot, the number of calls in each
    time period on a log-scaled y axis. The axes title reports the share of
    calls that were dropped.

    Arguments:
        timestamps (iterable[datetime]): The call times
        delta_minutes (int | float): The fixed call length, in minutes
        tp (str): "H" groups calls by hour of day; any other value is passed
            to ``pd.Grouper`` as the ``freq`` used to bucket the timestamps
        num_slots (int): The number of slots available for calls
        ax (matplotlib Axes | None): The axes to draw on. When None, the
            current axes (``plt.gca()``) are used.
    """
    if ax is None:
        # Without this fallback the default argument fails on the first
        # `ax.` attribute access below.
        ax = plt.gca()

    delta = timedelta(minutes=delta_minutes)
    nurse_idx = dropped_timestamps(timestamps, delta=delta, num_slots=num_slots)
    nurse_df = pd.DataFrame.from_records(
        nurse_idx, columns=["idx", "timestamp", "nurse"]
    )

    if tp == "H":
        # Hour of day (0-23) pooled over all dates, not consecutive hourly bins.
        counts_df = (
            nurse_df.groupby(["nurse", nurse_df.timestamp.dt.hour])
            .size()
            .rename("num_calls")
        )
    else:
        counts_df = (
            nurse_df.groupby(["nurse", pd.Grouper(key="timestamp", freq=tp)])
            .size()
            .rename("num_calls")
        )

    # counts_df is indexed by (nurse, period); .loc[nurse] selects one slot's series.
    for nurse in nurse_df.nurse.unique():
        counts_df.loc[nurse].plot(label=nurse, logy=True, ax=ax)
    # NOTE(review): this sets the Axes artist's own label, not the x-axis
    # label; possibly `set_xlabel` was intended. Confirm before changing.
    ax.set_label("")

    counts_df = counts_df.reset_index()
    # Slot values >= num_slots mark dropped calls.
    percent_dropped = (
        counts_df[counts_df.nurse >= num_slots].num_calls.sum()
        / counts_df.num_calls.sum()
    )
    ax.set_title(
        f"{delta_minutes} min triage: {round(percent_dropped * 100, 2)}% Dropped"
    )


def plot_drops_by_hour(timestamps, delta_minutes, num_slots=4, ax=None):
    """Plot per-slot call counts by hour of day (time period "H")."""
    _plot_drops_by_timeperiod(
        timestamps=timestamps,
        delta_minutes=delta_minutes,
        tp="H",
        num_slots=num_slots,
        ax=ax,
    )


def plot_drops_by_week(timestamps, delta_minutes, num_slots=4, ax=None):
    """Plot per-slot call counts by week (time period "W")."""
    _plot_drops_by_timeperiod(
        timestamps=timestamps,
        delta_minutes=delta_minutes,
        tp="W",
        num_slots=num_slots,
        ax=ax,
    )

In [None]:
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(6.5 * 2, 9 * 2))

# One row per assumed triage length: weekly view on the left, hourly on the right.
treated_calls = eligible_df[is_treatment].ad_ts
for row, minutes in enumerate([10, 12.5, 15]):
    plot_drops_by_week(treated_calls, minutes, ax=axs[row, 0])
    plot_drops_by_hour(treated_calls, minutes, ax=axs[row, 1])

# Strip x labels and ticks from the top two rows; only the bottom row keeps them.
for upper_ax in axs[:2].ravel():
    upper_ax.set_xlabel("")
    for minor in (True, False):
        upper_ax.set_xticks([], minor=minor)
    for minor in (True, False):
        upper_ax.set_xticklabels([], minor=minor)

for bottom_ax, label in zip(axs[2], ("Week", "Hour of Day")):
    bottom_ax.set_xlabel(label)

In [None]:
def percent_local_by_week(timestamps, delta_minutes, num_local=1, num_slots=4):
    """
    Return the fraction of calls answered by a slot numbered below num_local.

    Arguments:
        timestamps (iterable[datetime]): The call times
        delta_minutes (int | float): The fixed call length, in minutes
        num_local (int): Slots in [0, num_local) count as local
        num_slots (int): The number of slots available for calls

    Returns:
        float: Calls assigned to a local slot divided by all calls counted,
            dropped calls included in the denominator.
    """
    assignments = pd.DataFrame.from_records(
        dropped_timestamps(
            timestamps,
            delta=timedelta(minutes=delta_minutes),
            num_slots=num_slots,
        ),
        columns=["idx", "timestamp", "nurse"],
    )

    # Tally calls per (slot, week); the totals below sum over these tallies.
    per_slot_week = assignments.groupby(
        ["nurse", pd.Grouper(key="timestamp", freq="W")]
    )
    weekly_counts = per_slot_week.size().rename("num_calls").reset_index()

    is_local = weekly_counts.nurse < num_local
    local_calls = weekly_counts.loc[is_local, "num_calls"].sum()
    total_calls = weekly_counts.num_calls.sum()
    return local_calls / total_calls

In [None]:
# Print the local share for every (call length, number of local slots) pair.
treated_calls = eligible_df[is_treatment].ad_ts
for minutes in [10, 12.5, 15]:
    for num_local in [1, 2, 3, 4]:
        local_share = percent_local_by_week(
            treated_calls, minutes, num_local=num_local
        )
        print(minutes, num_local, local_share)