In [1]:
import glob

In [2]:
all_files = glob.glob("../data/processed/*.csv")

In [3]:
all_files[:8]

['../data/processed/2019_07_15_1957_5_3hosp_austin_vicsim_codestroke_usage.csv',
 '../data/processed/2019_07_15_1952_3_4hosp_vicsim_simdata.csv',
 '../data/processed/2019_07_15_1947_3_3hosp_austin_vicsim_acutebed_usage.csv',
 '../data/processed/2019_07_15_1959_5_3hosp_alfred_vicsim_acutebed_usage.csv',
 '../data/processed/2019_07_15_1954_5_2hosp_vicsim_codestroke_usage.csv',
 '../data/processed/2019_07_15_2018_15_3hosp_austin_vicsim_codestroke_usage.csv',
 '../data/processed/2019_07_15_2005_10_2hosp_vicsim_dataanal.csv',
 '../data/processed/2019_07_15_1959_5_3hosp_alfred_vicsim_simdata.csv']

In [4]:
ecr_proportion_list = [3, 5, 10, 15]
combination_list = ["2hosp", "3hosp_austin", "3hosp_alfred", "4hosp"]

combination_string_dict = {
    "2hosp": "ECR Capable = Monash + RMH",
    "3hosp_austin": "ECR Capable = Monash + RMH + Austin",
    "3hosp_alfred": "ECR Capable = Monash + RMH + Alfred",
    "4hosp": "ECR Capable = Monash + RMH + Austin + Alfred"
}

hospital_data_folder = {
    "2hosp": "../data/internal/hospitalupdate/2hosp/",
    "3hosp_austin": "../data/internal/hospitalupdate/3hosp/",
    "3hosp_alfred": "../data/internal/hospitalupdate/3hosp_alfred/",
    "4hosp": "../data/internal/hospitalupdate/4hosp/"
}

In [5]:
import re

In [6]:
def make_regex(proportion : int, combination : str, datatype : str):
    """
    datatype must be one of:
        - "acutebed_usage"
        - "codestroke_usage"
        - "simdata"
        - "dataanal"
    """
    regex = r"../data/processed/(\d+_\d+_\d+_\d+)_{}_{}_vicsim_{}.csv".format(
        str(proportion),
        combination,
        datatype,
    )
    return re.compile(regex)

In [7]:
regex = make_regex(ecr_proportion_list[2], combination_list[2], "acutebed_usage")

In [8]:
[file for file in all_files if regex.match(file)]

['../data/processed/2019_07_15_2010_10_3hosp_alfred_vicsim_acutebed_usage.csv']

# Plotting Functions

In [9]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from mpl_toolkits.basemap import Basemap
import matplotlib
from matplotlib.patches import Polygon
from matplotlib.colors import hsv_to_rgb
from matplotlib.collections import PatchCollection

import shapefile

from geopy import distance

In [10]:
location_data = pd.read_csv('../../data/processed/hospital_data.csv')

In [11]:
csc_data_dict = {
    "2hosp": location_data[
        (location_data.name == "Monash Health") | (location_data.name == "Royal Melbourne Hospital")
    ],
    "3hosp_austin": location_data[
        (location_data.name == "Monash Health") |
        (location_data.name == "Royal Melbourne Hospital")  |
        (location_data.name == "Austin Health")
    ],
    "3hosp_alfred": location_data[
        (location_data.name == "Monash Health") |
        (location_data.name == "Royal Melbourne Hospital")  |
        (location_data.name == "The Alfred")        
    ],
    "4hosp": location_data[
        (location_data.name == "Monash Health") |
        (location_data.name == "Royal Melbourne Hospital")  |
        (location_data.name == "Austin Health") |
        (location_data.name == "The Alfred")
    ]
}

In [12]:
def get_hospital_data(combination: str):
    hospital_data_columns = ["name", "capacity", "acutebedCapacity", "nonacutebedCapacity"]

    rural_hospital_data = pd.read_csv(
        hospital_data_folder[combination] + "rhosp.csv", names=hospital_data_columns
    )
    rural_hospital_data["hospital_type"] = "rural"

    peripheral_hospital_data = pd.read_csv(
        hospital_data_folder[combination] + "pschosp.csv", names=hospital_data_columns
    )
    peripheral_hospital_data["hospital_type"] = "peripheral"

    central_hospital_data = pd.read_csv(
        hospital_data_folder[combination] + "cschosp.csv", names=hospital_data_columns
    )
    central_hospital_data["hospital_type"] = "central"
    
    hospital_data = pd.concat(
        [rural_hospital_data, peripheral_hospital_data, central_hospital_data]
    ).set_index("name")  
    
    central_hospitals = hospital_data[hospital_data.hospital_type == "central"].index

    angio_dicts = [{"name": hospital, "capacity": hospital_data.acutebedCapacity.loc[hospital]} for hospital in central_hospitals]

    angio_capacities = pd.DataFrame.from_records(angio_dicts).set_index("name")
    
    return (hospital_data, central_hospitals, angio_capacities)

In [13]:
def plot_csc(df, csc_data, zoom_scale, show_hospitals=True):
    """ Plots per-patient CSC allocations.
    
    Args:
        df (Pandas.DataFrame): simulation data
        zoom_scale (float)
    
    """    
    centre = [-37.8, 145]
    bbox = [centre[0]-zoom_scale, centre[0]+zoom_scale,
            centre[1]-zoom_scale, centre[1]+zoom_scale]

    m = Basemap(projection='merc',llcrnrlat=bbox[0],urcrnrlat=bbox[1],
                llcrnrlon=bbox[2],urcrnrlon=bbox[3],lat_ts=10,resolution='i')
    m.drawcoastlines()
    m.fillcontinents(alpha=0.1)
    m.drawparallels(np.arange(bbox[0],bbox[1],(bbox[1]-bbox[0])/5),labels=[1,0,0,0], color="#cccccc")
    m.drawmeridians(np.arange(bbox[2],bbox[3],(bbox[3]-bbox[2])/5),labels=[0,0,0,1],rotation=90, color="#cccccc")
    m.drawmapboundary(fill_color='white')

    def convert(origin_latitude, origin_longitude):
        return m(origin_longitude, origin_latitude)

    m_coords = []

    for csc in csc_data.name:
        subset = df[(df.hospital_of_arrival == csc) | (df.hospital_of_transfer == csc)]
        m_coords.append(subset[['stroke_location_latitude','stroke_location_longitude']].apply(lambda x: convert(*x), axis=1))

    axes = []
    
    # Submission only colour modifications, as per request.
    colours = {
        "Austin Health": (1,0.5,0.5),
        "Monash Health": (0.5,1,0.5),
        "Royal Melbourne Hospital": (0.5,0.5,1),
        "The Alfred": (0.96, 0.74,0.26)
    }
    
    for i, cs in enumerate(m_coords):
        xs = [c[0] for c in cs]
        ys = [c[1] for c in cs]
        name = csc_data.iloc[i]["name"]
        axes.append(m.scatter(xs,ys,marker='D',c=colours[name], s=4/zoom_scale, alpha=0.9))
    
    # Submission only name modifications, as per request.
    renamed = csc_data.copy(deep=True)
    renamed.loc[0, "name"] = "Austin Hospital"
    renamed.loc[1, "name"] = "Monash Medical Centre"
    
    if show_hospitals:
        hosp_coords = csc_data[['latitude', 'longitude']].apply(lambda x: convert(*x), axis=1)
        for i, c in enumerate(hosp_coords):
            min_c = convert(bbox[0], bbox[2])
            max_c = convert(bbox[1], bbox[3])
            if c[0] > min_c[0] and c[0] < max_c[0] and c[1] > min_c[1] and c[1] < max_c[1]:
                m.plot(c[0], c[1], 'r*')
                plt.text(c[0], c[1], renamed.iloc[i]["name"], fontsize=12)
    else:
        plt.legend(axes, renamed.name)

In [14]:
def append_to_series(series, last_value_change):
    return series.append(pd.Series(series.iloc[-1] + last_value_change))

def plot_utilisation_over_time(usage, resource_capacities, resource_type, plot_yticks=False):
    
    n_resources = len(resource_capacities)
    resource_names = set(usage.name)

    fig, axes = plt.subplots(nrows=n_resources, sharex = True)

    for i, resource in enumerate(resource_names):
        subset = usage[usage.name == resource]
        
        # Just use the first 31 days
        subset = subset[subset.event_time < (60 * 24 * 31)]

        ys_capacity = subset.capacity_at_event_time
        ys_queue = subset.queue_at_event_time    
        events = subset.event

        # Calculate last value
        try:
            if events.iloc[-1] == "request":
                if ys_capacity.iloc[-1] == resource_capacities.loc[resource].capacity: # last one is queued
                    ys_capacity = append_to_series(ys_capacity, 0)
                    ys_queue = append_to_series(ys_queue, 1)
                else: # last one is admitted
                    ys_capacity = append_to_series(ys_capacity, 1)
                    ys_queue = append_to_series(ys_queue, 0)
            else:
                if ys_queue.iloc[-1] > 0: # patient was in queue
                    ys_capacity = append_to_series(ys_capacity, 0)
                    ys_queue = append_to_series(ys_queue, -1)
                else:
                    ys_capacity = append_to_series(ys_capacity, -1)
                    ys_queue = append_to_series(ys_queue, 0)
                    
                # Stack capacity and queue
            ys = ys_capacity + ys_queue
            xs = pd.Series([0]).append(subset.event_time)
        except IndexError: # likely no entries
            xs = []
            ys = []

        #axes[i].step(xs, ys, where="post", label="Queue", linewidth=0.05)
        #axes[i].step(xs, ys_capacity, where="post", label="Capacity", linewidth=0.05)
        axes[i].fill_between(xs, ys, step="post", label="Queue", alpha = 0.5)
        axes[i].fill_between(xs, ys_capacity, step="post", label="Capacity", alpha=1)
        #axes[i].bar(xs, ys, label="Queue")
        #axes[i].bar(xs, ys_capacity, label="Capacity")
        #zaxes[i].set_ylim([0, max(usage.capacity_at_event_time) + max(usage.queue_at_event_time)])
        #axes[i].set_xlim([0, max(usage.event_time)])
        axes[i].set_ylabel(resource, rotation=0, labelpad=80)
        
        if not plot_yticks:
            axes[i].set_yticklabels([]) # remove y-tick

        if i == 0:
            axes[i].legend(loc="upper right")
            axes[i].set_title('Capacity and Queue (Stacked) of {} During Simulation (First 31 Days)'.format(resource_type))

    fig.text(0.5, 0.05, 'Simulation Time (minutes)', ha='center')

In [15]:
def plot_total_utilisation(usage, resource_capacities, max_time, resource_type):
    
    n_resources = len(resource_capacities)
    resource_names = set(usage.name)
    
    resource_names_with_capacities = ["{} (max {})".format(resource, resource_capacities.at[resource, "capacity"])
                                     for resource in resource_names]

    totals = []
    for i, resource in enumerate(resource_names):
        subset = usage[usage.name == resource]

        ys_capacity = subset.capacity_at_event_time

        xs = subset.event_time
        
        utilisation = 0
        max_capacity = resource_capacities.at[resource, "capacity"]
        #max_utilisation = max_capacity * max_time
        #max_utilisation = max_capacity * max(xs)
        
        j = 0
        max_utilisation = 0
        for k, capacity in ys_capacity.iteritems():
            if j == 0:
                point_utilisation = 0
                point_max = xs.iloc[0] * max_capacity
            else:
                point_utilisation = (xs.iloc[j] - xs.iloc[j-1]) * capacity
                point_max = (xs.iloc[j] - xs.iloc[j-1]) * max_capacity
            max_utilisation += point_max
            utilisation += point_utilisation
            j += 1
            
        utilisation = utilisation / max_utilisation
            
        totals.append((resource, utilisation))
    
    # total_utilisation = pd.DataFrame(totals)
    totals.sort(key=lambda x: x[1], reverse=True)
    sorted_values = [t[1] for t in totals]
    sorted_names = [t[0] for t in totals]
    
    plt.bar(range(n_resources), sorted_values)
    
    max_value = np.max(sorted_values)
    
    if max_value > 0.5:
        plt.ylim((0, 1))
    else:
        plt.ylim((0, 0.5))
        
    plt.xticks(range(n_resources), sorted_names, rotation=90)
    plt.axhline(np.mean(sorted_values), color="red", linestyle=":")
    plt.title("Total Utilisation (Capacity) as a \nProportion of Max Capacity for {}".format(resource_type))

In [16]:
def make_plots(proportion : int, combination : str):
    
    hospital_usage_regex = make_regex(proportion, combination, "codestroke_usage")
    angio_usage_regex = make_regex(proportion, combination, "acutebed_usage")
    simdata_regex = make_regex(proportion, combination, "simdata")
    
    hospital_usage_file = [file for file in all_files if hospital_usage_regex.match(file)]
    angio_usage_file = [file for file in all_files if angio_usage_regex.match(file)]
    simdata_file = [file for file in all_files if simdata_regex.match(file)]
    
    try:
        assert len(hospital_usage_file) == 1
        assert len(angio_usage_file) == 1
        assert len(simdata_file) == 1
    except AssertionError:
        print(hospital_usage_file)
        print(angio_usage_file)
        print(simdata_file)
    
    hospital_usage = pd.read_csv(hospital_usage_file[0])
    angio_usage = pd.read_csv(angio_usage_file[0])
    
    hospital_data, central_hospitals, angio_capacities = get_hospital_data(combination)

    hospitals = set(hospital_usage.name)
    
    plt.figure()
    matplotlib.rcParams['figure.figsize'] = [20, 20]
    plot_utilisation_over_time(hospital_usage, hospital_data, "Codestroke")
    plt.savefig(hospital_usage_file[0].replace("../data/processed/", "../graphics/").replace(".csv", "_utilisation.png"))
    plt.close()
    
    plt.figure()
    matplotlib.rcParams['figure.figsize'] = [20, 4]
    plot_utilisation_over_time(angio_usage, angio_capacities, "Acute Beds", plot_yticks=True)
    plt.savefig(angio_usage_file[0].replace("../data/processed/", "../graphics/").replace(".csv", "_utilisation.png"))
    plt.close()
    
    max_time = None # not used for now
    
    fig = plt.figure(figsize=(16,10))
    #matplotlib.rcParams['figure.figsize'] = [10, 16]
    plot_total_utilisation(hospital_usage, hospital_data, max_time, "Codestroke")
    fig.subplots_adjust(bottom = 0.5)
    fig.subplots_adjust(top = 1)
    plt.tight_layout()
    plt.savefig(hospital_usage_file[0].replace("../data/processed/", "../graphics/").replace(".csv", "_bar_comparison.png"))
    plt.close()
    
    fig = plt.figure(figsize=(8,8))
    #matplotlib.rcParams['figure.figsize'] = [6,6]
    plot_total_utilisation(angio_usage,
                           angio_capacities,
                           max_time, "Acute Beds")
    plt.tight_layout()
    plt.savefig(angio_usage_file[0].replace("../data/processed/", "../graphics/").replace(".csv", "_bar_comparison.png"))
    plt.close()

    simdata_df = pd.read_csv(simdata_file[0])
    csc_data = csc_data_dict[combination]
    
    plt.figure(figsize=(9,9))
    plot_csc(simdata_df, csc_data, 1)
    #plt.tight_layout()
    plt.savefig(hospital_usage_file[0].replace("../data/processed/", "../graphics/").replace("_codestroke_usage.csv", "_mapped_small.png"))
    plt.close()
    
    plt.figure(figsize=(9,9))
    plot_csc(simdata_df, csc_data, 4.5, show_hospitals=False)
    #plt.tight_layout()
    plt.savefig(hospital_usage_file[0].replace("../data/processed/", "../graphics/").replace("_codestroke_usage.csv", "_mapped_large.png"))
    plt.close()

# Running the Plotting Loop

In [17]:
for proportion in ecr_proportion_list:
    for combination in combination_list:
        make_plots(proportion, combination)



<matplotlib.figure.Figure at 0x7fa8964fce10>

<matplotlib.figure.Figure at 0x7fa89436e710>

<matplotlib.figure.Figure at 0x7fa893479ba8>

<matplotlib.figure.Figure at 0x7fa892f5c630>

<matplotlib.figure.Figure at 0x7fa8928e3b00>

<matplotlib.figure.Figure at 0x7fa89193de10>

<matplotlib.figure.Figure at 0x7fa8911adb38>

<matplotlib.figure.Figure at 0x7fa890379198>

<matplotlib.figure.Figure at 0x7fa88fb586a0>

<matplotlib.figure.Figure at 0x7fa88edbeef0>

<matplotlib.figure.Figure at 0x7fa88e7e7e10>

<matplotlib.figure.Figure at 0x7fa88d7e7a20>

<matplotlib.figure.Figure at 0x7fa88d0beac8>

<matplotlib.figure.Figure at 0x7fa88d900160>

<matplotlib.figure.Figure at 0x7fa88bdf4a90>

<matplotlib.figure.Figure at 0x7fa88a9d27b8>

<matplotlib.figure.Figure at 0x7fa88a263da0>

<matplotlib.figure.Figure at 0x7fa88a622cc0>

<matplotlib.figure.Figure at 0x7fa888e28400>

<matplotlib.figure.Figure at 0x7fa887d786a0>

<matplotlib.figure.Figure at 0x7fa88790fb70>

<matplotlib.figure.Figure at 0x7fa8868fae48>

<matplotlib.figure.Figure at 0x7fa88615cac8>

<matplotlib.figure.Figure at 0x7fa8865ee828>

<matplotlib.figure.Figure at 0x7fa8865dcef0>

<matplotlib.figure.Figure at 0x7fa884e96a90>

<matplotlib.figure.Figure at 0x7fa8835b4860>

<matplotlib.figure.Figure at 0x7fa882712780>

<matplotlib.figure.Figure at 0x7fa88209fbe0>

<matplotlib.figure.Figure at 0x7fa881d8ed68>

<matplotlib.figure.Figure at 0x7fa880d20f60>

<matplotlib.figure.Figure at 0x7fa87fd60c18>