In [5]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from IPython.display import Image
# Import seaborn
import seaborn as sns

import hillmaker as hm

In [3]:
# Load the stop records from CSV, parsing the in-room and out-room timestamp
# columns as datetimes while reading.
ssu_stopdata = '../data/ShortStay.csv'
ssu_stops_df = pd.read_csv(
    ssu_stopdata,
    parse_dates=['InRoomTS', 'OutRoomTS'],
)

# Print column names, dtypes and non-null counts as a quick sanity check.
ssu_stops_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59877 entries, 0 to 59876
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   PatID      59877 non-null  int64         
 1   InRoomTS   59877 non-null  datetime64[ns]
 2   OutRoomTS  59877 non-null  datetime64[ns]
 3   PatType    59877 non-null  object        
dtypes: datetime64[ns](2), int64(1), object(1)
memory usage: 1.8+ MB


In [6]:
# Required inputs
scenario_name = 'ssu_1'
stops_df = ssu_stops_df
in_field_name = 'InRoomTS'
out_field_name = 'OutRoomTS'
start_date = '1996-01-01'
end_date = '1996-09-30'

# Optional inputs
cat_field_name = 'PatType'
verbosity = 1  # INFO level logging
output_path = './output'
bin_size_minutes = 60

# Build the scenario from the variables above. Every optional input defined in
# this cell is passed through explicitly, so editing a value here actually
# changes the scenario (previously `output_path` and `bin_size_minutes` were
# defined but never handed to `Scenario`).
s1 = hm.Scenario(
    scenario_name=scenario_name,
    stops_df=stops_df,
    in_field=in_field_name,
    out_field=out_field_name,
    start_analysis_dt=start_date,
    end_analysis_dt=end_date,
    cat_field=cat_field_name,
    bin_size_minutes=bin_size_minutes,
    output_path=Path(output_path),
    verbosity=verbosity,
)

In [7]:
# Run the hillmaker computations for the scenario. Because the scenario was
# created with verbosity=1, progress messages are logged at INFO level below.
s1.make_hills()

2023-08-24 16:39:15,574 - hillmaker.bydatetime - INFO - min of intime: 1996-01-01 07:44:00
2023-08-24 16:39:15,576 - hillmaker.bydatetime - INFO - max of intime: 1996-09-29 19:51:00
2023-08-24 16:39:15,577 - hillmaker.bydatetime - INFO - min of outtime: 1996-01-01 08:50:00
2023-08-24 16:39:15,578 - hillmaker.bydatetime - INFO - max of outtime: 1996-09-29 20:15:00
2023-08-24 16:39:15,580 - hillmaker.bydatetime - INFO - start analysis: 1996-01-01, end analysis: 1996-09-30
2023-08-24 16:39:15,608 - hillmaker.bydatetime - INFO - min of entry time_bin = 7
2023-08-24 16:39:15,610 - hillmaker.bydatetime - INFO - max of exit time_bin = 6546 and num_bins=6553
2023-08-24 16:39:16,222 - hillmaker.bydatetime - INFO - cat IVT {'inner': 33019}
2023-08-24 16:39:16,273 - hillmaker.bydatetime - INFO - cat IVT num_arrivals_hm 33019 num_arrivals_stops 33019
2023-08-24 16:39:16,274 - hillmaker.bydatetime - INFO - cat IVT num_departures_hm 33019 num_departures_stops 33019
2023-08-24 16:39:16,276 - hillmake

In [8]:
# IPython help syntax: display the signature and docstring of get_summary_df.
s1.get_summary_df?

[0;31mSignature:[0m
[0ms1[0m[0;34m.[0m[0mget_summary_df[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mflow_metric[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'occupancy'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mby_category[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstationary[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Get summary dataframe

Parameters
----------
flow_metric : str
    Either of 'arrivals', 'departures', 'occupancy' ('a', 'd', and 'o' are sufficient).
    Default='occupancy'
by_category : bool
    Default=True corresponds to category specific statistics. A value of False gives overall statistics.
stationary : bool
    Default=False corresponds to the standard nonstationary statistics (i.e. by TOD and DOW)

Returns
-------
DataFrame
[0;31mFile:[0m      ~/Documents/projects/hillmaker/src/hi

In [11]:
# With the defaults (flow_metric='occupancy', by_category=True, stationary=False)
# this returns occupancy statistics per category, day of week and time-of-day bin.
occ_summary_cat_df = s1.get_summary_df()
occ_summary_cat_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,min,max,stdev,sem,var,cv,skew,kurt,p25,p50,p75,p95,p99
PatType,day_of_week,dow_name,bin_of_day,bin_of_day_str,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
ART,0,Mon,0,00:00,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ART,0,Mon,1,01:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ART,0,Mon,2,02:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ART,0,Mon,3,03:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ART,0,Mon,4,04:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
OTH,6,Sun,19,19:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OTH,6,Sun,20,20:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OTH,6,Sun,21,21:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OTH,6,Sun,22,22:00,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# by_category=False gives the overall occupancy statistics (all categories
# combined) by day of week and time-of-day bin.
occ_summary_df = s1.get_summary_df(by_category=False)
occ_summary_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,min,max,stdev,sem,var,cv,skew,kurt,p25,p50,p75,p95,p99
day_of_week,dow_name,bin_of_day,bin_of_day_str,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,Mon,0,00:00,40.0,0.255000,0.0,1.833333,0.418453,0.066163,0.175103,1.640991,2.024153,4.603107,0.0,0.000000,0.495833,0.795833,1.644833
0,Mon,1,01:00,39.0,0.211966,0.0,1.600000,0.384266,0.061532,0.147660,1.812866,2.006676,3.737138,0.0,0.000000,0.208333,0.985000,1.372000
0,Mon,2,02:00,39.0,0.209402,0.0,0.916667,0.335340,0.053697,0.112453,1.601421,1.164433,-0.348937,0.0,0.000000,0.500000,0.900000,0.910333
0,Mon,3,03:00,39.0,0.194872,0.0,1.133333,0.334016,0.053485,0.111567,1.714029,1.545051,1.049713,0.0,0.000000,0.291667,0.905000,1.063667
0,Mon,4,04:00,39.0,0.114103,0.0,1.000000,0.258610,0.041411,0.066879,2.266471,2.307284,4.487256,0.0,0.000000,0.000000,0.643333,0.955667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,Sun,19,19:00,39.0,0.535470,0.0,3.266667,0.747004,0.119616,0.558014,1.395043,1.813045,3.776304,0.0,0.150000,0.816667,1.716667,2.905667
6,Sun,20,20:00,39.0,0.489316,0.0,2.000000,0.596168,0.095463,0.355416,1.218370,1.119442,0.457068,0.0,0.250000,0.791667,1.760000,2.000000
6,Sun,21,21:00,39.0,0.433761,0.0,1.966667,0.463310,0.074189,0.214656,1.068124,1.194987,1.781882,0.0,0.333333,0.741667,1.191667,1.757667
6,Sun,22,22:00,39.0,0.368376,0.0,2.066667,0.510315,0.081716,0.260421,1.385309,1.608689,2.409985,0.0,0.083333,0.650000,1.358333,1.883000


In [None]:
def make_week_hill_plot(summary_df: pd.DataFrame, scenario_name: str, metric: str,
                        bin_size_minutes: int = 60,
                        cap: int | None = None,
                        first_dow: str = 'Mon',
                        xlabel: str = 'Hour',
                        ylabel: str = 'Patients',
                        export_path: Path | str | None = None,
                        week_range: str = 'week'):
    """
    Makes and optionally exports a week plot for occupancy, arrivals, or departures.

    Plots the ``mean`` column of `summary_df` as bars and the ``p95`` column as a
    line against a synthetic datetime axis that starts on a Monday.

    Parameters
    ----------
    summary_df : DataFrame
        Summary DataFrame with one row per time bin, in chronological order,
        containing (at least) the columns ``'mean'`` and ``'p95'``.
    scenario_name : str
        Used in the plot title and in the output filename
    metric : str
        Name of the flow metric being plotted (e.g. 'occupancy'). Used in the
        legend, title and output filename. The capacity line is only drawn
        when this equals 'occupancy'.
    bin_size_minutes : int, optional
        Number of minutes in each time bin of the day, default is 60. Use a value that
        divides into 1440 with no remainder
    cap : int, optional
        Capacity of area being analyzed, default is None (no capacity line)
    first_dow : str, optional
        Currently unused; the x-axis always starts on a Monday. Kept so that
        existing callers passing it continue to work.
    xlabel : str
        x-axis label, default='Hour'
    ylabel : str
        y-axis label, default='Patients'
    export_path : str or Path, optional
        Destination directory for the exported png file. Default is None, in
        which case nothing is written to disk.
    week_range : str
        Week range of summary df, used in the plot title and output filename.
        Default is 'week', can also take the form of the first three characters
        of a day of week name (ex: 'tue')

    Returns
    -------
    matplotlib Figure
        The figure that was created. It is closed in pyplot before returning so
        that it is not rendered twice in a notebook.

    Raises
    ------
    ValueError
        If `bin_size_minutes` is not positive.
    """

    if bin_size_minutes <= 0:
        raise ValueError(f'bin_size_minutes must be positive, got {bin_size_minutes}')

    # The seaborn style sheets were renamed in matplotlib 3.6 ('seaborn-v0_8-*')
    # and the old names were later removed; use whichever name is installed.
    for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    fig1 = plt.figure(figsize=(15, 10))
    ax1 = fig1.add_subplot(1, 1, 1)

    # One x value per summary row; infer the number of days from the bin size.
    num_bins = len(summary_df)
    num_days = num_bins * bin_size_minutes / 1440
    # Whole days spanned by the data (integer ceiling, at least one) so tick
    # locations match the data instead of always covering seven days.
    num_tick_days = max(1, -(-(num_bins * bin_size_minutes) // 1440))

    # Create a list to use as the X-axis values
    # TODO: This is a Monday. Make flexible so any dow can be "first".
    base_date_for_first_dow = '2015-01-05'
    # Timedelta frequencies avoid the pandas alias renames ('H' -> 'h').
    timestamps = pd.date_range(base_date_for_first_dow, periods=num_bins,
                               freq=pd.Timedelta(minutes=bin_size_minutes)).tolist()

    # Major ticks at noon of each day, minor ticks every four hours from 06:00
    major_tick_locations = pd.date_range(f'{base_date_for_first_dow} 12:00:00',
                                         periods=num_tick_days,
                                         freq=pd.Timedelta(hours=24)).tolist()
    minor_tick_locations = pd.date_range(f'{base_date_for_first_dow} 06:00:00',
                                         periods=6 * num_tick_days,
                                         freq=pd.Timedelta(hours=4)).tolist()

    # Set the tick locations for the axes object
    ax1.set_xticks(major_tick_locations)
    ax1.set_xticks(minor_tick_locations, minor=True)

    # Specify the mean and percentile values. TODO - let user choose series to plot
    mean_occ = summary_df['mean']
    pctile_occ = summary_df['p95']

    # Styling of bars and of the percentile line
    bar_color = 'steelblue'
    pctile_line_style = '-'
    pctile_color = 'grey'

    # Mean as bars. GOTCHA: on a date axis the bar width is expressed in days,
    # so one bin is bin_size_minutes / 1440 of a day.
    bar_width = bin_size_minutes / 1440
    ax1.bar(timestamps, mean_occ, label=f'Mean {metric}', width=bar_width, color=bar_color)

    # Some percentile as a line
    ax1.plot(timestamps, pctile_occ, linestyle=pctile_line_style,
             label=f'95th %ile {metric}', color=pctile_color)

    # establish capacity horizontal line if supplied
    if cap is not None and metric == 'occupancy':
        ax1.axhline(cap, color='r', linestyle='--', label='Capacity')

    # Day-of-week labels are pointless when only one day is plotted
    day_fmt = '' if num_days == 1 else '%a'
    dayofweek_formatter = DateFormatter(day_fmt)
    qtrday_formatter = DateFormatter('%H')

    # Format the tick labels
    ax1.xaxis.set_major_formatter(dayofweek_formatter)
    ax1.xaxis.set_minor_formatter(qtrday_formatter)

    # Slide the major tick labels underneath the default location by 20 points
    ax1.tick_params(which='major', pad=20)

    # Set plot and axis titles
    sup_title = fig1.suptitle(f'{metric.title()} by Time of Day - {week_range.title()}\n{scenario_name.title()}',
                              x=0.125, y=0.95, horizontalalignment='left', verticalalignment='top', fontsize=16)

    ax1.set_title('All category types', loc='left', style='italic')
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)

    # Legend
    ax1.legend(loc='best', frameon=True, facecolor='w')

    # save figure
    if export_path is not None:
        plot_png = f'{scenario_name}_{metric}_plot_{week_range}.png'
        png_wpath = Path(export_path, plot_png)
        fig1.savefig(png_wpath, bbox_extra_artists=[sup_title], bbox_inches='tight')

    # Suppress plot output in notebook; close only the figure created here
    plt.close(fig1)

    return fig1