# Weighted occupancy sample path approach

This is a preliminary exploration of how to go about implementing a weighted occupancy sample path approach to computing occupancy statistics.

## Preliminaries

In [1]:
# Auto-reload imported modules so that edits to *.py files don't require manually reloading them.
# See https://stackoverflow.com/questions/5364050/reloading-submodules-in-ipython
%load_ext autoreload
%autoreload 2

Import commonly used libraries and enable the magic command for inline plotting.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import hillmaker as hm


In [3]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [4]:
# Load the stop data; the in-room and out-of-room columns are parsed as
# timestamps so they can be sorted and differenced later on.
ssu_stopdata = './data/ssu_2024.csv'
timestamp_cols = ['InRoomTS', 'OutRoomTS']
ssu_2024 = pd.read_csv(ssu_stopdata, parse_dates=timestamp_cols)
ssu_2024.head()




Unnamed: 0,PatID,InRoomTS,OutRoomTS,PatType,LOS_hours
0,1,2024-01-01 07:44:00,2024-01-01 09:20:00,IVT,1.6
1,2,2024-01-01 08:28:00,2024-01-01 11:13:00,IVT,2.75
2,3,2024-01-01 11:44:00,2024-01-01 12:48:00,MYE,1.066667
3,4,2024-01-01 11:51:00,2024-01-01 21:10:00,CAT,9.316667
4,5,2024-01-01 12:10:00,2024-01-01 12:57:00,IVT,0.783333


Start by creating a dataframe based on the arrival timestamps.

In [5]:
# One row per arrival event: the in-room timestamp becomes the event time `ts`,
# each arrival contributes an occupancy increment of +1, and the occupancy
# columns start out as zero placeholders.
arrivals_df = (
    ssu_2024[['InRoomTS', 'PatType']]
    .rename(columns={'InRoomTS': 'ts'})
    .sort_values(['ts'])
    .assign(inc=1.0, prev_occ=0.0, new_occ=0.0)
)
arrivals_df

Unnamed: 0,ts,PatType,inc,prev_occ,new_occ
0,2024-01-01 07:44:00,IVT,1.0,0.0,0.0
1,2024-01-01 08:28:00,IVT,1.0,0.0,0.0
2,2024-01-01 11:44:00,MYE,1.0,0.0,0.0
3,2024-01-01 11:51:00,CAT,1.0,0.0,0.0
4,2024-01-01 12:10:00,IVT,1.0,0.0,0.0
...,...,...,...,...,...
59872,2024-09-30 19:31:00,IVT,1.0,0.0,0.0
59873,2024-09-30 20:23:00,IVT,1.0,0.0,0.0
59874,2024-09-30 21:00:00,CAT,1.0,0.0,0.0
59875,2024-09-30 21:57:00,IVT,1.0,0.0,0.0


In [17]:
# One row per departure event: the out-of-room timestamp becomes the event time
# `ts`, each departure contributes an occupancy increment of -1, and the
# occupancy columns start out as zero placeholders.
departures_df = (
    ssu_2024[['OutRoomTS', 'PatType']]
    .rename(columns={'OutRoomTS': 'ts'})
    .sort_values(['ts'])
    .assign(inc=-1.0, new_occ=0.0, prev_occ=0.0)
)


# Start of the analysis horizon.
start_analysis_dt = pd.Timestamp('2024-01-01')

# Single synthetic "event" at the start of the horizon: zero increment and zero
# occupancy, so the sample path has a defined starting point.
init_row = {'ts': start_analysis_dt, 'inc': 0.0, 'prev_occ': 0.0, 'new_occ': 0.0}
init_df = pd.DataFrame(init_row, index=[0])
init_df

Unnamed: 0,ts,inc,prev_occ,new_occ
0,2024-01-01,0.0,0.0,0.0


Now stack the dataframes and re-sort by `ts`.

In [20]:
# Stack the initial row, the arrivals and the departures into a single event
# table ordered by event time.
#
# A stable sort is used so that rows sharing the same timestamp keep their
# concatenation order (initial row, then arrivals, then departures). The default
# sort algorithm gives no such guarantee, which would leave the order of tied
# events -- and the position of the initial row if an event falls exactly on the
# analysis start -- undefined.
#
# `ignore_index=True` renumbers the rows 0..n-1 after sorting.
arr_dep_df = (
    pd.concat([init_df, arrivals_df, departures_df])
    .sort_values(['ts'], kind='stable', ignore_index=True)
)
arr_dep_df

Unnamed: 0,ts,inc,prev_occ,new_occ,PatType
0,2024-01-01 00:00:00,0.0,0.0,0.0,
1,2024-01-01 07:44:00,1.0,0.0,0.0,IVT
2,2024-01-01 08:28:00,1.0,0.0,0.0,IVT
3,2024-01-01 09:20:00,-1.0,0.0,0.0,IVT
4,2024-01-01 11:13:00,-1.0,0.0,0.0,IVT
...,...,...,...,...,...
119750,2024-10-01 01:45:00,-1.0,0.0,0.0,CAT
119751,2024-10-01 01:58:00,-1.0,0.0,0.0,IVT
119752,2024-10-01 03:18:00,-1.0,0.0,0.0,CAT
119753,2024-10-01 04:37:00,-1.0,0.0,0.0,CAT


Now create a cumulative sum of the `inc` column. Then shift it down by a row so that it represents the previous occupancy before each row's new arrival or departure.

In [24]:
# Running total of the increments, moved down one row so that each row holds the
# total accumulated *before* its own event. Row 0 has no predecessor and is
# therefore left as NaN.
running_total = arr_dep_df['inc'].cumsum()
arr_dep_df['cumulative_inc'] = running_total.shift(1)
arr_dep_df

Unnamed: 0,ts,inc,prev_occ,new_occ,PatType,cumulative_inc
0,2024-01-01 00:00:00,0.0,0.0,0.0,,
1,2024-01-01 07:44:00,1.0,0.0,0.0,IVT,0.0
2,2024-01-01 08:28:00,1.0,0.0,0.0,IVT,1.0
3,2024-01-01 09:20:00,-1.0,0.0,0.0,IVT,2.0
4,2024-01-01 11:13:00,-1.0,0.0,0.0,IVT,1.0
...,...,...,...,...,...,...
119750,2024-10-01 01:45:00,-1.0,0.0,0.0,CAT,5.0
119751,2024-10-01 01:58:00,-1.0,0.0,0.0,IVT,4.0
119752,2024-10-01 03:18:00,-1.0,0.0,0.0,CAT,3.0
119753,2024-10-01 04:37:00,-1.0,0.0,0.0,CAT,2.0


In [31]:
# Weight of each row = time elapsed until the next event (next row's `ts` minus
# this row's `ts`). The last row has no successor, so its weight is missing.
next_ts = arr_dep_df['ts'].shift(-1)
arr_dep_df['weight_td'] = next_ts - arr_dep_df['ts']

# Same weight expressed as a float number of seconds.
arr_dep_df['weight_seconds'] = arr_dep_df['weight_td'] / pd.Timedelta(1, 's')
arr_dep_df

Unnamed: 0,ts,inc,prev_occ,new_occ,PatType,cumulative_inc,weight_td,weight_seconds
0,2024-01-01 00:00:00,0.0,0.0,0.0,,,0 days 07:44:00,27840.0
1,2024-01-01 07:44:00,1.0,0.0,0.0,IVT,0.0,0 days 00:44:00,2640.0
2,2024-01-01 08:28:00,1.0,0.0,0.0,IVT,1.0,0 days 00:52:00,3120.0
3,2024-01-01 09:20:00,-1.0,0.0,0.0,IVT,2.0,0 days 01:53:00,6780.0
4,2024-01-01 11:13:00,-1.0,0.0,0.0,IVT,1.0,0 days 00:31:00,1860.0
...,...,...,...,...,...,...,...,...
119750,2024-10-01 01:45:00,-1.0,0.0,0.0,CAT,5.0,0 days 00:13:00,780.0
119751,2024-10-01 01:58:00,-1.0,0.0,0.0,IVT,4.0,0 days 01:20:00,4800.0
119752,2024-10-01 03:18:00,-1.0,0.0,0.0,CAT,3.0,0 days 01:19:00,4740.0
119753,2024-10-01 04:37:00,-1.0,0.0,0.0,CAT,2.0,0 days 00:04:00,240.0


Fill in first row.

In [10]:
# Fill in the first row's weight only when it is actually missing.
#
# When the weights are measured backwards (time since the previous event), row 0
# has no predecessor and its weight is the time elapsed since the start of the
# analysis. When the table already begins with the initial row at
# `start_analysis_dt` and the weights point forward to the next event, row 0
# already carries a valid weight; overwriting it unconditionally would replace
# that value with `ts - start_analysis_dt`, which is zero for the initial row.
if pd.isna(arr_dep_df.at[0, 'weight_td']):
    first_gap = arr_dep_df.at[0, 'ts'] - start_analysis_dt
    arr_dep_df.at[0, 'weight_td'] = first_gap
    arr_dep_df.at[0, 'weight_seconds'] = first_gap / pd.Timedelta(1, 's')
arr_dep_df

Unnamed: 0,ts,PatType,inc,prev_occ,new_occ,weight_td,weight_seconds
0,2024-01-01 07:44:00,IVT,1.0,0.0,0.0,0 days 07:44:00,27840.0
1,2024-01-01 08:28:00,IVT,1.0,0.0,0.0,0 days 00:44:00,2640.0
0,2024-01-01 09:20:00,IVT,-1.0,0.0,0.0,0 days 09:20:00,33600.0
1,2024-01-01 11:13:00,IVT,-1.0,0.0,0.0,0 days 01:53:00,6780.0
2,2024-01-01 11:44:00,MYE,1.0,0.0,0.0,0 days 00:31:00,1860.0
...,...,...,...,...,...,...,...
59786,2024-10-01 01:45:00,CAT,-1.0,0.0,0.0,0 days 01:53:00,6780.0
59875,2024-10-01 01:58:00,IVT,-1.0,0.0,0.0,0 days 00:13:00,780.0
59876,2024-10-01 03:18:00,CAT,-1.0,0.0,0.0,0 days 01:20:00,4800.0
59833,2024-10-01 04:37:00,CAT,-1.0,0.0,0.0,0 days 01:19:00,4740.0
