In [1]:
import pandas as pd
import hillmaker as hm
from hillmaker.scenario import create_scenario

In [4]:
from hillmaker import bydatetime

In [8]:
help(bydatetime.make_bydatetime)

Help on function make_bydatetime in module hillmaker.bydatetime:

make_bydatetime(stops_df, infield, outfield, start_analysis, end_analysis, catfield=None, bin_size_minutes=60, cat_to_exclude=None, totals=1, occ_weight_field=None, edge_bins=1, verbose=0)
    Create bydatetime table based on user inputs.
    
    This is the table from which summary statistics can be computed.
    
    Parameters
    ----------
    stops_df: DataFrame
        Stop data
    
    infield: string
        Name of column in stops_df to use as arrival datetime
    
    outfield: string
        Name of column in stops_df to use as departure datetime
    
    start_analysis: datetime
        Start date for the analysis
    
    end_analysis: datetime
        End date for the analysis
    
    catfield : string or List of strings, optional
        Column name(s) corresponding to the categories. If none is specified, then only overall occupancy is analyzed.
    
    bin_size_minutes: int, default 60
        Bin s

In [5]:
stops_df = pd.read_csv("rectypes.csv", parse_dates=['InRoomTS','OutRoomTS'], comment="#")

In [6]:
stops_df.tail(4)

Unnamed: 0,PatID,InRoomTS,OutRoomTS,PatType
16,17,1996-01-01 06:10:00,1996-01-01 08:00:00,inner_mbins_boundary
17,18,1996-01-01 07:00:00,1996-01-01 09:00:00,inner_mbins_boundary
18,19,1996-01-01 23:50:00,1996-01-02 01:30:00,inner_mbins_boundary_overmid
19,20,1996-01-02 23:30:00,1996-01-03 01:30:00,inner_mbins_boundary_overmid


In [13]:
bydt_dfs = bydatetime.make_bydatetime(stops_df, 'InRoomTS', 'OutRoomTS', '1/1/1996', '1/3/1996 23:30', 
                                     'PatType', 30)

In [14]:
bydt_dfs.keys()

dict_keys(['PatType_datetime', 'datetime'])

In [15]:
df1 = bydt_dfs['PatType_datetime']

In [17]:
df1[(df1.PatType=='inner_twobins') & (df1.bin_of_week > 30)]

AttributeError: 'DataFrame' object has no attribute 'PatType'

In [None]:
bydt_df[bydt_df.category.str.contains('inner_mbins_boundary') & (bydt_df.bin_of_week > 10)]

In [18]:
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,arrivals,departures,occupancy,day_of_week,dow_name,bin_of_day,bin_of_week
PatType,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
inner_mbins_boundary,1996-01-01 00:00:00,0.0,0.0,0.0,0,Monday,0,0
inner_mbins_boundary,1996-01-01 00:30:00,0.0,0.0,0.0,0,Monday,1,1
inner_mbins_boundary,1996-01-01 01:00:00,0.0,0.0,0.0,0,Monday,2,2
inner_mbins_boundary,1996-01-01 01:30:00,0.0,0.0,0.0,0,Monday,3,3
inner_mbins_boundary,1996-01-01 02:00:00,0.0,0.0,0.0,0,Monday,4,4
...,...,...,...,...,...,...,...,...
right_mbins_frac,1996-01-03 21:30:00,0.0,0.0,1.0,2,Wednesday,43,139
right_mbins_frac,1996-01-03 22:00:00,0.0,0.0,1.0,2,Wednesday,44,140
right_mbins_frac,1996-01-03 22:30:00,0.0,0.0,1.0,2,Wednesday,45,141
right_mbins_frac,1996-01-03 23:00:00,0.0,0.0,1.0,2,Wednesday,46,142


In [7]:
def test_inner_mbins_lrfrac():
    # Create test case
    # 1/1/2024 7:20,1/1/2024 8:50,inner_mbins_frac

    scenario_name = 'inner_mbins_lrfrac'
    bin_size_minutes = 30
    start_analysis_dt = pd.Timestamp('2024-01-01')
    end_analysis_dt = pd.Timestamp('2024-01-01')
    stop_record = {'InRoomTS': pd.Timestamp('2024-01-01 7:20'),
                   'OutRoomTS': pd.Timestamp('2024-01-01 8:50')}

    stops_df = pd.DataFrame({k: [v] for k, v in stop_record.items()})
    print(stops_df)

    scenario_params = {'scenario_name': scenario_name,
                       'data': stops_df,
                       'in_field': 'InRoomTS', 'out_field': 'OutRoomTS',
                       'start_analysis_dt': start_analysis_dt,
                       'end_analysis_dt': end_analysis_dt,
                       'bin_size_minutes': bin_size_minutes}

    # Create scenario
    scenario = create_scenario(scenario_params)
    scenario.compute_hills_stats()

    bydatetime_df = scenario.get_bydatetime_df(by_category=False)
    summary_df = scenario.get_summary_df(by_category=False)
    return bydatetime_df, summary_df

    #assert bydatedf[bydatedf.loc['datetime'] == pd.Timestamp('2024-01-01 7:00')]['arrivals'] == 1.0
    #print(bydatedf[['arrivals', 'departures', 'occupancy']])

In [8]:
bydatetime_df, summary_df = test_inner_mbins_lrfrac()

             InRoomTS           OutRoomTS
0 2024-01-01 07:20:00 2024-01-01 08:50:00


In [9]:
bydatetime_df

Unnamed: 0_level_0,arrivals,departures,occupancy,day_of_week,dow_name,bin_of_day_str,bin_of_day,bin_of_week
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01 00:00:00,0.0,0.0,0.0,0,Mon,00:00,0,0
2024-01-01 00:30:00,0.0,0.0,0.0,0,Mon,00:30,1,1
2024-01-01 01:00:00,0.0,0.0,0.0,0,Mon,01:00,2,2
2024-01-01 01:30:00,0.0,0.0,0.0,0,Mon,01:30,3,3
2024-01-01 02:00:00,0.0,0.0,0.0,0,Mon,02:00,4,4
2024-01-01 02:30:00,0.0,0.0,0.0,0,Mon,02:30,5,5
2024-01-01 03:00:00,0.0,0.0,0.0,0,Mon,03:00,6,6
2024-01-01 03:30:00,0.0,0.0,0.0,0,Mon,03:30,7,7
2024-01-01 04:00:00,0.0,0.0,0.0,0,Mon,04:00,8,8
2024-01-01 04:30:00,0.0,0.0,0.0,0,Mon,04:30,9,9


In [10]:
summary_df

Unnamed: 0,day_of_week,dow_name,bin_of_day,bin_of_day_str,count,mean,min,max,stdev,sem,var,cv,skew,kurt,p25,p50,p75,p95,p99
0,0,Mon,0,00:00,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
1,0,Mon,1,00:30,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
2,0,Mon,2,01:00,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
3,0,Mon,3,01:30,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
4,0,Mon,4,02:00,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
5,0,Mon,5,02:30,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
6,0,Mon,6,03:00,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
7,0,Mon,7,03:30,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
8,0,Mon,8,04:00,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0
9,0,Mon,9,04:30,1.0,0.0,0.0,0.0,,,,0.0,,,0.0,0.0,0.0,0.0,0.0


In [12]:
bydatetime_df.loc[pd.Timestamp('2024-01-01 7:00')]['arrivals']

1.0

In [13]:
def test_inner_mbins_lfrac():
    # Create test case
    # 1/1/2024 7:20,1/1/2024 8:50,inner_mbins_frac

    scenario_name = 'inner_mbins_lrfrac'
    bin_size_minutes = 30
    start_analysis_dt = pd.Timestamp('2024-01-01')
    end_analysis_dt = pd.Timestamp('2024-01-01')
    stop_record = {'InRoomTS': pd.Timestamp('2024-01-01 7:20'),
                   'OutRoomTS': pd.Timestamp('2024-01-01 9:30')}

    stops_df = pd.DataFrame({k: [v] for k, v in stop_record.items()})
    print(stops_df)

    scenario_params = {'scenario_name': scenario_name,
                       'data': stops_df,
                       'in_field': 'InRoomTS', 'out_field': 'OutRoomTS',
                       'start_analysis_dt': start_analysis_dt,
                       'end_analysis_dt': end_analysis_dt,
                       'bin_size_minutes': bin_size_minutes}

    # Create scenario
    scenario = create_scenario(scenario_params)
    scenario.compute_hills_stats()

    bydatetime_df = scenario.get_bydatetime_df(by_category=False)
    summary_df = scenario.get_summary_df(by_category=False)
    return bydatetime_df, summary_df

    #assert bydatedf[bydatedf.loc['datetime'] == pd.Timestamp('2024-01-01 7:00')]['arrivals'] == 1.0
    #print(bydatedf[['arrivals', 'departures', 'occupancy']])

In [14]:
bydatetime_df, summary_df = test_inner_mbins_lfrac()

             InRoomTS           OutRoomTS
0 2024-01-01 07:20:00 2024-01-01 09:30:00


In [15]:
bydatetime_df

Unnamed: 0_level_0,arrivals,departures,occupancy,day_of_week,dow_name,bin_of_day_str,bin_of_day,bin_of_week
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01 00:00:00,0.0,0.0,0.0,0,Mon,00:00,0,0
2024-01-01 00:30:00,0.0,0.0,0.0,0,Mon,00:30,1,1
2024-01-01 01:00:00,0.0,0.0,0.0,0,Mon,01:00,2,2
2024-01-01 01:30:00,0.0,0.0,0.0,0,Mon,01:30,3,3
2024-01-01 02:00:00,0.0,0.0,0.0,0,Mon,02:00,4,4
2024-01-01 02:30:00,0.0,0.0,0.0,0,Mon,02:30,5,5
2024-01-01 03:00:00,0.0,0.0,0.0,0,Mon,03:00,6,6
2024-01-01 03:30:00,0.0,0.0,0.0,0,Mon,03:30,7,7
2024-01-01 04:00:00,0.0,0.0,0.0,0,Mon,04:00,8,8
2024-01-01 04:30:00,0.0,0.0,0.0,0,Mon,04:30,9,9


In [16]:
140/30

4.666666666666667

In [17]:
[30] * 17

[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]

In [23]:
occ_series = pd.Series(bydatetime_df.loc[pd.Timestamp('2024-01-01 0:00'):pd.Timestamp('2024-01-01 8:00')]['occupancy'])

In [24]:
occ_series

datetime
2024-01-01 00:00:00    0.000000
2024-01-01 00:30:00    0.000000
2024-01-01 01:00:00    0.000000
2024-01-01 01:30:00    0.000000
2024-01-01 02:00:00    0.000000
2024-01-01 02:30:00    0.000000
2024-01-01 03:00:00    0.000000
2024-01-01 03:30:00    0.000000
2024-01-01 04:00:00    0.000000
2024-01-01 04:30:00    0.000000
2024-01-01 05:00:00    0.000000
2024-01-01 05:30:00    0.000000
2024-01-01 06:00:00    0.000000
2024-01-01 06:30:00    0.000000
2024-01-01 07:00:00    0.333333
2024-01-01 07:30:00    1.000000
2024-01-01 08:00:00    1.000000
Name: occupancy, dtype: float64

In [25]:
occ_series = occ_series.reset_index(drop=True)


In [26]:
occ_series


0     0.000000
1     0.000000
2     0.000000
3     0.000000
4     0.000000
5     0.000000
6     0.000000
7     0.000000
8     0.000000
9     0.000000
10    0.000000
11    0.000000
12    0.000000
13    0.000000
14    0.333333
15    1.000000
16    1.000000
Name: occupancy, dtype: float64