In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
import seaborn as sns

# CAHOOTS Busy Log
Created using Computer-Aided Dispatch data from EPD dispatch between January 1, 2014 and December 31, 2024.

### Import CAD DataFrame

In [2]:
cad = pd.read_csv("../data/cleaned_full_class_data.csv")

#### Convert `calltime` to datetime object.

In [3]:
cad['calltime'] = pd.to_datetime(cad['calltime'], errors='coerce')

#### Subset needed variables.

In [4]:
busy_log = cad[cad['agency'] == 'CAHOOTS'][['inci_id', 'calltime', 'secs_to_disp', 'secs_to_arrv', 'secs_to_close', 'priority', 'units_arrived']]

#### Create new time stamp variables by adding `secs_to_...` variables to `calltime`.

In [5]:
busy_log.loc[:, 'dispatchtime'] = busy_log['calltime'] + pd.to_timedelta(busy_log['secs_to_disp'], unit='s')
busy_log.loc[:, 'arrivaltime'] = busy_log['calltime'] + pd.to_timedelta(busy_log['secs_to_arrv'], unit='s')
busy_log.loc[:, 'endtime'] = busy_log['calltime'] + pd.to_timedelta(busy_log['secs_to_close'], unit='s')
busy_log.loc[:, 'disp_to_arrv'] = busy_log['secs_to_arrv'] - busy_log['secs_to_disp']

busy_log = busy_log.drop(['secs_to_disp', 'secs_to_close'], axis=1)

In [6]:
busy_log

Unnamed: 0,inci_id,calltime,secs_to_arrv,priority,units_arrived,dispatchtime,arrivaltime,endtime,disp_to_arrv
8,14000107,2014-01-01 01:55:39,1833.0,5,1,2014-01-01 02:15:52,2014-01-01 02:26:12,2014-01-01 02:34:38,620.0
37,14000382,2014-01-01 12:50:34,545.0,5,1,2014-01-01 12:57:10,2014-01-01 12:59:39,2014-01-01 13:02:58,149.0
43,14000408,2014-01-01 13:29:17,2409.0,5,1,2014-01-01 13:37:42,2014-01-01 14:09:26,2014-01-01 14:37:53,1904.0
45,14000416,2014-01-01 13:38:38,600.0,1,1,2014-01-01 13:39:14,2014-01-01 13:48:38,2014-01-01 14:06:38,564.0
46,14000429,2014-01-01 14:05:20,2667.0,5,1,2014-01-01 14:37:55,2014-01-01 14:49:47,2014-01-01 15:51:56,712.0
...,...,...,...,...,...,...,...,...,...
484702,24349242,2024-12-31 18:51:45,2821.0,7,1,2024-12-31 19:33:19,2024-12-31 19:38:46,2024-12-31 20:08:56,327.0
484703,24349247,2024-12-31 18:58:42,1540.0,5,1,2024-12-31 19:11:41,2024-12-31 19:24:22,2024-12-31 19:33:14,761.0
484716,24349325,2024-12-31 20:33:06,452.0,7,1,2024-12-31 20:37:44,2024-12-31 20:40:38,2024-12-31 22:29:10,174.0
484721,24349346,2024-12-31 21:11:16,1442.0,5,1,2024-12-31 21:11:20,2024-12-31 21:35:18,2024-12-31 22:02:29,1438.0


### Create `vans_avail` column

#### Notable Dates

- 01/01/2017: Addition of 5-10 am service hours (Possibly November-December 2016)
- 10/01/2018: New van purchased and brought into service
- OR 1/1/2017 5am-12pm started hours

After 1/1/2017, and between 5am-12pm, there are two vans available for service. Otherwise there is one. 

In [7]:
cutoff_date = pd.Timestamp('2017-01-01')

busy_log['vans_avail'] = 1

busy_log.loc[
    (busy_log['calltime'] >= cutoff_date) &
    (busy_log['calltime'].dt.hour >= 5) &
    (busy_log['calltime'].dt.hour < 12),
    'vans_avail'
] = 2

### Create `vans_busy` column

#### Steps:
- For each call, check if there are other active calls happening during its calltime.
- If yes: `vans_busy == 1` (or even the number of vans currently occupied).
- If no: `vans_busy == 0`.

#### Notes:
- Tracks by number of calls, not by how many vans are on a call
- Max is 2 (means the two vans are both on calls at DISPATCH time)

In [8]:
busy_log['vans_busy'] = 0

for idx, row in busy_log.iterrows():
    overlaps = busy_log[
        (busy_log['dispatchtime'] <= row['calltime']) &
        (busy_log['endtime'] >= row['calltime'])
    ]
    busy_log.at[idx, 'vans_busy'] = max(len(overlaps) - 1, 0) 

busy_log['vans_busy'] = busy_log[['vans_busy', 'vans_avail']].min(axis=1)

#### Create `capacity` column using boolean logic

In [9]:
busy_log['capacity'] = (busy_log['vans_busy'] >= busy_log['vans_avail']).astype(int)

In [10]:
busy_log

Unnamed: 0,inci_id,calltime,secs_to_arrv,priority,units_arrived,dispatchtime,arrivaltime,endtime,disp_to_arrv,vans_avail,vans_busy,capacity
8,14000107,2014-01-01 01:55:39,1833.0,5,1,2014-01-01 02:15:52,2014-01-01 02:26:12,2014-01-01 02:34:38,620.0,1,0,0
37,14000382,2014-01-01 12:50:34,545.0,5,1,2014-01-01 12:57:10,2014-01-01 12:59:39,2014-01-01 13:02:58,149.0,1,0,0
43,14000408,2014-01-01 13:29:17,2409.0,5,1,2014-01-01 13:37:42,2014-01-01 14:09:26,2014-01-01 14:37:53,1904.0,1,0,0
45,14000416,2014-01-01 13:38:38,600.0,1,1,2014-01-01 13:39:14,2014-01-01 13:48:38,2014-01-01 14:06:38,564.0,1,0,0
46,14000429,2014-01-01 14:05:20,2667.0,5,1,2014-01-01 14:37:55,2014-01-01 14:49:47,2014-01-01 15:51:56,712.0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
484702,24349242,2024-12-31 18:51:45,2821.0,7,1,2024-12-31 19:33:19,2024-12-31 19:38:46,2024-12-31 20:08:56,327.0,1,0,0
484703,24349247,2024-12-31 18:58:42,1540.0,5,1,2024-12-31 19:11:41,2024-12-31 19:24:22,2024-12-31 19:33:14,761.0,1,0,0
484716,24349325,2024-12-31 20:33:06,452.0,7,1,2024-12-31 20:37:44,2024-12-31 20:40:38,2024-12-31 22:29:10,174.0,1,0,0
484721,24349346,2024-12-31 21:11:16,1442.0,5,1,2024-12-31 21:11:20,2024-12-31 21:35:18,2024-12-31 22:02:29,1438.0,1,0,0


### Quick checkpoint to see entries left in busy log DataFrame

In [11]:
print(f'There are {len(busy_log)} entries in the busy log.')
print(f'This is {len(busy_log)/len(cad)*100:.2f}% of the CAD data.')

There are 125115 entries in the busy log.
This is 25.81% of the CAD data.


### Load complete, busy log DataFrame into .csv file named `busy_log.csv`

In [12]:
busy_log.to_csv('../data/busy_log.csv', index=False)