In [1]:
import pandas as pd
import numpy as np

In [2]:
# Input folders. `base` is where "Transit Routes.csv" is read from and where
# the output CSV is written; `gtfs_base` is where trips.txt and stop_times.txt
# are read from.
base = r"J:\Shared drives\TMD_TSA\Model\networks\Transit\gtfs\bnrd\4_imported_gtfs"
gtfs_base = r"J:\Shared drives\TMD_TSA\Model\networks\Transit\gtfs\bnrd\3_gtfs_post_move_stops"

# NOTE: these two assignments override the pair above, so only the bat_2019
# folders are actually used by the cells below.
base = r"J:\Shared drives\TMD_TSA\Model\networks\Transit\gtfs\bat_2019\4_imported_gtfs"
gtfs_base = r"J:\Shared drives\TMD_TSA\Model\networks\Transit\gtfs\bat_2019\2_gtfs_py"

In [3]:
# Route table read from the `base` folder (supplies the Trip and Route_Name
# columns used further down).
tcad_routes = pd.read_csv(base + r"\Transit Routes.csv", low_memory = False)

# GTFS trips and stop_times tables read from the `gtfs_base` folder.
trips = pd.read_csv(gtfs_base + r"\trips.txt", low_memory = False)
stop_times = pd.read_csv(gtfs_base + r"\stop_times.txt", low_memory = False)

In [4]:
# Where a trip has no route_pattern_id, fall back to its shape_id.
# NOTE: this overwrites the column on `trips` in place.
trips['route_pattern_id'] = np.where(trips['route_pattern_id'].isnull(), trips['shape_id'], trips['route_pattern_id'])

In [5]:
def get_start_stop_times(stop_times):
    '''for every trip, grab the arrival time at its first and last stop

    The first and last stops of a trip are the rows holding the smallest and
    largest ``stop_sequence`` for that ``trip_id``.

    Parameters
    -----------
    stop_times : df
        gtfs stop_times.txt in df format; must contain the columns
        ``trip_id``, ``stop_sequence`` and ``arrival_time``

    Returns
    --------
    start_stop : df
        one row per trip with the columns ``trip_id``,
        ``arrival_time_start`` and ``arrival_time_end``, in order of first
        appearance of the trip in ``stop_times`` and with a 0..n-1 index.
        Empty (but with those three columns) when there are no usable rows.

    '''
    out_cols = ['trip_id', 'arrival_time_start', 'arrival_time_end']

    # Rows without a trip id or stop sequence cannot be placed within a trip.
    # Resetting the index guarantees the labels returned by idxmin/idxmax
    # below are unique row identifiers.
    usable = (
        stop_times[['trip_id', 'stop_sequence', 'arrival_time']]
        .dropna(subset=['trip_id', 'stop_sequence'])
        .reset_index(drop=True)
    )
    if usable.empty:
        # Nothing to summarise; still hand back the expected columns.
        return pd.DataFrame(columns=out_cols)

    # One grouped pass over the table instead of two full-table queries and a
    # concat per trip. sort=False keeps trips in first-appearance order, and
    # both lookups come from the same grouping so their rows line up.
    seq_by_trip = usable.groupby('trip_id', sort=False)['stop_sequence']
    first_rows = usable.loc[seq_by_trip.idxmin().to_numpy()]
    last_rows = usable.loc[seq_by_trip.idxmax().to_numpy()]

    # If a trip repeats its extreme stop_sequence, the first such row is used,
    # so the result always has exactly one row per trip.
    return pd.DataFrame({
        'trip_id': first_rows['trip_id'].to_numpy(),
        'arrival_time_start': first_rows['arrival_time'].to_numpy(),
        'arrival_time_end': last_rows['arrival_time'].to_numpy(),
    })


In [6]:
def _clock_to_decimal_hours(clock_times):
    '''convert "HH:MM[:SS]" strings to decimal hours; seconds are ignored'''
    parts = clock_times.str.split(":")
    hours = parts.str[0].astype('int32')
    minutes = parts.str[1].astype('int32')
    return hours + minutes / 60


def assign_tod(start_stop):
    '''calculate midpoint of trip, use midpoint to assign TOD

    The midpoint is computed in decimal hours from the hour and minute parts
    of the start and end arrival times. It is then classified, first match
    wins (both bounds inclusive):

    * 6.50 - 9.50   -> 'AM'
    * 9.50 - 15.00  -> 'MD'
    * 15.00 - 19.00 -> 'PM'
    * anything else -> 'NT'

    Parameters
    -----------
    start_stop : df
        df with start and stop times per trip; must contain the string
        columns ``arrival_time_start`` and ``arrival_time_end``

    Returns
    --------
    start_stop_tod : df
        copy of ``start_stop`` with the added columns ``at_end_dec``,
        ``at_start_dec``, ``midpoint`` and ``tod``. The input df is left
        untouched.

    '''
    # Work on a copy so the caller's frame is not modified as a side effect.
    result = start_stop.copy()

    result['at_end_dec'] = _clock_to_decimal_hours(result['arrival_time_end'])
    result['at_start_dec'] = _clock_to_decimal_hours(result['arrival_time_start'])

    result['midpoint'] = (
        result['at_start_dec']
        + (result['at_end_dec'] - result['at_start_dec']) / 2
    )

    # np.select takes the first condition that holds, so a midpoint sitting
    # exactly on a shared boundary (9.50 or 15.00) goes to the earlier period.
    midpoint = result['midpoint']
    result['tod'] = np.select(
        [
            midpoint.between(6.50, 9.50),
            midpoint.between(9.50, 15.00),
            midpoint.between(15.00, 19.00),
        ],
        ['AM', 'MD', 'PM'],
        default='NT',
    )

    return result


In [7]:
def trips_per_tod_per_route(start_stop_tod, tcad_routes, trips):
    '''count trips per TOD for every route name in the route table

    Each row of ``tcad_routes`` names one trip (``Trip``); the
    ``route_pattern_id`` of that trip links the ``Route_Name`` to all GTFS
    trips sharing the pattern. Those trips are then counted per TOD.

    Parameters
    -----------
    start_stop_tod : df
        df with one row per trip holding at least ``trip_id`` and ``tod``
    tcad_routes : df
        route table with the columns ``Trip`` and ``Route_Name``
    trips : df
        gtfs trips.txt in df format with the columns ``trip_id`` and
        ``route_pattern_id``

    Returns
    --------
    tod_pivot : df
        one row per ``Route_Name`` with a trip-count column for each of
        'AM', 'MD', 'NT' and 'PM' (NaN where a route has no trips in that
        period). All four columns are always present.

    Raises
    -------
    IndexError
        if a ``Trip`` of ``tcad_routes`` has no matching ``trip_id`` in
        ``trips``

    '''
    periods = ('AM', 'MD', 'NT', 'PM')

    # Lookup tables keyed by trip: the first Route_Name listed for each Trip
    # and the first route_pattern_id listed for each trip_id. Pattern ids are
    # compared as strings so numeric and text ids match consistently.
    route_of_trip = tcad_routes.drop_duplicates('Trip').set_index('Trip')['Route_Name']
    pattern_of_trip = (
        trips.drop_duplicates('trip_id')
        .set_index('trip_id')['route_pattern_id']
        .astype('str')
    )

    route_trips = tcad_routes['Trip']
    unmatched = route_trips[~route_trips.isin(pattern_of_trip.index)]
    if not unmatched.empty:
        raise IndexError(
            "Trip value(s) from tcad_routes not found in trips.trip_id: "
            + ", ".join(map(str, unmatched.unique()[:10]))
        )

    # One pattern per route name: when a Route_Name occurs more than once the
    # last listed trip decides which pattern it maps to.
    route_to_pattern = dict(zip(route_trips.map(route_of_trip),
                                route_trips.map(pattern_of_trip)))
    reltab = pd.DataFrame({
        'Route_Name': list(route_to_pattern.keys()),
        'rpid': list(route_to_pattern.values()),
    })

    # String version of the pattern id on a new frame; the caller's `trips`
    # is left unmodified.
    trips_str = trips.assign(route_pattern_id=trips['route_pattern_id'].astype('str'))
    trips_named = trips_str.merge(reltab, how='left',
                                  left_on='route_pattern_id', right_on='rpid')

    # Only trip_id and tod are needed from start_stop_tod; joining explicitly
    # on trip_id avoids accidental matches on other shared column names.
    tod_trips = (
        trips_named.merge(start_stop_tod[['trip_id', 'tod']], on='trip_id')
        .groupby(by=['Route_Name', 'tod'])
        .agg({'trip_id': 'count'})
        .rename(columns={'trip_id': 'num_trips'})
        .reset_index()
    )
    tod_pivot = tod_trips.pivot(index='Route_Name', columns='tod', values='num_trips')

    # A period with no trips at all would otherwise be missing from the
    # pivot, breaking later lookups such as tod_pivot['NT'].
    for period in periods:
        if period not in tod_pivot.columns:
            tod_pivot[period] = np.nan

    return tod_pivot.sort_index(axis=1).reset_index()

In [8]:
start_stop = get_start_stop_times(stop_times) # start and end arrival time of every trip

In [9]:
start_stop_tod = assign_tod(start_stop) # trip midpoint and TOD label (AM/MD/PM/NT) per trip

In [10]:
# Number of trips per TOD for each Route_Name (one row per route name).
tod_pivot = trips_per_tod_per_route(start_stop_tod, tcad_routes, trips)

In [None]:
# Total number of trips per TOD period across all routes (quick sanity check).
for period in ('AM', 'MD', 'PM', 'NT'):
    # A period without any trips may have no column in the pivot; report 0
    # for it instead of failing with a KeyError.
    total = tod_pivot[period].sum() if period in tod_pivot.columns else 0
    print(period + ": " + str(total))

In [11]:
# Write the pivot into the `base` folder; the row index is not written.
tod_pivot.to_csv(base+r"\tod_pivot2.csv", index=False)