In [176]:
import pandas as pd
import numpy as np
import os
from arcgis.features import SpatialDataFrame

In [167]:
# calculate the number of trips that come within 1/4 mile of each block center
def blocks_trips_count(demo, gtfs, agency_ids, service_id, outfc, search_radius="0.25 Miles"):
    """Count the transit trips that pass near the point derived from each block.

    Reads ``trips.txt``, ``routes.txt`` and ``shapes.txt`` from the GTFS
    directory, counts the trips of ``service_id`` on every shape, keeps the
    shapes whose route belongs to one of ``agency_ids``, and spatially joins
    those shapes to one point per block, summing ``trip_count`` over all
    shapes within ``search_radius`` of the point.

    The function relies on the module-level ``workspace`` variable: every
    feature class it reads or writes is addressed relative to it.  It writes
    the intermediate feature classes ``shapes`` and ``demo_centers`` there and
    deletes them again once the join has been written.

    Parameters
    ----------
    demo : str
        Name of the demographic (block polygon) feature class in ``workspace``.
    gtfs : str
        Directory holding the GTFS text files.
    agency_ids : list-like
        ``agency_id`` values (as they appear in ``routes.txt``) to keep.
    service_id : str
        ``service_id`` value (as it appears in ``trips.txt``) to analyze.
    outfc : str
        Name of the output feature class created in ``workspace``.
    search_radius : str, optional
        Distance passed to the spatial join; defaults to a quarter mile.

    Returns
    -------
    str
        Path of the output feature class (``workspace`` joined with ``outfc``).
    """
    # input files and intermediate/output feature classes
    trips_path = os.path.join(gtfs, 'trips.txt')
    routes_path = os.path.join(gtfs, 'routes.txt')
    shapes_path = os.path.join(gtfs, 'shapes.txt')
    shapes_fc_path = os.path.join(workspace, 'shapes')
    demo_fc_path = os.path.join(workspace, demo)
    demo_centers_path = os.path.join(workspace, 'demo_centers')
    demo_centers_shapes_join_path = os.path.join(workspace, outfc)

    # read GTFS data into pandas dataframes
    trips = pd.read_csv(trips_path)
    routes = pd.read_csv(routes_path)

    # keep only the trips of the requested service; a boolean mask is used
    # instead of a query string so IDs containing quotes cannot break the filter
    service_trips = trips.loc[trips['service_id'] == service_id,
                              ['route_id', 'service_id', 'shape_id']]

    # group trips by shape_id to get a count of trips on that shape
    # keep route_id so it can be joined to routes later
    shape_count = (
        service_trips
        .groupby(['shape_id'])
        .agg({'route_id': 'max', 'service_id': 'count'})
        # index=str turns the shape_id labels into strings before they
        # become a regular column used for the geometry merge below
        .rename(index=str, columns={'service_id': 'trip_count'})
        .reset_index()  # merge does not preserve the index, so make shape_id a column
    )

    # join to routes to get the agency_id for each route
    shape_count = shape_count.merge(routes[['route_id', 'agency_id']], on='route_id', how='left')

    # select only those routes that have one of the chosen agency_ids
    shape_count = shape_count[shape_count['agency_id'].isin(agency_ids)]

    # create a feature class of the paths traversed by transit
    arcpy.conversion.GTFSShapesToFeatures(shapes_path, shapes_fc_path)

    # convert the shapes to a spatial dataframe so we can do table joins
    # in memory w/o having to create intermediate feature classes
    sdf = pd.DataFrame.spatial.from_featureclass(shapes_fc_path)

    # join the trip counts to the shape geometry; how='right' keeps exactly
    # the shapes that survived the service/agency filters
    shape_count_geom = sdf.merge(shape_count[['trip_count', 'shape_id']], on='shape_id', how='right')

    # write the sdf back to the shapes feature class
    shape_count_geom.spatial.to_featureclass(shapes_fc_path)

    # count the total number of trips accessible to each block
    # A shape is accessible to everyone in the block if it is within
    # search_radius of the block's point

    # Convert polygon blocks to points (one point per block, "INSIDE" option)
    arcpy.management.FeatureToPoint(demo_fc_path,
                                    demo_centers_path,
                                    "INSIDE")

    # Create the necessary FieldMappings object
    fms = arcpy.FieldMappings()

    # carry every block attribute through the join unchanged
    # start at index 2 b/c attempting to add the shape field to the field map will cause an exception
    for field in arcpy.ListFields(demo_centers_path)[2:]:
        block_field_map = arcpy.FieldMap()
        block_field_map.addInputField(demo_centers_path, field.name)
        block_field_map.mergeRule = 'First'
        fms.addFieldMap(block_field_map)

    # Add the trip count field from the shapes fc to a FieldMap object
    trip_field_map = arcpy.FieldMap()
    trip_field_map.addInputField(shapes_fc_path, "trip_count")

    # we want the total number of trips to join to each block center
    trip_field_map.mergeRule = 'Sum'

    # Add the trip count FieldMap to the FieldMappings Object
    fms.addFieldMap(trip_field_map)

    # Create the output feature class, using the FieldMappings object
    arcpy.analysis.SpatialJoin(demo_centers_path,
                               shapes_fc_path,
                               demo_centers_shapes_join_path,
                               "JOIN_ONE_TO_ONE",
                               "KEEP_ALL",
                               field_mapping=fms,
                               match_option="INTERSECT",
                               search_radius=search_radius
                              )

    # the shapes and block centers are no longer needed
    arcpy.management.Delete(shapes_fc_path)
    arcpy.management.Delete(demo_centers_path)

    return demo_centers_shapes_join_path

In [168]:
def create_table(start_or_end, in_table, populations, join_field):
    """Export a slimmed-down copy of ``in_table`` named '<start_or_end>_table'.

    The exported table holds the join field, the 'trip_count' field and one
    population field per entry of ``populations``.  Each value of
    ``populations`` is a pair of field names: the first is used when
    ``start_or_end`` is 'start', the second for any other value.  The table is
    written to the module-level ``workspace``.
    """
    out_table = start_or_end + '_table'

    # position within each population pair: 0 for the start of the study period, 1 otherwise
    pair_position = 0 if start_or_end == 'start' else 1

    # join field and trip counts first, then the selected population fields
    population_fields = [pair[pair_position] for pair in populations.values()]
    wanted_fields = [join_field, 'trip_count'] + population_fields

    # one single-input field map per exported field
    mappings = arcpy.FieldMappings()
    for name in wanted_fields:
        single_map = arcpy.FieldMap()
        single_map.addInputField(in_table, name)
        mappings.addFieldMap(single_map)

    # export to a table
    arcpy.conversion.TableToTable(in_table, workspace, out_table, field_mapping=mappings)

In [197]:
def _capped_relative_change(start, end, cap=1):
    """Return the relative change (end - start) / start, limited to at most ``cap``.

    Equal start and end values give 0, which also covers the 0 -> 0 case.
    Growth from a zero start has no defined ratio and is reported as ``cap``.
    """
    if start == end:
        return 0
    if start == 0:
        return cap
    return min((end - start) / start, cap)


def service_change(fieldNames, population):
    """Compute the population-weighted service level change for one group.

    Reads the rows of 'start_table' through an ``arcpy.da.SearchCursor`` and,
    for each block i, accumulates

        numerator   += p_end_i * ((1 + delta_t_i) / (1 + delta_p_i) - 1)
        denominator += p_end_i

    where delta_t_i and delta_p_i are the relative changes in trips and in
    population, each capped at +100%.

    Parameters
    ----------
    fieldNames : sequence of str
        Four field names, in this order: start trip count, end trip count,
        start population, end population.
    population : hashable
        Label of the population group; returned unchanged.

    Returns
    -------
    tuple
        ``(population, weighted_trip_change)``.  The change is NaN when the
        total end population is zero (including when no rows are read),
        because the weighted average is undefined in that case.

    Notes
    -----
    Null values in any of the four fields are treated as 0.
    """
    # running totals for the formula - see paper
    numerator = 0
    denominator = 0

    with arcpy.da.SearchCursor('start_table', fieldNames) as cursor:
        for row in cursor:
            # nulls (e.g. blocks with no trips nearby, or rows without a
            # match in the joined table) count as 0
            ti_start, ti_end, pi_start, pi_end = (
                0 if value is None else value for value in row[:4]
            )

            # percent change in trips and in population, capped at 100%,
            # per Metro Transit's process
            delta_ti = _capped_relative_change(ti_start, ti_end)
            delta_pi = _capped_relative_change(pi_start, pi_end)

            # a change in population of -100% would result in divide by 0
            # in the service level change formula.  Such blocks add nothing
            # to the numerator, since their end population (the weight) is 0
            if delta_pi != -1:
                numerator += pi_end * ((delta_ti + 1) / (delta_pi + 1) - 1)

            denominator += pi_end

    if denominator == 0:
        # no end population at all: the weighted average is undefined
        return population, float('nan')

    return population, numerator / denominator

In [193]:
def main(start, end, populations):
    """Run the full analysis and return the service change per population group.

    Parameters
    ----------
    start, end : dict
        Settings for the start and end of the study period.  The keys read
        here are 'demo', 'gtfs', 'agency_ids', 'service_id', 'outfc' and
        'join_on'.
    populations : dict
        Maps a population group label to a pair of field names: the
        population count field in the start data, then in the end data.

    Returns
    -------
    pandas.DataFrame
        One row per population group (the index) with a single
        'service change' column.
    """
    # get the number of trips accessible to each block at the start and end of the study period
    start_trips = blocks_trips_count(start['demo'],
                                     start['gtfs'],
                                     start['agency_ids'],
                                     start['service_id'],
                                     start['outfc'])

    end_trips = blocks_trips_count(end['demo'],
                                   end['gtfs'],
                                   end['agency_ids'],
                                   end['service_id'],
                                   end['outfc'])

    # create start and end tables with fields for trip counts, selected population counts,
    # and a join field for each block.  The inputs are the feature classes that were just
    # produced, so changing 'outfc' in the settings cannot point this step at a stale table.
    create_table('start', start_trips, populations, start['join_on'])
    create_table('end', end_trips, populations, end['join_on'])

    # join the tables together
    arcpy.management.AddJoin('start_table',
                             start['join_on'],
                             'end_table',
                             end['join_on'])

    # calculate the weighted service level change for each sub-population
    labels = []
    changes = []
    for population, (start_field, end_field) in populations.items():
        fieldNames = ['start_table.trip_count',
                      'end_table.trip_count',
                      'start_table.' + start_field,
                      'end_table.' + end_field]
        label, change = service_change(fieldNames, population)
        labels.append(label)
        changes.append(change)

    # collect the relative service level changes in a dataframe
    return pd.DataFrame({'service change': changes}, index=labels)

In [171]:
# Analysis parameters.
#   * The workspace should be a file geodatabase (gdb), and both demographic
#     feature classes must be stored in it.
#   * The demographic feature class for the start of the study period must have
#     the same geometry as the one for the end of the study period.
#   * Both must share a common field that links their records together.

workspace = r'C:\Users\orms0027\Documents\PA5234\PA5234.gdb'
arcpy.env.workspace = workspace

# settings for the start of the study period
start = dict(
    # name of the demographic fc stored in the workspace gdb
    demo="demo_2010",
    # directory where all the gtfs data are stored
    gtfs=r'C:\Users\orms0027\Documents\PA5234\gtfs_SEP09',
    # from agency.txt, which agencies to analyze
    agency_ids=[0, 1, 2],
    # from calendar.txt or calendar_dates.txt, the specific service to analyze
    service_id='SEP09-Multi-Weekday-01',
    # name of the feature class with trip counts for each block
    outfc='SEP09_blocks_trips',
    # field to join to end table on
    join_on='GISJOIN',
)

# settings for the end of the study period (same keys as `start`)
end = dict(
    demo="demo_2019",
    gtfs=r'C:\Users\orms0027\Documents\PA5234\gtfs_DEC19',
    agency_ids=[0, 1, 2],
    service_id='DEC19-MVS-BUS-Weekday-01',
    outfc='DEC19_blocks_trips',
    join_on='GISJOIN',
)

# Sub-populations to analyze, keyed by label.  Every value is a two-item list:
# the population count field in the start demo fc, followed by the population
# count field in the end demo fc.
populations = {
    group: [start_field, end_field]
    for group, start_field, end_field in (
        ('Total', 'H7X001', 'totpop'),
        ('White Alone', 'H7X002', 'raceandhispanicorigin_wht_cy'),
        ('Black or African American alone', 'H7X003', 'raceandhispanicorigin_black_cy'),
        ('American Indian and Alaska Native alone', 'H7X004', 'raceandhispanicorigin_amerind_cy'),
        ('Asian alone', 'H7X005', 'raceandhispanicorigin_asian_cy'),
        ('Native Hawaiian and Other Pacific Islander alone', 'H7X006', 'raceandhispanicorigin_pacific_cy'),
        ('Some Other Race alone', 'H7X007', 'raceandhispanicorigin_othrace_cy'),
        ('Two or More Races', 'H7X008', 'raceandhispanicorigin_race2up_cy'),
    )
}

In [198]:
# Run the analysis with the start/end settings and population groups defined above.
# main() returns a DataFrame indexed by population group with a single
# 'service change' column; ending the cell with `df` displays it.
df = main(start, end, populations)
df

Unnamed: 0,service change
Total,-0.04666
White Alone,0.014853
Black or African American alone,-0.089188
American Indian and Alaska Native alone,0.158764
Asian alone,-0.127609
Native Hawaiian and Other Pacific Islander alone,-0.01898
Some Other Race alone,0.056723
Two or More Races,-0.045245
