In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

from shapely.geometry import LineString

In [2]:
# Project and software roots (absolute, machine-specific locations).
root_dir = "D:/merced"
ranch_dir = "D:/github/Ranch"

# Data stages underneath the project root: <root>/data/external and
# <root>/data/interim.
external_dir, interim_dir = (
    os.path.join(root_dir, "data", stage) for stage in ("external", "interim")
)

# Folders under the external data directory where the SharedStreets and
# OSMNX extractions live.
shst_extract_dir, osm_extract_dir = (
    os.path.join(external_dir, folder)
    for folder in ("sharedstreets_extract", "osmnx_extract")
)

In [3]:
# Routed bus links written to the interim folder. A column named 'source',
# if present, is renamed to 'routing_method'.
# NOTE(review): the recorded routing_df.info() output lists a 'method' column
# and no 'source' column, so this rename appears to be a no-op -- confirm
# which column name the file actually uses.
routing_df = gpd.read_file(os.path.join(interim_dir, 'bus_routing.geojson'))
routing_df = routing_df.rename(columns={'source': 'routing_method'})

# All GTFS tables come from the same feed folder.
gtfs_dir = os.path.join(external_dir, 'gtfs', '2021', 'The Bus')

# trip_id / shape_id are read as objects (not parsed as numbers) so they can
# be used as join keys against the routing output.
stop_times_df = pd.read_csv(os.path.join(gtfs_dir, 'stop_times.txt'), dtype={'trip_id': object})
stops_df = pd.read_csv(os.path.join(gtfs_dir, 'stops.txt'))
routes_df = pd.read_csv(os.path.join(gtfs_dir, 'routes.txt'))
trips_df = pd.read_csv(
    os.path.join(gtfs_dir, 'trips.txt'),
    dtype={'trip_id': object, 'shape_id': object},
)
shapes_df = pd.read_csv(os.path.join(gtfs_dir, 'shapes.txt'), dtype={'shape_id': object})

In [4]:
# Display the stops table read from stops.txt (the notebook truncates the view).
stops_df

Unnamed: 0,stop_id,stop_code,platform_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,position,direction,wheelchair_boarding,tts_stop_name,area_id
0,2584569,,,San Luis Plaza (Eastbound),,37.055539,-120.865785,,,0,,America/Los_Angeles,,,0,,
1,2584570,,,Dos Palos Y Cafe 33 (to Merced),,37.048181,-120.635910,,,0,,America/Los_Angeles,,,0,,
2,2584571,,,Dos Palos Cafe 33 (to Los Banos),,37.048215,-120.635942,,,0,,America/Los_Angeles,,,0,,
3,2584573,,,Foster Farms (To Merced),,37.391376,-120.722635,,,0,,America/Los_Angeles,,,0,,
4,2584574,,,G St @ 11th (southbound),,37.292708,-120.479035,,,0,,America/Los_Angeles,,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,782489,,,Obanion Park,,36.992336,-120.626187,566.0,,0,,America/Los_Angeles,,,0,,566.0
481,835001,,,Castle H.S.A.,,37.361993,-120.572221,,,0,,America/Los_Angeles,,,0,,
482,835719,,,T St. @ 3rd St.,,37.291789,-120.503653,,,0,,America/Los_Angeles,,,0,,
483,844203,,,Foster Farms (To Livingston),,37.391296,-120.722486,562.0,,0,,America/Los_Angeles,,,0,,562.0


In [5]:
# Column names, non-null counts and dtypes of the routing links as loaded.
routing_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 11820 entries, 0 to 11819
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   11820 non-null  int64   
 1   v                   11820 non-null  int64   
 2   agency_raw_name     11820 non-null  object  
 3   shape_id            11820 non-null  object  
 4   trip_id             11820 non-null  object  
 5   fromIntersectionId  11820 non-null  object  
 6   toIntersectionId    11820 non-null  object  
 7   shstReferenceId     11820 non-null  object  
 8   shstGeometryId      11820 non-null  object  
 9   agency_shape_id     11820 non-null  object  
 10  method              11820 non-null  object  
 11  geometry            11820 non-null  geometry
dtypes: geometry(1), int64(2), object(9)
memory usage: 1.1+ MB


In [7]:
# Bring the route short/long names onto every trip. The left join keeps all
# trips, whether or not a matching route row exists.
route_name_cols = ['route_id', 'route_short_name', 'route_long_name']
trips_df = trips_df.merge(routes_df[route_name_cols], how='left', on='route_id')

In [8]:
# Column names, non-null counts and dtypes of the trips table.
trips_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 419 entries, 0 to 418
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   route_id                     419 non-null    int64  
 1   service_id                   419 non-null    object 
 2   trip_id                      419 non-null    object 
 3   trip_short_name              0 non-null      float64
 4   trip_headsign                56 non-null     object 
 5   direction_id                 419 non-null    int64  
 6   block_id                     56 non-null     float64
 7   shape_id                     419 non-null    object 
 8   bikes_allowed                0 non-null      float64
 9   wheelchair_accessible        0 non-null      float64
 10  trip_type                    0 non-null      float64
 11  drt_max_travel_time          0 non-null      float64
 12  drt_avg_travel_time          0 non-null      float64
 13  drt_advance_book_min

In [9]:
# Attach route and direction attributes to each routed link, matching on
# both trip_id and shape_id. The left join keeps every routed link.
trip_attr_cols = [
    'trip_id', 'shape_id', 'route_id', 'direction_id',
    'route_short_name', 'route_long_name',
]
routing_df = routing_df.merge(
    trips_df[trip_attr_cols],
    how='left',
    on=['trip_id', 'shape_id'],
)

# Label these rows as coming from the routing result.
routing_df['source'] = 'routing'

In [10]:
# Column names, non-null counts and dtypes of the routing links.
routing_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 11820 entries, 0 to 11819
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   11820 non-null  int64   
 1   v                   11820 non-null  int64   
 2   agency_raw_name     11820 non-null  object  
 3   shape_id            11820 non-null  object  
 4   trip_id             11820 non-null  object  
 5   fromIntersectionId  11820 non-null  object  
 6   toIntersectionId    11820 non-null  object  
 7   shstReferenceId     11820 non-null  object  
 8   shstGeometryId      11820 non-null  object  
 9   agency_shape_id     11820 non-null  object  
 10  method              11820 non-null  object  
 11  geometry            11820 non-null  geometry
 12  route_id            11820 non-null  int64   
 13  direction_id        11820 non-null  int64   
 14  route_short_name    11820 non-null  object  
 15  route_long_name     11820 no

In [11]:
# Keep only the trips whose trip_id appears in the routing result.
routed_trip_ids = routing_df['trip_id'].unique()
trips_df = trips_df.loc[trips_df['trip_id'].isin(routed_trip_ids)]

In [12]:
# Column names, non-null counts and dtypes of the trips table.
trips_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 67 entries, 0 to 404
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   route_id                     67 non-null     int64  
 1   service_id                   67 non-null     object 
 2   trip_id                      67 non-null     object 
 3   trip_short_name              0 non-null      float64
 4   trip_headsign                15 non-null     object 
 5   direction_id                 67 non-null     int64  
 6   block_id                     15 non-null     float64
 7   shape_id                     67 non-null     object 
 8   bikes_allowed                0 non-null      float64
 9   wheelchair_accessible        0 non-null      float64
 10  trip_type                    0 non-null      float64
 11  drt_max_travel_time          0 non-null      float64
 12  drt_avg_travel_time          0 non-null      float64
 13  drt_advance_book_min 

# Enrich stop_times in two joins:
#   1. inner join on trip_id -> keeps only stop_times rows whose trip is in
#      trips_df and adds shape/route/direction attributes;
#   2. left join on stop_id  -> adds the stop coordinates from stops.txt.
# NOTE(review): the cell after this one joins the same stop_lat/stop_lon
# columns onto stop_times_df again.
trip_attr_cols = [
    'trip_id', 'shape_id', 'route_id', 'direction_id',
    'route_short_name', 'route_long_name',
]
stop_times_df = (
    stop_times_df
    .merge(trips_df[trip_attr_cols], how='inner', on='trip_id')
    .merge(stops_df[['stop_id', 'stop_lat', 'stop_lon']], how='left', on='stop_id')
)

In [13]:
# Columns reused by the joins in this cell.
stop_coord_cols = ['stop_lat', 'stop_lon']
trip_attr_cols = [
    'trip_id', 'shape_id', 'route_id', 'direction_id',
    'route_short_name', 'route_long_name',
]

# Attach stop coordinates to every stop_times row.
# Any stop_lat / stop_lon already on stop_times_df (an earlier cell joins them
# as well, and so would a re-run of this cell) is dropped first. Without that,
# pandas suffixes the overlapping columns to *_x / *_y and the plain
# 'stop_lon' / 'stop_lat' lookups below raise KeyError.
stop_times_df = pd.merge(
    stop_times_df.drop(columns=stop_coord_cols, errors='ignore'),
    stops_df[['stop_id'] + stop_coord_cols],
    how='left',
    on=['stop_id']
)

# One point per stop_times row, in the same CRS as the routed links.
stop_times_gdf = gpd.GeoDataFrame(
    stop_times_df,
    geometry = gpd.points_from_xy(stop_times_df['stop_lon'], stop_times_df['stop_lat']),
    crs = routing_df.crs
)

# Connect each trip's stops, ordered by stop_sequence, into one LineString.
stop_times_line_gdf = stop_times_gdf.sort_values(by=['stop_sequence']).groupby(['trip_id'])['geometry'].apply(
    lambda x: LineString(x.tolist())
)

# The groupby/apply result is not guaranteed to carry the CRS of the points,
# so set it explicitly; otherwise this layer may end up without a CRS while
# the routing layer it is later combined with has one.
stop_times_line_gdf = gpd.GeoDataFrame(
    stop_times_line_gdf,
    geometry = 'geometry',
    crs = routing_df.crs
)

# trip_id is the index level produced by the groupby; merging on its name
# brings the trip/route attributes back as columns.
stop_times_line_gdf = pd.merge(
    stop_times_line_gdf,
    trips_df[trip_attr_cols],
    how='inner',
    on =['trip_id']
)

stop_times_line_gdf['source'] = 'stop_times'

# Keep only the shape points whose shape_id is used by a retained trip and
# attach the trip attributes. Several trips can share one shape_id, so a
# shape point is repeated once per trip that uses its shape.
shapes_df = pd.merge(
    shapes_df,
    trips_df[trip_attr_cols],
    how = 'inner',
    on = ['shape_id']
)

In [14]:
# One point per shapes_df row, in the same CRS as the routed links.
shapes_gdf = gpd.GeoDataFrame(
    shapes_df,
    geometry = gpd.points_from_xy(shapes_df['shape_pt_lon'], shapes_df['shape_pt_lat']),
    crs = routing_df.crs
)

# shapes_df was joined to trips on shape_id, so a vertex appears once per
# trip that shares the shape. Keep a single row per (shape_id,
# shape_pt_sequence) so the line is not built with repeated vertices.
# NOTE(review): assumes shape_pt_sequence is unique within a shape_id in the
# raw shapes.txt, as the GTFS reference specifies -- confirm for this feed.
shape_vertices = (
    shapes_gdf
    .drop_duplicates(subset=['shape_id', 'shape_pt_sequence'])
    .sort_values(by=['shape_pt_sequence'])
)

# Connect each shape's vertices, in sequence order, into one LineString.
shapes_line_gdf = shape_vertices.groupby(['shape_id'])['geometry'].apply(
    lambda pts: LineString(pts.tolist())
)

# The groupby/apply result is not guaranteed to carry the CRS of the points,
# so set it explicitly; otherwise this layer may end up without a CRS while
# the routing layer it is later combined with has one.
shapes_line_gdf = gpd.GeoDataFrame(
    shapes_line_gdf,
    geometry = 'geometry',
    crs = routing_df.crs
)

# shape_id is the index level produced by the groupby; merging on its name
# yields one row per trip that uses the shape, with the trip/route attributes.
shapes_line_gdf = pd.merge(
    shapes_line_gdf,
    trips_df[['trip_id', 'shape_id', 'route_id', 'direction_id', 'route_short_name', 'route_long_name']],
    how = 'inner',
    on = ['shape_id']
)

shapes_line_gdf['source'] ='shapes'

In [15]:
# Print the column index of each of the three layers, in the order
# routing, stop_times lines, shapes lines.
for layer in (routing_df, stop_times_line_gdf, shapes_line_gdf):
    print(layer.columns)

Index(['u', 'v', 'agency_raw_name', 'shape_id', 'trip_id',
       'fromIntersectionId', 'toIntersectionId', 'shstReferenceId',
       'shstGeometryId', 'agency_shape_id', 'method', 'geometry', 'route_id',
       'direction_id', 'route_short_name', 'route_long_name', 'source'],
      dtype='object')
Index(['trip_id', 'geometry', 'shape_id', 'route_id', 'direction_id',
       'route_short_name', 'route_long_name', 'source'],
      dtype='object')
Index(['shape_id', 'geometry', 'trip_id', 'route_id', 'direction_id',
       'route_short_name', 'route_long_name', 'source'],
      dtype='object')


In [16]:
# Number of distinct trips in the routing layer.
routing_df['trip_id'].nunique()

67

In [17]:
# Number of distinct trips in the stop_times line layer.
stop_times_line_gdf['trip_id'].nunique()

67

In [18]:
# Number of distinct trips in the shapes line layer.
shapes_line_gdf['trip_id'].nunique()

67

In [19]:
# Number of distinct shapes in the shapes line layer.
shapes_line_gdf['shape_id'].nunique()

33

In [20]:
# Number of distinct shapes in the routing layer.
routing_df['shape_id'].nunique()

33

In [21]:
# Stack the three layers into one table with a fresh 0..n-1 index. Columns
# that exist in only some of the layers are kept and left empty elsewhere;
# column order is not sorted.
review_layers = [routing_df, stop_times_line_gdf, shapes_line_gdf]
out_df = pd.concat(review_layers, ignore_index=True, sort=False)

In [22]:
# Write the combined layers to the interim folder as GeoJSON.
# NOTE(review): 'reivew' in the file name looks like a typo for 'review'; it
# is kept as-is because other code may read the file under this name.
review_path = os.path.join(interim_dir, 'routing-reivew.geojson')
out_df.to_file(review_path, driver='GeoJSON')