In [23]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

from shapely.geometry import LineString

# remote i/o

In [24]:
# Project directory. The checkout location differs from machine to machine, so
# it can be overridden with the RANCH_ROOT_DIR environment variable; when that
# variable is unset (or empty) the original location is used.
root_dir = os.environ.get("RANCH_ROOT_DIR") or "D:/github/ranch"

# Data sub-directories located under the project root.
external_dir = os.path.join(root_dir, "data", "external")
interim_dir = os.path.join(root_dir, "data", "interim")

In [65]:
# Folder that holds the routing result read below and receives the review file.
routing_result_dir = os.path.join(
    root_dir,
    "tests",
    "scratch",
    "test_vta_more_routes",
)

In [66]:
# Folder the GTFS text files (stop_times, stops, routes, trips, shapes) are read from.
gtfs_input_dir = os.path.join(
    external_dir,
    "gtfs",
    "vta",
    "VTA_2015_8_27",
)

# read gtfs and routing results

In [67]:
def _read_gtfs_table(file_name, dtype=None):
    """Read one GTFS text file from ``gtfs_input_dir`` into a DataFrame.

    ``dtype`` is forwarded to ``pd.read_csv``; ``None`` lets pandas infer
    the column types.
    """
    return pd.read_csv(os.path.join(gtfs_input_dir, file_name), dtype=dtype)


# Routing result produced for the bus trips.
routing_df = gpd.read_file(os.path.join(routing_result_dir, 'bus_routing.geojson'))

# A 'source' column is assigned further down to tag the origin of each row, so
# any incoming column of that name is moved out of the way first.
routing_df = routing_df.rename(columns={'source': 'routing_method'})

# GTFS tables; identifier columns used as join keys are kept as strings.
stop_times_df = _read_gtfs_table('stop_times.txt', dtype={'trip_id': object})
stops_df = _read_gtfs_table('stops.txt')
routes_df = _read_gtfs_table('routes.txt')
trips_df = _read_gtfs_table('trips.txt', dtype={'trip_id': object, 'shape_id': object})
shapes_df = _read_gtfs_table('shapes.txt', dtype={'shape_id': object})

  exec(code_obj, self.user_global_ns, self.user_ns)


In [68]:
# quick look at the stops table read from stops.txt
stops_df

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id
0,4785,64785,SANTA TERESA STATION (1),,37.235966,-121.789491,1
1,4735,64735,ALMADEN STATION (1),,37.244565,-121.870711,1
2,4733,64733,OHLONE-CHYNOWETH STATION (1),,37.257423,-121.860279,1
3,4780,64780,CAPITOL STATION (1),,37.273901,-121.862964,1
4,4778,64778,TAMIEN STATION (1),,37.311257,-121.884300,1
...,...,...,...,...,...,...,...
3861,5662,65662,CAMINO ARROYO & GILROY CROSSING,,37.001771,-121.550668,1
3862,5422,65422,CAMINO ARROYO & LINDSTEADT,,37.007519,-121.553642,1
3863,5423,65423,CAMINO ARROYO & RENZ,,37.005830,-121.551771,1
3864,5659,65659,CAMINO ARROYO & RENZ,,37.006689,-121.552359,1


In [69]:
# columns, dtypes and non-null counts of the routing result as loaded
routing_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 29775 entries, 0 to 29774
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   29775 non-null  float64 
 1   v                   29775 non-null  float64 
 2   agency_raw_name     29775 non-null  object  
 3   shape_id            29775 non-null  object  
 4   trip_id             29775 non-null  object  
 5   fromIntersectionId  29775 non-null  object  
 6   toIntersectionId    29775 non-null  object  
 7   shstReferenceId     29775 non-null  object  
 8   shstGeometryId      29775 non-null  object  
 9   agency_shape_id     29775 non-null  object  
 10  method              29775 non-null  object  
 11  geometry            29775 non-null  geometry
dtypes: float64(2), geometry(1), object(9)
memory usage: 2.7+ MB


# add route name to trips

In [70]:
# Look up every trip's route in the routes table and attach the route names;
# a left join keeps all trips even when a route_id has no match.
route_name_cols = ['route_id', 'route_short_name', 'route_long_name']
trips_df = trips_df.merge(routes_df[route_name_cols], on='route_id', how='left')

In [71]:
# check columns and non-null counts after the route names were added
trips_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8606 entries, 0 to 8605
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   route_id          8606 non-null   int64 
 1   service_id        8606 non-null   object
 2   trip_id           8606 non-null   object
 3   trip_headsign     8606 non-null   object
 4   direction_id      8606 non-null   int64 
 5   block_id          8606 non-null   int64 
 6   shape_id          8606 non-null   object
 7   route_short_name  8606 non-null   int64 
 8   route_long_name   8606 non-null   object
dtypes: int64(4), object(5)
memory usage: 672.3+ KB


# add trip info to ranch routing result

In [72]:
# Join the trip/route attributes onto every routed link, matching on both
# trip_id and shape_id, then tag the rows with the layer they belong to.
trip_attr_cols = [
    'trip_id',
    'shape_id',
    'route_id',
    'direction_id',
    'route_short_name',
    'route_long_name',
]
routing_df = pd.merge(
    left=routing_df,
    right=trips_df[trip_attr_cols],
    on=['trip_id', 'shape_id'],
    how='left',
)
routing_df['source'] = 'routing'

In [73]:
# check the routing result after the trip attributes were joined on
routing_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 29775 entries, 0 to 29774
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   29775 non-null  float64 
 1   v                   29775 non-null  float64 
 2   agency_raw_name     29775 non-null  object  
 3   shape_id            29775 non-null  object  
 4   trip_id             29775 non-null  object  
 5   fromIntersectionId  29775 non-null  object  
 6   toIntersectionId    29775 non-null  object  
 7   shstReferenceId     29775 non-null  object  
 8   shstGeometryId      29775 non-null  object  
 9   agency_shape_id     29775 non-null  object  
 10  method              29775 non-null  object  
 11  geometry            29775 non-null  geometry
 12  route_id            29775 non-null  int64   
 13  direction_id        29775 non-null  int64   
 14  route_short_name    29775 non-null  int64   
 15  route_long_name     29775 no

In [74]:
# Keep only the trips that appear in the routing result.
routed_trip_ids = routing_df['trip_id'].unique()
is_routed_trip = trips_df['trip_id'].isin(routed_trip_ids)
trips_df = trips_df[is_routed_trip]

In [75]:
# check how many trips remain after keeping only the routed ones
trips_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 107 entries, 0 to 2185
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   route_id          107 non-null    int64 
 1   service_id        107 non-null    object
 2   trip_id           107 non-null    object
 3   trip_headsign     107 non-null    object
 4   direction_id      107 non-null    int64 
 5   block_id          107 non-null    int64 
 6   shape_id          107 non-null    object
 7   route_short_name  107 non-null    int64 
 8   route_long_name   107 non-null    object
dtypes: int64(4), object(5)
memory usage: 8.4+ KB


# create stop time shapes from gtfs

In [76]:
# Columns of trips_df carried onto every output geometry.
trip_attr_cols = ['trip_id', 'shape_id', 'route_id', 'direction_id', 'route_short_name', 'route_long_name']

# Attach the stop coordinates to every stop_time record.
stop_times_df = pd.merge(
    stop_times_df,
    stops_df[['stop_id', 'stop_lat', 'stop_lon']],
    how='left',
    on =['stop_id']
)

# Only trips present in trips_df survive the inner join further down, so the
# point and line geometries are built for those trips only instead of for every
# trip in the feed. The copy keeps the new frame independent of stop_times_df.
routed_stop_times_df = stop_times_df[stop_times_df['trip_id'].isin(trips_df['trip_id'])].copy()

stop_times_gdf = gpd.GeoDataFrame(
    routed_stop_times_df,
    geometry = gpd.points_from_xy(routed_stop_times_df['stop_lon'], routed_stop_times_df['stop_lat']),
    crs = routing_df.crs
)

# One line per trip, connecting its stops in stop_sequence order.
stop_times_line_gdf = stop_times_gdf.sort_values(by=['stop_sequence']).groupby(['trip_id'])['geometry'].apply(
    lambda x: LineString(x.tolist())
)

# Pass the CRS explicitly so the line layer is declared in the same CRS as the
# points it was built from (and as the routing layer it is later stacked with).
stop_times_line_gdf = gpd.GeoDataFrame(stop_times_line_gdf, geometry = 'geometry', crs = routing_df.crs)

# trip_id is the index level name on the left side and a column on the right.
stop_times_line_gdf = pd.merge(
    stop_times_line_gdf,
    trips_df[trip_attr_cols],
    how='inner',
    on =['trip_id']
)

stop_times_line_gdf['source'] = 'stop_times'

# Keep the points of the shapes used by the retained trips. This is a filter
# rather than an inner join with trips_df: a join repeats every shape point once
# per trip that shares the shape, and those repeats would end up as duplicated
# vertices in the shape lines built from shapes_df.
shapes_df = shapes_df[shapes_df['shape_id'].isin(trips_df['shape_id'])]

# create shapes from gtfs

In [77]:
# Every shape point must be used exactly once when a line is built. shapes_df
# can hold the same point several times (e.g. once per trip after a join with
# trips_df), which would repeat each vertex in the resulting LineString, so the
# points are reduced to one row per (shape_id, shape_pt_sequence) first.
shape_pts_df = shapes_df.drop_duplicates(subset=['shape_id', 'shape_pt_sequence'])

shapes_gdf = gpd.GeoDataFrame(
    shape_pts_df,
    geometry = gpd.points_from_xy(shape_pts_df['shape_pt_lon'], shape_pts_df['shape_pt_lat']),
    crs = routing_df.crs
)

# One line per shape, connecting its points in shape_pt_sequence order.
shapes_line_gdf = shapes_gdf.sort_values(by=['shape_pt_sequence']).groupby(['shape_id'])['geometry'].apply(
    lambda x: LineString(x.tolist())
)

# Pass the CRS explicitly so the line layer is declared in the same CRS as the
# points it was built from (and as the routing layer it is later stacked with).
shapes_line_gdf = gpd.GeoDataFrame(shapes_line_gdf, geometry = 'geometry', crs = routing_df.crs)

# Repeat each shape line for every trip that uses it and attach the trip/route
# attributes; shape_id is the index level name on the left side.
shapes_line_gdf = pd.merge(
    shapes_line_gdf,
    trips_df[['trip_id', 'shape_id', 'route_id', 'direction_id', 'route_short_name', 'route_long_name']],
    how = 'inner',
    on = ['shape_id']
)

shapes_line_gdf['source'] ='shapes'

# combine the three data sources

In [78]:
# Compare the column layout of the three layers before stacking them.
for layer in (routing_df, stop_times_line_gdf, shapes_line_gdf):
    print(layer.columns)

Index(['u', 'v', 'agency_raw_name', 'shape_id', 'trip_id',
       'fromIntersectionId', 'toIntersectionId', 'shstReferenceId',
       'shstGeometryId', 'agency_shape_id', 'method', 'geometry', 'route_id',
       'direction_id', 'route_short_name', 'route_long_name', 'source'],
      dtype='object')
Index(['trip_id', 'geometry', 'shape_id', 'route_id', 'direction_id',
       'route_short_name', 'route_long_name', 'source'],
      dtype='object')
Index(['shape_id', 'geometry', 'trip_id', 'route_id', 'direction_id',
       'route_short_name', 'route_long_name', 'source'],
      dtype='object')


In [79]:
# number of distinct trips in the routing layer
routing_df.trip_id.nunique()

107

In [80]:
# number of distinct trips in the stop_times layer
stop_times_line_gdf.trip_id.nunique()

107

In [81]:
# number of distinct trips in the shapes layer
shapes_line_gdf.trip_id.nunique()

107

In [82]:
# number of distinct shapes in the shapes layer
shapes_line_gdf.shape_id.nunique()

33

In [83]:
# number of distinct shapes in the routing layer
routing_df.shape_id.nunique()

33

In [84]:
# Stack the routing, stop_times and shapes layers into one table. Rows are
# renumbered from zero, and columns missing from a layer are left empty for
# that layer's rows without re-sorting the column order.
layers = [routing_df, stop_times_line_gdf, shapes_line_gdf]
out_df = pd.concat(layers, ignore_index=True, sort=False)

# write out tableau data

In [85]:
# Write the combined layers next to the routing result as GeoJSON.
# NOTE(review): 'reivew' in the file name looks like a typo for 'review'; it is
# kept as-is because other tools may open this exact file name — confirm before
# renaming.
review_path = os.path.join(routing_result_dir, 'routing-reivew.geojson')
out_df.to_file(review_path, driver='GeoJSON')