In [None]:
import branca 
import folium
import geopandas as gpd
import numpy as np
import pandas as pd

import create_table
import neighbor
import utils
from update_vars import analysis_date, PROJECT_CRS, PARTRIDGE_FOLDER

In [None]:
# Restrict the whole walkthrough to a single trip so every intermediate
# table stays small enough to inspect by eye.
one_trip = "30-046wc7nmq"

# The same filter is reused below for stop_times, vehicle positions, and segments.
trip_filter = [[("trip_id", "==", one_trip)]]

trips = create_table.get_table("trips", analysis_date, filters=trip_filter)

trips.head(2)

In [None]:
# Pull only the shape(s) referenced by the selected trip, keeping just the
# id and geometry columns, then reproject to PROJECT_CRS.
shape_filter = [[("shape_id", "in", trips.shape_id)]]

shapes_unprojected = create_table.get_table(
    "shapes",
    analysis_date,
    filters=shape_filter,
    columns=["shape_id", "geometry"],
)
shapes = shapes_unprojected.to_crs(PROJECT_CRS)

shapes.head(2)

In [None]:
# Columns that together identify one trip.
trip_cols = ["schedule_gtfs_dataset_key", "trip_id"]

stops_projected = create_table.stop_times_projected_table(
    analysis_date, filters=trip_filter
)

# The pipeline itself does not do this (vp meters is an array there), but
# putting the next stop's meters on the same row illustrates the point
# more clearly. shift(-1) within each trip pulls the following row's value up.
next_stop_meters = stops_projected.groupby(trip_cols)["stop_meters"].shift(-1)

stops_projected = (
    stops_projected
    .assign(subseq_stop_meters=next_stop_meters)
    .rename(columns={"stop_id": "stop_id1"})
)

In [None]:
# Sort once up front. The direction array built below follows stop_sequence
# order and is attached back to stops_projected by position, so the frame
# must be in that same order or directions would land on the wrong stops.
stop_sort_cols = trip_cols + ["stop_sequence"]
stops_projected = stops_projected.sort_values(stop_sort_cols).reset_index(drop=True)

stops_condensed = utils.condense_by_trip(
    stops_projected,
    group_cols = trip_cols,
    sort_cols = stop_sort_cols,
    geometry_col = "geometry",
    array_cols = ["stop_sequence"]
)

# Only the first condensed row is read below, so make the single-trip
# assumption explicit instead of silently ignoring additional trips.
assert len(stops_condensed) == 1, (
    "Expected exactly one condensed trip; trip_filter matched "
    f"{len(stops_condensed)}"
)

# One (x, y) pair per stop, in stop_sequence order.
stop_coords = np.array(stops_condensed.geometry.iloc[0].coords)

# Direction of travel into each stop, from the offset
# (distance_east, distance_north) relative to the prior stop.
stop_direction_arr = np.asarray(
    # first value is unknown because there is no prior stop to compare to
    ["Unknown"] +
    [
        utils.cardinal_definition_rules(pt[0] - prior_pt[0], pt[1] - prior_pt[1])
        for pt, prior_pt in zip(stop_coords[1:], stop_coords[:-1])
    ]
)

# assign() returns a new frame, so re-running this cell never mutates a
# frame that an earlier cell already displayed.
stops_projected = stops_projected.assign(
    stop_primary_direction = stop_direction_arr,
    stop_opposite_direction = lambda df: df.stop_primary_direction.map(
        neighbor.OPPOSITE_DIRECTIONS),
)

In [None]:
# Vehicle positions for the same single trip selected by trip_filter.
vp_projected = create_table.vp_projected_table(analysis_date, filters=trip_filter)

In [None]:
# Base layer: stops, colored categorically by stop_sequence.
stops_vp_map = stops_projected.explore(
    column="stop_sequence",
    categorical=True,
    legend=False,
    tiles="CartoDB Positron",
    name="stops",
)

# Second layer on the same map: vehicle positions, colored by vp_idx.
vp_points = vp_projected[["vp_idx", "geometry"]]
vp_points.explore(
    column="vp_idx",
    m=stops_vp_map,
    categorical=False,
    name="vp",
)

# Layer control lets the reader toggle the stops / vp layers.
folium.LayerControl().add_to(stops_vp_map)
stops_vp_map

In [None]:
# Condense the trip's vehicle positions into a single row: one geometry
# plus parallel arrays of vp_idx and timestamps.
vp_nn = utils.condense_by_trip(
    vp_projected,
    group_cols = trip_cols,
    sort_cols = trip_cols + ["vp_idx"],
    geometry_col = "geometry",
    array_cols = ["vp_idx", "location_timestamp_local"]
)

# Only the first condensed row is read below, and the direction array is
# assigned as a single cell, so make the single-trip assumption explicit.
assert len(vp_nn) == 1, (
    f"Expected exactly one condensed trip; trip_filter matched {len(vp_nn)}"
)

# One (x, y) pair per vehicle position, in vp_idx order.
vp_coords = np.array(vp_nn.geometry.iloc[0].coords)

# Direction of travel into each vehicle position, from the offset
# (distance_east, distance_north) relative to the prior position.
vp_direction_arr = np.asarray(
    # first value is unknown because there is no prior vp to compare to
    ["Unknown"] +
    [
        utils.cardinal_definition_rules(pt[0] - prior_pt[0], pt[1] - prior_pt[1])
        for pt, prior_pt in zip(vp_coords[1:], vp_coords[:-1])
    ]
)

# Wrap in a list so the whole array is stored in the single condensed row.
vp_nn["vp_primary_direction"] = [vp_direction_arr]

In [None]:
# Give each geometry column a distinct name so stop, vp, and shape
# geometries can sit side by side on one row after merging.
stops_for_merge = stops_projected.rename(columns={"geometry": "stop_geometry"})
vp_for_merge = vp_nn.rename(columns={"geometry": "vp_geometry"})
shapes_for_merge = shapes.rename(columns={"geometry": "shape_geometry"})

gdf = (
    pd.merge(
        stops_for_merge,
        vp_for_merge,
        on=["schedule_gtfs_dataset_key", "trip_id"],
        how="inner",
    )
    .merge(shapes_for_merge, on="shape_id", how="inner")
    # https://datascientyst.com/combine-multiple-columns-into-single-one-in-pandas/
    # Need a single column for trip across operators
    .assign(
        trip_instance_key=lambda df: df[trip_cols].astype(str).T.agg("__".join)
    )
)

In [None]:
# Map the condensed vp geometry for the trip, colored by trip_id.
vp_nn[["trip_id", "geometry"]].explore(
    column="trip_id",
    tiles="CartoDB Positron",
)

In [None]:
# Wrap the per-row neighbor search so it can be called once with whole
# columns; np.vectorize invokes it element-by-element across the inputs.
find_vp_neighbors = np.vectorize(neighbor.two_nearest_neighbor_near_stop)

# Four parallel outputs, one element per row of gdf.
vp_before, vp_after, vp_before_meters, vp_after_meters = find_vp_neighbors(
    gdf.vp_primary_direction,
    gdf.vp_geometry,
    gdf.vp_idx,
    gdf.stop_geometry,
    gdf.stop_opposite_direction,
    gdf.shape_geometry,
    gdf.stop_meters,
)

In [None]:
# Attach the neighbor-search outputs back onto gdf as new columns.
neighbor_cols = {
    "prior_vp_idx": vp_before,
    "subseq_vp_idx": vp_after,
    "prior_vp_meters": vp_before_meters,
    "subseq_vp_meters": vp_after_meters,
}
gdf = gdf.assign(**neighbor_cols)

In [None]:
# For every stop row, look up the timestamps belonging to the prior and
# subsequent vp indices within that row's vp_idx / timestamp arrays.
timestamp_pairs = [
    neighbor.grab_vp_timestamp(prior_idx, subseq_idx, vp_idx_arr, timestamp_arr)
    for prior_idx, subseq_idx, vp_idx_arr, timestamp_arr in zip(
        gdf.prior_vp_idx,
        gdf.subseq_vp_idx,
        gdf.vp_idx,
        gdf.location_timestamp_local,
    )
]

# Split the (start, end) pairs into two parallel lists.
start_time_series = [start_time for start_time, _ in timestamp_pairs]
end_time_series = [end_time for _, end_time in timestamp_pairs]

In [None]:
# Store the looked-up vp timestamps alongside each stop row.
timestamp_cols = {
    "start_local_timestamp": start_time_series,
    "end_local_timestamp": end_time_series,
}
gdf = gdf.assign(**timestamp_cols)

In [None]:
# Keep only rows where neither vp index is the -1 value.
has_prior_vp = gdf.prior_vp_idx != -1
has_subseq_vp = gdf.subseq_vp_idx != -1
test_gdf = gdf[has_prior_vp & has_subseq_vp]

In [None]:
# Interpolate each stop's arrival time from its position (stop_meters)
# between the prior and subsequent vp positions and their timestamps.
interpolated_arrival_series = [
    neighbor.interpolate_stop_arrival_time(
        stop_meters,
        [prior_meters, subseq_meters],
        [start_timestamp, end_timestamp],
    )
    for stop_meters, prior_meters, subseq_meters, start_timestamp, end_timestamp
    in zip(
        test_gdf.stop_meters,
        test_gdf.prior_vp_meters,
        test_gdf.subseq_vp_meters,
        test_gdf.start_local_timestamp,
        test_gdf.end_local_timestamp,
    )
]

In [None]:
# test_gdf is a boolean-filtered slice of gdf, so setting a column on it in
# place raises SettingWithCopyWarning. assign() builds a new frame instead,
# which also keeps this cell safe to re-run.
test_gdf = test_gdf.assign(arrival_time = interpolated_arrival_series)

In [None]:
# Show the meters and timestamp columns next to the interpolated
# arrival_time for a quick visual check.
preview_cols = [
    "prior_vp_meters",
    "stop_meters",
    "subseq_vp_meters",
    "start_local_timestamp",
    "arrival_time",
    "end_local_timestamp",
]
test_gdf[preview_cols].head()

In [None]:
# Columns used as the merge key when arrival times are joined back onto gdf.
trip_stop_cols = ["trip_instance_key", "stop_sequence", "stop_meters"]

# Slim table: merge key plus the interpolated arrival time.
arrival_times_df = test_gdf[[*trip_stop_cols, "arrival_time"]]

In [None]:
# Left merge keeps every stop row in gdf; rows that were filtered out of
# test_gdf simply get a missing arrival_time.
gdf2 = pd.merge(left=gdf, right=arrival_times_df, on=trip_stop_cols, how="left")

In [None]:
# Intermediate columns to discard after arrival times have been derived.
drop_cols = [
    "stop_opposite_direction",
    # columns that came from the condensed vp table
    "vp_geometry",
    "vp_idx",
    "location_timestamp_local",
    "vp_primary_direction",
    # column that came from the shapes merge
    "shape_geometry",
    # nearest-neighbor outputs
    "prior_vp_idx",
    "subseq_vp_idx",
    "prior_vp_meters",
    "subseq_vp_meters",
    # vp timestamps used for interpolation
    "start_local_timestamp",
    "end_local_timestamp",
]

In [None]:
# Run the monotonicity / cross-stop interpolation step on the merged
# table, then discard the intermediate columns listed in drop_cols.
arrivals_enforced = neighbor.enforce_monotonicity_and_interpolate_across_stops(
    gdf2, trip_stop_cols
)
gdf3 = arrivals_enforced.drop(columns=drop_cols)

In [None]:
# Derive speeds from the stop arrival times, grouping by trip and
# identifying each stop row by trip + stop_sequence.
speed_group_cols = ["trip_instance_key"]
speed_stop_cols = ["trip_instance_key", "stop_sequence"]

gdf4 = neighbor.calculate_speed_from_stop_arrivals(
    gdf3,
    trip_cols=speed_group_cols,
    trip_stop_cols=speed_stop_cols,
)

In [None]:
# (rows, columns) of the table returned by calculate_speed_from_stop_arrivals.
gdf4.shape

In [None]:
# (rows, columns) after keeping only rows whose speed is present and below +inf.
has_speed = gdf4.speed_mph.notna()
below_inf = gdf4.speed_mph < np.inf
gdf4[has_speed & below_inf].shape

In [None]:
# Column dtypes of the speed table.
gdf4.dtypes

In [None]:
# Read the stop-to-stop segments, with the same single-trip filter applied
# at read time.
segments_path = f"{PARTRIDGE_FOLDER}stops_to_segments.parquet"
segments = gpd.read_parquet(segments_path, filters=trip_filter)

In [None]:
# Attach speeds to segment geometry; each segment is identified by the
# trip plus its starting and ending stop ids.
segment_key_cols = ["trip_id", "stop_id1", "stop_id2"]
speed_gdf = pd.merge(left=segments, right=gdf4, on=segment_key_cols)

# Keep only segments where a positive number of seconds elapsed.
positive_elapsed = speed_gdf.sec_elapsed > 0
speed_valid = speed_gdf[positive_elapsed]

In [None]:
# Step colormap over a 0-30 range for speed_mph.
COLORSCALE = branca.colormap.step.RdBu_10.scale(vmin=0, vmax=30)

# arrival_time is dropped before mapping.
# NOTE(review): presumably because datetime columns do not serialize for the
# folium map; confirm.
speed_map_gdf = speed_valid.drop(columns="arrival_time")
speed_map_gdf.explore(
    column="speed_mph",
    cmap=COLORSCALE,
    tiles="CartoDB Positron",
)