Skip to content

Commit

Permalink
[performance] Trim by times only on init (#121)
Browse files Browse the repository at this point in the history
* Trim routes stop times by timeframe just 1x

* Remove reporting on timeframe subsetting
  • Loading branch information
kuanb committed Dec 24, 2018
1 parent 16b3c73 commit 85daf75
Showing 1 changed file with 6 additions and 13 deletions.
19 changes: 6 additions & 13 deletions peartree/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ def __init__(
# Initialize common parameters
self.target_time_start = target_time_start
self.target_time_end = target_time_end
self.stop_times = stop_times.copy()

# Limit stop times held to just those in time range
start_mask = (stop_times.arrival_time >= target_time_start)
end_mask = (stop_times.arrival_time <= target_time_end)
self.stop_times = stop_times[start_mask & end_mask].copy()

# We use route_id as the index to ensure that subselection by
# route_id from target_route_ids is more performant
Expand All @@ -51,18 +55,7 @@ def generate_route_costs(self, route_id: str):

# Get just the stop times related to these trips
st_trip_id_mask = self.stop_times.trip_id.isin(trips.trip_id)
stimes_init = self.stop_times[st_trip_id_mask].copy()

# Then subset further by just the time period that we care about
start_time_mask = (stimes_init.arrival_time >= self.target_time_start)
end_time_mask = (stimes_init.arrival_time <= self.target_time_end)
stimes = stimes_init[start_time_mask & end_time_mask]

# Report on progress if requested
a = len(stimes_init.trip_id.unique())
b = len(stimes.trip_id.unique())
log('\tReduced selected trips on route {} from {} to {}.'.format(
route_id, a, b))
stimes = self.stop_times[st_trip_id_mask].copy()

trips_and_stop_times = pd.merge(trips,
stimes,
Expand Down

0 comments on commit 85daf75

Please sign in to comment.