# MnMS Tutorial: Exploitation of GTFS Data

In [None]:
from gtfs_functions import Feed
from shapely.geometry import LineString, Point

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [None]:
import geopandas as gpd

## Read GTFS Data

The following code parses a GTFS feed (here a zip archive) into a set of datasets.

In [None]:
# Location of the GTFS archive, relative to the notebook's working directory.
gtfs_path = "gtfs-nl.zip"
# Feed object from gtfs_functions; the datasets below are read from its attributes.
feed = Feed(gtfs_path)

# Bind each dataset to a notebook-level name; the following cells use these names.
routes = feed.routes
stops = feed.stops
stop_times = feed.stop_times
trips = feed.trips
shapes = feed.shapes

Three DataFrames (routes, stop_times and trips) and two GeoDataFrames (stops and shapes) are generated.

### Routes

In [None]:
# Preview the first rows of the routes dataset.
routes.head()

### Stops

In [None]:
# Preview the first rows of the stops dataset (includes a geometry column).
stops.head()

### Stop Times

In [None]:
# Preview the first rows of the stop_times dataset.
stop_times.head()

### Trips

In [None]:
# Preview the first rows of the trips dataset.
trips.head()

### Shapes

In [None]:
# Preview the first rows of the shapes dataset.
shapes.head()

### Calculate Stop Frequencies

In [None]:
# Boundaries of the time windows passed to the feed.
# NOTE(review): presumably hours of the day (15.5 = 15:30) — confirm against the gtfs_functions docs.
time_windows = [0, 6, 9, 15.5, 19, 22, 24]

# Rebinds `feed` to a new Feed built with the custom windows; the cells below read from this instance.
feed = Feed(gtfs_path, time_windows=time_windows)
stop_freq = feed.stops_freq
stop_freq.head()

### Calculate Line Frequencies

In [None]:
# Line frequencies as exposed by the feed; preview the first rows.
line_freq = feed.lines_freq
line_freq.head()

### Cut the Shapes into Segments

In [None]:
# Segments as exposed by the feed; preview the first rows.
segments_gdf = feed.segments
segments_gdf.head()

### Calculate Segment Speeds

In [None]:
# Average speeds as exposed by the feed; `speeds` is reused by the heatmap cell further down.
speeds = feed.avg_speeds
speeds.head()

### Calculate Segment Frequencies

In [None]:
# Segment frequencies as exposed by the feed; preview the first rows.
segments_freq = feed.segments_freq
segments_freq.head()

### Plot stop coordinates

In [None]:
# `stops` is a GeoDataFrame, so its geometry column already holds point
# geometries: read the coordinates through the vectorised GeoSeries accessors
# instead of re-wrapping every stop in a new shapely Point inside a Python loop.
stops_coords = stops["geometry"]
xs = stops_coords.x
ys = stops_coords.y

# Explicit figure/axes so the plot carries its own title and axis labels.
fig, ax = plt.subplots()
ax.scatter(xs, ys, color="red", s=0.1)
# NOTE(review): the labels assume lon/lat coordinates — confirm the CRS of `stops`.
ax.set(title="GTFS stops", xlabel="Longitude", ylabel="Latitude")

plt.show()

### Extract a list of stops for a specific line

GTFS does not link routes to stops directly: stops are associated with trips.
Each trip represents a single journey of a vehicle along a particular route/path.

Assuming we know the id of one route in `routes`, we can recover its stops by combining the `trips`, `stop_times` and `stops` datasets.

In [None]:
route_id_example_1 = "17748"  # first route on trips, (Sprinter) Uitgeest <-> Rotterdam Centraal

# Keep only the trips that run on the example route.
filtered_trips = trips[trips["route_id"] == route_id_example_1]

# Two chained joins (pandas merges are inner joins by default):
# trips -> stop_times on the trip id, then -> stops on the stop id.
trips_with_stop_times = filtered_trips.merge(stop_times, on="trip_id")
merged_data = trips_with_stop_times.merge(stops, on="stop_id")

# "_y" is the suffix pandas gives to the right-hand copy of a column present on
# both sides of a merge — presumably the copies contributed by `stops`; verify.
stop_columns = ["stop_id", "stop_name_y", "stop_lat_y", "stop_lon_y", "geometry_y"]

# One row per stop *name* (not per stop id); the first occurrence is kept.
distinct_stops = merged_data[stop_columns].drop_duplicates(subset="stop_name_y", keep="first")

print(distinct_stops.to_string())

We can observe that the order of the stops does not match the order of the route: the rows are listed in merge order, whereas the line should run from Uitgeest to Rotterdam Centraal.

### Plot the path and stops for this specific line

In [None]:
# Geometries of the de-duplicated stops of the example route.
stops_coords = distinct_stops["geometry_y"]

# Wrap each geometry in a shapely Point, then split into coordinate lists
# that the plotting cells below consume.
points = [Point(geom) for geom in stops_coords]
xs = [pt.x for pt in points]
ys = [pt.y for pt in points]

In [None]:
# Scatter of the example route's stops on explicit axes, with a title and
# axis labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(xs, ys, color="red", s=1)
# NOTE(review): the labels assume lon/lat coordinates — confirm the CRS of the stops.
ax.set(title=f"Stops of route {route_id_example_1}", xlabel="Longitude", ylabel="Latitude")
plt.show()

In [None]:
# Join the stops in the order they are listed in `xs`/`ys`; the resulting
# path shows whether that listing order follows the actual route.
fig, ax = plt.subplots()
ax.plot(xs, ys)
# NOTE(review): the labels assume lon/lat coordinates — confirm the CRS of the stops.
ax.set(
    title=f"Stops of route {route_id_example_1} joined in listing order",
    xlabel="Longitude",
    ylabel="Latitude",
)
plt.show()

Plotting the whole path of the line confirms that the stops are not listed in the order in which the route serves them.

### Histogram with Stop Frequencies

In [None]:
# Restrict the histogram to stops whose `min_per_trip` value is below 50.
frequent_stops = stop_freq[stop_freq["min_per_trip"] < 50]

# The figure is the cell's last expression, so the notebook renders it.
px.histogram(
    frequent_stops,
    x="min_per_trip",
    title="Stop frequencies",
    template="simple_white",
    nbins=8,
)

### (WIP) HeatMap with Scheduled Speeds

HeatMap with scheduled speeds per segment for one specific route and direction.

In [None]:
# WIP

# Rows of the speed table for one route in one direction.
is_selected = (speeds.direction_id == '0') & (speeds.route_name == '47 Geldermalsen - Gorinchem')

dir_0 = (
    speeds.loc[is_selected]
    # `.assign` returns a new frame, so the filtered slice of `speeds` is never
    # written to. The hour is the integer before the first ':' of the window label.
    .assign(hour=lambda df: df.window.apply(lambda x: int(x.split(':')[0])))
    # A single multi-key sort: chronological windows first, stops in sequence
    # within each window. Two consecutive single-column sorts do not guarantee
    # this, because the default single-column sort algorithm is not stable.
    .sort_values(by=['hour', 'stop_sequence'])
)

# One cell per (time window, stop), coloured by the scheduled speed.
fig = go.Figure(
    data=go.Heatmap(
        z=dir_0.speed_kmh,
        y=dir_0.start_stop_name,
        x=dir_0.window,
        hoverongaps=False,
        colorscale=px.colors.colorbrewer.RdYlBu,
        reversescale=False,
    )
)

# Reversed y axis with the x axis on top, so the figure reads from the top-left corner.
fig.update_yaxes(title_text='Stop', autorange='reversed')
fig.update_xaxes(title_text='Hour of day', side='top')
fig.update_layout(
    showlegend=False,
    height=600,
    width=1000,
    title='Speed heatmap per direction and hour of the day',
)

fig.show()