# MnMS Tutorial: Exploitation of GTFS Data

In [None]:
from gtfs_functions import Feed
from shapely.geometry import LineString, Point

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [None]:
import geopandas as gpd

## Read GTFS Data

The following code parses a GTFS archive (here a zip file) into datasets.

In [None]:
# Path of the zipped GTFS feed to load.
gtfs_path = "lyon_tcl.zip"
feed = Feed(gtfs_path)

# Keep a notebook-level handle on each table exposed by the feed; the
# following cells use these names directly.
routes = feed.routes
stops = feed.stops
stop_times = feed.stop_times
trips = feed.trips
shapes = feed.shapes

Three DataFrames (routes, stop_times and trips) and two GeoDataFrames (stops and shapes) are generated.

### Routes

In [None]:
# Preview the first rows of the routes table.
routes.head()

### Stops

In [None]:
# Preview the first rows of the stops table.
stops.head()

### Stop Times

In [None]:
# Preview the first rows of the stop_times table.
stop_times.head()

### Trips

In [None]:
# Preview the first rows of the trips table.
trips.head()

### Shapes

In [None]:
# Preview the first rows of the shapes table.
shapes.head()

### Calculate Stop Frequencies

In [None]:
# Boundaries of the time windows handed to Feed.
# NOTE(review): presumably hours of the day (15.5 would be 15:30) - confirm
# against the gtfs_functions documentation.
time_windows = [0, 6, 9, 15.5, 19, 22, 24]

# Re-create the feed with the custom time windows; the tables extracted
# earlier (routes, stops, ...) still come from the first Feed instance.
feed = Feed(gtfs_path, time_windows=time_windows)
stop_freq = feed.stops_freq
stop_freq.head()

### Calculate Line Frequencies

In [None]:
# Line frequencies as exposed by the feed created with the custom time
# windows; preview the first rows.
line_freq = feed.lines_freq
line_freq.head()

### Cut the Shapes into Segments

In [None]:
# Segments table exposed by the feed; preview the first rows.
segments_gdf = feed.segments
segments_gdf.head()

### Calculate Segment Speeds

In [None]:
# Average speeds table exposed by the feed; preview the first rows.
speeds = feed.avg_speeds
speeds.head()

### Calculate Segment Frequencies

In [None]:
# Segment frequencies table exposed by the feed; preview the first rows.
segments_freq = feed.segments_freq
segments_freq.head()

### Plot stops coordinates

In [None]:
# Turn every stop geometry into a Point, split the coordinates into two
# lists and draw all stops as small red dots.
stops_coords = stops["geometry"]
points = [Point(geom) for geom in stops_coords]

xs = [pt.x for pt in points]
ys = [pt.y for pt in points]

plt.scatter(xs, ys, color="red", s=0.1)
plt.show()

### Extract a list of stops for a specific line

There is no direct relationship between routes and stops in GTFS; instead, stops are associated with trips.
Each trip represents a single journey of a vehicle along a particular route/path.

Assume we know the id of one route in `routes`. We will also need the `stops`, `trips` and `stop_times` datasets.

In [None]:
route_id_example_1 = "T2a80"

# Keep only the trips that belong to the example route.
ftrips = trips.loc[trips['route_id'] == route_id_example_1]

# Equivalent of two INNER JOINs: trips -> stop_times -> stops.
merged_data = ftrips.merge(stop_times, on='trip_id').merge(stops, on='stop_id')

# Select the longest trip of the route, i.e. the trip_id with the most rows
# in the joined table. Rows are counted once per trip instead of once per
# row of the table. sort=False keeps the trips in order of first appearance,
# so idxmax() resolves ties in favour of the first trip encountered.
longest_trip_id = ""
longest_stops_count = 0

stops_per_trip = merged_data.groupby("trip_id", sort=False).size()
if not stops_per_trip.empty:
    longest_trip_id = stops_per_trip.idxmax()
    longest_stops_count = stops_per_trip.loc[longest_trip_id]

# Keep the stop columns of the longest trip, ordered along the trip.
# The "_y" columns are the ones contributed by `stops`: merge() adds that
# suffix to right-hand columns whose names clash with the left-hand table.
merged_stops = merged_data[['stop_id', 'trip_id', 'stop_sequence', 'stop_name_y', 'stop_lat_y', 'stop_lon_y', 'geometry_y']]
distinct_stops = merged_stops.loc[merged_stops["trip_id"] == longest_trip_id]
distinct_stops = distinct_stops.sort_values(['stop_sequence'])
# reset_index() keeps the previous row labels in an "index" column.
distinct_stops = distinct_stops.reset_index()


### Function definition to get the list of stops for the longest trip of a route

In [None]:
def getLongestTripStops(route_id):
    """Return the stops of the longest trip of a route, ordered along the trip.

    The function reads the notebook-level ``trips``, ``stop_times`` and
    ``stops`` tables. The "longest" trip is the trip of the route that has
    the most rows once trips, stop_times and stops are joined; when several
    trips tie, the one that appears first in the joined table is used.

    Args:
        route_id: Value to look up in the ``route_id`` column of ``trips``.

    Returns:
        A DataFrame with the columns ``index``, ``stop_id``, ``trip_id``,
        ``stop_sequence``, ``stop_name_y``, ``stop_lat_y``, ``stop_lon_y``
        and ``geometry_y``, sorted by ``stop_sequence``. It is empty when the
        route has no trips or none of its trips has matching stops.
    """
    # Keep only the trips that belong to the requested route.
    route_trips = trips.loc[trips["route_id"] == route_id]

    # Equivalent of two INNER JOINs: trips -> stop_times -> stops.
    merged_data = route_trips.merge(stop_times, on='trip_id').merge(stops, on='stop_id')

    # Count the rows of every trip once. sort=False keeps the trips in order
    # of first appearance, so idxmax() picks the first trip among ties.
    stops_per_trip = merged_data.groupby("trip_id", sort=False).size()
    longest_trip_id = "" if stops_per_trip.empty else stops_per_trip.idxmax()

    # The "_y" columns are the ones contributed by `stops`: merge() adds that
    # suffix to right-hand columns whose names clash with the left-hand table.
    merged_stops = merged_data[['stop_id', 'trip_id', 'stop_sequence', 'stop_name_y', 'stop_lat_y', 'stop_lon_y', 'geometry_y']]
    distinct_stops = merged_stops.loc[merged_stops["trip_id"] == longest_trip_id]
    distinct_stops = distinct_stops.sort_values(["stop_sequence"])
    # reset_index() keeps the previous row labels in an "index" column.
    distinct_stops = distinct_stops.reset_index()

    return distinct_stops

### Plot the path and stops for this specific line

In [None]:
# Collect the x/y coordinates of the stops of the selected trip, in the
# order in which they appear in distinct_stops.
stops_coords = distinct_stops["geometry_y"]
points = [Point(geom) for geom in stops_coords]

xs = [pt.x for pt in points]
ys = [pt.y for pt in points]

In [None]:
# Draw the stops of the selected trip as red dots.
plt.scatter(xs, ys, color="red", s=1)
plt.show()

In [None]:
# Join the stops with a red line, following their order in xs/ys.
plt.plot(xs, ys, color='red')
plt.show()

Plotting the whole path of the line shows that the stops are not in a suitable order.

### Plot all the lines based on route type

In [None]:
# In Lyon GTFS: Tram = 0, Subway = 1, Bus = 3
tram_id = 0
subway_id = 1
bus_id = 3

# Select the routes of one mode with a boolean filter; replace subway_id
# with tram_id or bus_id to draw another mode.
selected_route_ids = routes.loc[routes["route_type"] == subway_id, "route_id"]

for route_id in selected_route_ids:
    # getLongestTripStops() does its own trips/stop_times/stops join, so
    # nothing needs to be joined here.
    distinct_stops = getLongestTripStops(route_id)

    stops_coords = distinct_stops["geometry_y"]
    points = [Point(geom) for geom in stops_coords]

    xs = [pt.x for pt in points]
    ys = [pt.y for pt in points]

    # One line per route; matplotlib assigns the colours.
    plt.plot(xs, ys)

plt.show()