In [1]:
!pip3 install polars



In [2]:
import polars as pl
import os

# Compute pairs of consecutive stops in the timetable

## Load data

In [3]:
# Static GTFS feed: the source of the timetable used below.
STATIC_GTFS_URL = "https://eu.ftp.opendatasoft.com/stif/GTFS/IDFM-gtfs.zip"  # remote zip archive
STATIC_GTFS_FILE_PATH = "raw_data/gtfs.zip"  # local zip path (presumably the downloaded archive; confirm)
STATIC_GTFS_PATH = "raw_data/gtfs"  # directory the GTFS .txt tables are read from

In [4]:
# Lazily scan stop_times.txt and keep only the trip id, the stop id and
# stop_sequence (used later to order the stops within each trip).
stop_times_path = os.path.join(STATIC_GTFS_PATH, 'stop_times.txt')
stop_times_dtype = {
    'trip_id': pl.String,
    'stop_id': pl.String,
    'stop_sequence': pl.Int16,
}
stop_times_cols = ['trip_id', 'stop_id', 'stop_sequence']
stop_times = (
    pl.scan_csv(stop_times_path, schema_overrides=stop_times_dtype)
    .select(stop_times_cols)
)

In [5]:
# Lazily scan stops.txt, keeping only each stop's id and its coordinates.
stops_path = os.path.join(STATIC_GTFS_PATH, 'stops.txt')
stops_dtype = {
    'stop_id': pl.String,
    'stop_lon': pl.Float64,
    'stop_lat': pl.Float64,
}
stops = (
    pl.scan_csv(stops_path, schema_overrides=stops_dtype)
    .select(list(stops_dtype))  # same columns, same order as the dtype mapping
)

## Enrich timetable with stop data

In [6]:
# Attach each stop's coordinates to the timetable rows (inner join on stop_id,
# so timetable rows whose stop_id is absent from `stops` are dropped).
# Selecting `stop_times_cols` first keeps this cell safe to re-run: without it,
# a second execution would join the already-enriched frame again and add
# duplicate `stop_lon_right` / `stop_lat_right` columns.
stop_times = stop_times.select(stop_times_cols).join(stops, on='stop_id', how='inner')

In [7]:
# Order rows by trip, then by position within the trip, so that each row is
# immediately followed by the next stop of the same trip. The pairing step
# below relies on this ordering when it shifts the frame by one row.
stop_times = stop_times.sort(by=['trip_id', 'stop_sequence'], descending=False)

## Get consecutive stop pairs

In [8]:
# Build the distinct (origin stop, destination stop) pairs served consecutively
# by at least one trip.
#
# The sorted timetable is placed side by side with a copy of itself shifted up
# by one row, so every row holds a stop ("orig_*") and the stop on the next row
# ("dest_*"). Rows that straddle two trips, and the final row whose shifted
# half is all null, are discarded by the filter.
stop_pairs = (
    pl.concat(
        [
            stop_times.rename(lambda name: "orig_" + name),
            stop_times.rename(lambda name: "dest_" + name).shift(-1),
        ],
        how='horizontal',
    )
    .filter(
        # Both stops must belong to the same trip, in increasing sequence order.
        (pl.col('orig_trip_id') == pl.col('dest_trip_id'))
        & (pl.col('orig_stop_sequence') < pl.col('dest_stop_sequence'))
    )
    # Trip and sequence columns were only needed for pairing; drop them so that
    # identical stop pairs coming from different trips collapse in unique().
    .drop(['orig_trip_id', 'dest_trip_id', 'orig_stop_sequence', 'dest_stop_sequence'])
    .unique()
)

In [9]:
# Execute the lazy query built above; the resulting DataFrame is displayed as
# the cell output (it is not stored in a variable).
stop_pairs.collect()

orig_stop_id,orig_stop_lon,orig_stop_lat,dest_stop_id,dest_stop_lon,dest_stop_lat
str,f64,f64,str,f64,f64
"""IDFM:39762""",2.333002,48.883787,"""IDFM:28561""",2.328822,48.884357
"""IDFM:26940""",2.553477,48.799186,"""IDFM:26942""",2.557252,48.798696
"""IDFM:7161""",3.034617,48.386554,"""IDFM:7162""",3.023515,48.385202
"""IDFM:monomodalStopPlace:411396""",2.8881,49.231083,"""IDFM:monomodalStopPlace:411387""",2.837987,49.200711
"""IDFM:18880""",2.54213,48.892917,"""IDFM:479768""",2.543405,48.895278
…,…,…,…,…,…
"""IDFM:486588""",2.305745,48.813032,"""IDFM:488474""",2.307256,48.816144
"""IDFM:9962""",2.067326,48.763866,"""IDFM:20651""",2.06922,48.765203
"""IDFM:463774""",2.372151,48.83869,"""IDFM:463524""",2.373229,48.8374
"""IDFM:480177""",2.279,48.886847,"""IDFM:480175""",2.281024,48.884503
