In [2]:
import numpy as np
import pandas as pd
import json
import folium
import os
from datetime import datetime
from datetime import timedelta
import scipy.spatial
import geopy
from geopy.distance import distance
import pytz
from math import radians, cos, sin, asin, sqrt, pi
import random

import matplotlib.pyplot as plt

# Load GPS Data

In [3]:
# Read the recorded trip from disk; the GPS fixes are stored under the
# 'userLocationTrace' key of the JSON document.
with open('./TPF/tao&marced/response_marced_260319_fribourg.json') as f:
    data = json.load(f)

# Build the GPS DataFrame from the location trace.
user_gps_df = pd.DataFrame.from_dict(data['userLocationTrace'])

In [4]:
# pytz zone used below to localize the naive datetimes parsed from strings.
timezone = pytz.timezone("Europe/Zurich")

In [5]:
def convert_timestamp(ts):
    """Convert a timestamp string of the GPS trace to a Unix timestamp.

    Strings longer than 20 characters are parsed with the fractional-seconds
    format "%Y-%m-%dT%H:%M:%S.%fZ", all others with "%Y-%m-%dT%H:%M:%SZ".
    The resulting naive datetime is localized with the notebook-level
    ``timezone`` before ``.timestamp()`` is taken.

    NOTE(review): a trailing "Z" normally denotes UTC, yet the value is
    localized as Europe/Zurich -- confirm the recorder really writes local
    wall-clock time.
    """
    has_fraction = len(ts) > 20
    fmt = "%Y-%m-%dT%H:%M:%S.%fZ" if has_fraction else "%Y-%m-%dT%H:%M:%SZ"
    naive = datetime.strptime(ts, fmt)
    return timezone.localize(naive).timestamp()

In [6]:
# Cast the numeric trace columns to float.
user_gps_df = user_gps_df.astype({'altitude': float, 
                                  'latitude': float,
                                  'longitude': float,
                                  'speed': float, 
                                  'accuracy': float, 
                                  'altitudeAccuracy': float,
                                  'heading': float})

# Unix timestamp of every fix, derived from the string 'timestamp' column.
user_gps_df['timestamp_unix'] = user_gps_df.apply(lambda x: convert_timestamp(x.timestamp), axis=1)

# Remove inaccurate readings: keep only fixes whose horizontal and altitude
# accuracy values are both below 30 (presumably metres -- TODO confirm unit).
user_gps_df = user_gps_df[(user_gps_df.accuracy<30) & (user_gps_df.altitudeAccuracy<30)]

# Order chronologically and give each remaining fix a consecutive integer id.
user_gps_df = user_gps_df.sort_values('timestamp_unix').reset_index(drop=True)
user_gps_df['id'] = range(user_gps_df.shape[0])

# Public Transport Network

Load TPC stops data

In [7]:
# Planned stops table shipped with the TPF data set (shown for inspection).
tpc_stops = pd.read_csv('./TPF/Planned/stops_extend.txt')
tpc_stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stp_correspondances,mp3_filename,stop_area,stop_shortname,thoreb_no
0,850488110,,"Fribourg/Freiburg, gare routière",,46.803239,7.150122,,,0,place_FRIgar,,2021.mp3,8504881,FRIgar,15000.0
1,858916300,,"Fribourg, Stade-Patinoire",,46.817895,7.154564,,,0,,,1084.mp3,8589163,Sta,14999.0
2,850023801,,"Bulle, Verdel",,46.624838,7.065969,,,0,,,4152.mp3,8500238,BULrtv,17903.0
3,850343501,,"Riaz, CO",,46.645104,7.065568,,,0,,,4159.mp3,8503435,RIA-CO,17935.0
4,850371801,,"Avry-sur-Matran, CO",,46.786202,7.077425,,,0,,,4083.mp3,8503718,AVSmco,16590.0


After investigation, we decided to use the SBB GTFS data instead, because the TPC dataset had missing data points.

In [9]:
def read_sbb(filename, path):
    """Read one SBB GTFS text file into a DataFrame whose values are all strings.

    Parameters
    ----------
    filename : str
        Name of the GTFS file, e.g. ``'stops.txt'``.
    path : str
        Directory containing the file (with or without a trailing separator).

    Returns
    -------
    pandas.DataFrame
        One column per header field, one row per record. Values are kept as
        strings, so callers cast the columns they need.

    Raises
    ------
    ValueError
        If the file contains no header line.
    """
    import csv

    # 'utf-8-sig' drops a leading byte-order mark when present (and is a no-op
    # otherwise), instead of blindly discarding the first header character.
    # The csv module handles quoted fields, embedded commas/semicolons and a
    # missing final newline.
    with open(os.path.join(path, filename), encoding='utf-8-sig', newline='') as f:
        rows = [row for row in csv.reader(f) if row]

    if not rows:
        raise ValueError('GTFS file has no header line: ' + os.path.join(path, filename))

    col_names, data = rows[0], rows[1:]
    return pd.DataFrame(data, columns=col_names)

In [10]:
# Directory holding the SBB GTFS text files read in the next cell.
path = 'gtfsfp20192019-03-20/'

In [11]:
# Load the four GTFS tables used below; read_sbb returns every value as a string.
stops_df = read_sbb('stops.txt', path)
stop_times_df = read_sbb('stop_times.txt', path)
routes_df = read_sbb('routes.txt', path)
trips_df = read_sbb('trips.txt', path)

The initial approach was to include all stops within a 10 km radius of Fribourg. For simplicity, we decided instead to keep only the stops covered by the TPF network, since the initial dataset lies entirely within this network.

In [12]:
def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance in kilometres between two points.

    All four arguments are decimal degrees, given in (longitude, latitude)
    order for each point. Uses the haversine formula with an Earth radius of
    6371 km.
    """
    earth_radius_km = 6371

    # Work in radians from here on.
    lam1, phi1, lam2, phi2 = (radians(deg) for deg in (lon1, lat1, lon2, lat2))

    half_dlat = (phi2 - phi1) / 2
    half_dlon = (lam2 - lam1) / 2

    # Haversine of the central angle between the two points.
    a = sin(half_dlat)**2 + cos(phi1) * cos(phi2) * sin(half_dlon)**2
    central_angle = 2 * asin(sqrt(a))
    return central_angle * earth_radius_km

In [13]:
# hardcoded Fribourg gare coordinates (longitude, latitude);
# 'from_FB' is the haversine distance in km from that point to every stop.
stops_df['from_FB'] = np.vectorize(haversine)(7.161817, 46.806477, 
                                              stops_df.stop_lon.astype('float64'), 
                                              stops_df.stop_lat.astype('float64'))

In [14]:
# Filter to TPF: keep only routes whose agency_id is '834'.
routes_df = routes_df.astype({'agency_id': str})
routes_df = routes_df[routes_df.agency_id=='834'][['route_id', 'route_desc', 'route_short_name']]

# Join trips -> stop times -> stops so that every row is one scheduled stop
# of one trip, carrying route and stop attributes.
fribourg_sbb = pd.merge(trips_df, routes_df, on='route_id')
fribourg_sbb = pd.merge(stop_times_df, fribourg_sbb, on='trip_id')
fribourg_sbb = pd.merge(fribourg_sbb, stops_df, on='stop_id')
fribourg_sbb = fribourg_sbb.drop(columns=['pickup_type', 'drop_off_type','location_type', 'parent_station'])
# stop_sequence is compared numerically later on, so cast it from string.
fribourg_sbb = fribourg_sbb.astype({'stop_sequence': int})
fribourg_sbb.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,service_id,trip_headsign,trip_short_name,direction_id,route_desc,route_short_name,stop_name,stop_lat,stop_lon,from_FB
0,1.TA.6-260-A-j19-1.1.H,06:59:00,06:59:00,8504938,1,6-260-A-j19-1,TA+b002a,"Boltigen, Bahnhof",26003,0,Bus,260,"Jaun, Kappelboden",46.6102353307346,7.28713358579193,23.821399
1,2.TA.6-260-A-j19-1.1.H,07:31:00,07:31:00,8504938,1,6-260-A-j19-1,TA+b08x0,"Boltigen, Bahnhof",26005,0,Bus,260,"Jaun, Kappelboden",46.6102353307346,7.28713358579193,23.821399
2,4.TA.6-260-A-j19-1.1.H,08:31:00,08:31:00,8504938,1,6-260-A-j19-1,TA+b06uz,"Boltigen, Bahnhof",26009,0,Bus,260,"Jaun, Kappelboden",46.6102353307346,7.28713358579193,23.821399
3,6.TA.6-260-A-j19-1.1.H,09:31:00,09:31:00,8504938,1,6-260-A-j19-1,TA+b0cnb,"Boltigen, Bahnhof",26013,0,Bus,260,"Jaun, Kappelboden",46.6102353307346,7.28713358579193,23.821399
4,7.TA.6-260-A-j19-1.1.H,11:08:00,11:08:00,8504938,1,6-260-A-j19-1,TA+b0023,"Boltigen, Bahnhof",26017,0,Bus,260,"Jaun, Kappelboden",46.6102353307346,7.28713358579193,23.821399


In [15]:
# Distinct stops (id, name, coordinates) appearing in the filtered timetable.
fribourg_stops = fribourg_sbb[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']].drop_duplicates().reset_index(drop=True)

In [16]:
def get_closest_stop(lat, lon, id_, stops=fribourg_stops):
    """Find the stop nearest to a GPS fix.

    "Nearest" means the smallest ``|stop_lat - lat| + |stop_lon - lon|``,
    i.e. an L1 distance measured in degrees.
    NOTE(review): this is not a true ground distance (a degree of longitude
    is shorter than a degree of latitude away from the equator) -- confirm
    that the approximation is acceptable.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the GPS fix in decimal degrees.
    id_ : any
        Identifier of the fix; returned unchanged as the first element.
    stops : pandas.DataFrame
        Table with ``stop_name``, ``stop_lat`` and ``stop_lon`` columns
        (coordinates may be strings; they are cast to float).

    Returns
    -------
    list
        ``[id_, stop_name, stop_lat, stop_lon]`` of the nearest stop.
    """
    # Cast only the two coordinate columns and take a single arg-min pass,
    # rather than copying, casting and fully sorting the table on every call.
    stop_lats = stops['stop_lat'].astype(float).values
    stop_lons = stops['stop_lon'].astype(float).values
    diff = abs(stop_lats - lat) + abs(stop_lons - lon)

    # nanargmin ignores stops with missing coordinates; ties resolve to the
    # first matching stop.
    nearest = int(np.nanargmin(diff))
    return [id_, stops['stop_name'].iloc[nearest], stop_lats[nearest], stop_lons[nearest]]

In [17]:
# For every GPS fix, look up the closest stop and collect the results in a frame keyed by 'id'.
user_gps_to_stops = pd.DataFrame(list(user_gps_df.apply(lambda x: get_closest_stop(x.latitude, x.longitude, x.id), 
                                                        axis=1)), 
                                 columns=['id', 'closest_stop_name', 'closest_stop_lat', 'closest_stop_lon'])

# Attach the closest-stop columns to the trace.
user_gps_df = pd.merge(user_gps_df, user_gps_to_stops, on='id')

In [18]:
def calc_dist(lat_s, long_s, lat_f, long_f, alt_s=None, alt_f=None):
    """Distance in metres between a start point (``_s``) and a finish point (``_f``).

    The horizontal distance comes from geopy's ``distance(...).m``. When both
    altitudes are supplied, the altitude difference is combined with it as
    the second leg of a right triangle; otherwise the horizontal distance is
    returned on its own.
    """
    ground_m = distance((lat_s, long_s), (lat_f, long_f)).m

    have_both_altitudes = alt_s is not None and alt_f is not None
    if not have_both_altitudes:
        return ground_m

    climb_m = alt_s - alt_f
    return (ground_m**2 + climb_m**2)**0.5

In [19]:
# Distance in metres (2-D, no altitude passed) from each fix to its closest stop.
user_gps_df['closest_stop_dist'] = user_gps_df.apply(lambda x: calc_dist(x.latitude, x.longitude, x.closest_stop_lat, x.closest_stop_lon), axis=1)

## GPS to Edges

In [20]:
# Drop the identifier columns so that otherwise-identical fixes collapse in drop_duplicates,
# then restore chronological order with a fresh 0..n-1 index.
user_gps_df = user_gps_df.drop(columns={'uuid', 'id', 'user'}).drop_duplicates()
user_gps_df = user_gps_df.sort_values('timestamp_unix').reset_index(drop=True)

In [21]:
def points_to_edges(df):
    """Turn an ordered table of GPS points into a table of consecutive edges.

    Row ``k`` of the result joins point ``k`` (columns suffixed ``_s``) to
    point ``k + 1`` (columns suffixed ``_f``) of ``df`` in its current row
    order. The input frame is not modified.

    Added columns:
      * ``duration``     -- ``timestamp_unix_f - timestamp_unix_s``
      * ``distance``     -- ``calc_dist`` between the two points, altitudes included
      * ``meters_per_s`` -- ``distance / duration``
      * ``index_s`` / ``index_f`` -- positions of the start and finish points

    Consecutive rows are paired by slicing rather than by self-merging on a
    temporary key, so the result does not depend on which helper key columns
    a particular pandas version emits for index-on-column merges.
    """
    points = df.reset_index(drop=True)

    # All points but the last are edge starts; all but the first are edge ends.
    starts = points.iloc[:-1].reset_index(drop=True).add_suffix('_s')
    finishes = points.iloc[1:].reset_index(drop=True).add_suffix('_f')
    edge_df = pd.concat([starts, finishes], axis=1)

    edge_df['duration'] = edge_df.timestamp_unix_f - edge_df.timestamp_unix_s

    # A plain comprehension also works when there are no edges at all
    # (fewer than two points), where DataFrame.apply(axis=1) does not.
    edge_df['distance'] = [
        calc_dist(lat_s, lon_s, lat_f, lon_f, alt_s, alt_f)
        for lat_s, lon_s, lat_f, lon_f, alt_s, alt_f in zip(
            edge_df.latitude_s, edge_df.longitude_s,
            edge_df.latitude_f, edge_df.longitude_f,
            edge_df.altitude_s, edge_df.altitude_f)
    ]

    # NOTE: a zero duration (two fixes with the same timestamp) yields inf/NaN here.
    edge_df['meters_per_s'] = edge_df['distance'] / edge_df['duration']

    edge_df['index_s'] = range(0, edge_df.shape[0])
    edge_df['index_f'] = edge_df.index_s + 1

    return edge_df

In [22]:
# One row per pair of consecutive GPS fixes.
edge_df = points_to_edges(user_gps_df)

## Split walk/non-walk segments

In [23]:
def generate_segment_ids(df, distance_thershold=50, duration_threshold=60, walk_speed_threshold=2.79):
    """Label every edge as walk / non-walk and return the resulting segments.

    Steps:
      1. An edge is provisionally a walk edge when ``meters_per_s``,
         ``speed_s`` and ``speed_f`` are all below ``walk_speed_threshold``.
      2. ``get_segments`` keeps only the runs that meet the distance /
         duration thresholds; edges in those runs are "certain".
      3. Every other edge is "uncertain" and inherits the label of the
         closest preceding certain edge (leading uncertain edges take the
         label of the first certain edge).
      4. ``get_segments`` is run again on the completed labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with ``meters_per_s``, ``speed_s``, ``speed_f``,
        ``duration`` and ``distance`` columns. An ``is_walk`` column is
        added to it in place.
    distance_thershold, duration_threshold :
        Forwarded unchanged to ``get_segments``.
    walk_speed_threshold : float
        Speed below which an edge counts as walking (default 2.79).

    Returns
    -------
    (df, walk_segments, non_walk_segments)

    Differences from the previous implementation:
      * the "last certain label" is refreshed on every certain edge, so
        uncertain edges that follow a certain run preceded by other
        uncertain edges no longer inherit a stale label;
      * when no run meets its threshold, the provisional speed-based labels
        are kept instead of raising IndexError.
    """
    # 1. provisional classification from the three speed readings
    slow = ((df.meters_per_s < walk_speed_threshold)
            & (df.speed_s < walk_speed_threshold)
            & (df.speed_f < walk_speed_threshold))
    df['is_walk'] = slow

    # 2. runs that are long enough to be trusted
    walk_segments, non_walk_segments = get_segments(df, distance_thershold, duration_threshold)
    certain_walk = [x for y in walk_segments for x in y]
    certain_non_walk = [x for y in non_walk_segments for x in y]

    if certain_walk or certain_non_walk:
        df['is_walk'] = None
        df.loc[certain_walk, 'is_walk'] = True
        df.loc[certain_non_walk, 'is_walk'] = False

        # 3. for uncertain edges, merge them into the preceding certain segment
        uncertain = df.is_walk.isnull().tolist()
        labels = df.is_walk.tolist()
        current = next(bool(lab) for lab, unc in zip(labels, uncertain) if not unc)

        filled = []
        for lab, unc in zip(labels, uncertain):
            if not unc:
                current = bool(lab)
            filled.append(current)
        df['is_walk'] = filled

    # 4. final segmentation on the completed labels
    walk_segments, non_walk_segments = get_segments(df, distance_thershold, duration_threshold)

    return df, walk_segments, non_walk_segments

In [24]:
def get_segments(df, distance_thershold=50, duration_threshold=60):
    """Group consecutive edges into walk and non-walk segments.

    Edges are scanned in row order. A run of consecutive walk edges is kept
    as a walk segment when its summed ``duration`` reaches
    ``duration_threshold``; a run of consecutive non-walk edges is kept as a
    non-walk segment when its summed ``distance`` reaches
    ``distance_thershold``. Runs below their threshold are dropped.

    Returns ``(walk_segments, non_walk_segments)``, each a list of lists of
    index labels of ``df``.
    """
    walk_segments, non_walk_segments = [], []
    current_walk, current_ride = [], []
    walked_time, ridden_dist = 0, 0

    edges = zip(df.index, df['is_walk'], df['duration'], df['distance'])
    for label, walking, edge_time, edge_dist in edges:
        if walking:
            walked_time += edge_time
            current_walk.append(label)

            # a walk edge closes any running non-walk run; keep it if long enough
            if ridden_dist >= distance_thershold:
                non_walk_segments.append(current_ride)
            ridden_dist, current_ride = 0, []
        else:
            ridden_dist += edge_dist
            current_ride.append(label)

            # a non-walk edge closes any running walk run; keep it if long enough
            if walked_time >= duration_threshold:
                walk_segments.append(current_walk)
            walked_time, current_walk = 0, []

    # the run still open at the end of the table is subject to the same test
    if ridden_dist >= distance_thershold:
        non_walk_segments.append(current_ride)
    if walked_time >= duration_threshold:
        walk_segments.append(current_walk)

    return walk_segments, non_walk_segments

In [25]:
# Label the edges and collect the walk / non-walk segments (default thresholds).
edge_df, walk_segments, non_walk_segments = generate_segment_ids(edge_df)

In [26]:
# Dump every edge of every walk segment for manual inspection.
for ws in walk_segments:
    print('-'*100 + 'SEGMENT')
    for p in ws:
        edge = edge_df.loc[p]  # fetch the edge row once instead of once per field
        print('EDGE')
        print('Start Time: ' + edge.timestamp_s)
        print('Start Point: ' + str(edge.index_s) + ' ' + edge.closest_stop_name_s + ', '
              + 'Distance from stop:' + str(edge.closest_stop_dist_s))
        print('Speed: ' + str(edge.meters_per_s))
        print('Duration: ' + str(edge.duration))
        print('Distance: ' + str(edge.distance))
        print('End Time: ' + edge.timestamp_f)
        print('Finish Point: ' + str(edge.index_f) + ' ' + edge.closest_stop_name_f + ', '
              + 'Distance from stop:' + str(edge.closest_stop_dist_f) + ' \n')

----------------------------------------------------------------------------------------------------SEGMENT
EDGE
Start Time: 2019-03-26T08:57:58Z
Start Point: 8 Villars-sur-Glâne, Moncor, Distance from stop:160.81077739256236
Speed: 0.9195393188332593
Duration: 14.0
Distance: 12.87355046366563
End Time: 2019-03-26T08:58:12Z
Finish Point: 9 Villars-sur-Glâne, Moncor, Distance from stop:161.89463159180514 

EDGE
Start Time: 2019-03-26T08:58:12Z
Start Point: 9 Villars-sur-Glâne, Moncor, Distance from stop:161.89463159180514
Speed: 0.47517947495153734
Duration: 798.239000082016
Distance: 379.3067889448125
End Time: 2019-03-26T09:11:30.239000Z
Finish Point: 10 Villars-sur-Glâne, Rte Soleil, Distance from stop:130.46514260185205 

EDGE
Start Time: 2019-03-26T09:11:30.239000Z
Start Point: 10 Villars-sur-Glâne, Rte Soleil, Distance from stop:130.46514260185205
Speed: 1.0746626911491606
Duration: 11.855999946594238
Distance: 12.741200808871268
End Time: 2019-03-26T09:11:42.095000Z
Finish Point:

Speed: 1.732811266613041
Duration: 6.0
Distance: 10.396867599678247
End Time: 2019-03-26T09:59:38Z
Finish Point: 236 Fribourg, Place Georges Python, Distance from stop:62.66220274874859 

EDGE
Start Time: 2019-03-26T09:59:38Z
Start Point: 236 Fribourg, Place Georges Python, Distance from stop:62.66220274874859
Speed: 1.4002969859558987
Duration: 8.0
Distance: 11.20237588764719
End Time: 2019-03-26T09:59:46Z
Finish Point: 237 Fribourg, Place Georges Python, Distance from stop:73.63633649732128 

EDGE
Start Time: 2019-03-26T09:59:46Z
Start Point: 237 Fribourg, Place Georges Python, Distance from stop:73.63633649732128
Speed: 1.1520242234777198
Duration: 9.0
Distance: 10.36821801129948
End Time: 2019-03-26T09:59:55Z
Finish Point: 238 Fribourg, Place Georges Python, Distance from stop:83.62917666323543 

EDGE
Start Time: 2019-03-26T09:59:55Z
Start Point: 238 Fribourg, Place Georges Python, Distance from stop:83.62917666323543
Speed: 1.8375844913855006
Duration: 6.0
Distance: 11.02550694831

Duration: 6.997999906539917
Distance: 10.91792170220936
End Time: 2019-03-26T10:41:43.165000Z
Finish Point: 363 Fribourg, Planche-Inférieure, Distance from stop:77.71433280366374 

EDGE
Start Time: 2019-03-26T10:41:43.165000Z
Start Point: 363 Fribourg, Planche-Inférieure, Distance from stop:77.71433280366374
Speed: 1.314242480778736
Duration: 7.998000144958496
Distance: 10.511311551778945
End Time: 2019-03-26T10:41:51.163000Z
Finish Point: 364 Fribourg, Planche-Inférieure, Distance from stop:87.44150594414073 

EDGE
Start Time: 2019-03-26T10:41:51.163000Z
Start Point: 364 Fribourg, Planche-Inférieure, Distance from stop:87.44150594414073
Speed: 0.4505611531141579
Duration: 23.000999927520752
Distance: 10.363357050122412
End Time: 2019-03-26T10:42:14.164000Z
Finish Point: 365 Fribourg, Planche-Inférieure, Distance from stop:97.75437712974126 

EDGE
Start Time: 2019-03-26T10:42:14.164000Z
Start Point: 365 Fribourg, Planche-Inférieure, Distance from stop:97.75437712974126
Speed: 1.0143247

In [27]:
# Dump every edge of every non-walk segment for manual inspection.
for ws in non_walk_segments:
    print('-'*100 + 'SEGMENT')
    for p in ws:
        edge = edge_df.loc[p]  # fetch the edge row once instead of once per field
        print('EDGE')
        print('Start Time: ' + edge.timestamp_s)
        print('Start Point: ' + str(edge.index_s) + ' ' + edge.closest_stop_name_s + ', '
              + 'Distance from stop:' + str(edge.closest_stop_dist_s))
        print('Speed: ' + str(edge.meters_per_s))
        print('Duration: ' + str(edge.duration))
        print('Distance: ' + str(edge.distance))
        print('End Time: ' + edge.timestamp_f)
        print('Finish Point: ' + str(edge.index_f) + ' ' + edge.closest_stop_name_f + ', '
              + 'Distance from stop:' + str(edge.closest_stop_dist_f) + ' \n')

----------------------------------------------------------------------------------------------------SEGMENT
EDGE
Start Time: 2019-03-26T08:50:43Z
Start Point: 0 Fribourg, Bethléem, Distance from stop:23.98997402080832
Speed: 4.09118830246827
Duration: 192.99900007247925
Distance: 789.5952514845999
End Time: 2019-03-26T08:53:55.999000Z
Finish Point: 1 Villars-sur-Glâne,Villars-Vert, Distance from stop:209.9769425775891 

EDGE
Start Time: 2019-03-26T08:53:55.999000Z
Start Point: 1 Villars-sur-Glâne,Villars-Vert, Distance from stop:209.9769425775891
Speed: 6.667774540100744
Duration: 28.000999927520752
Distance: 186.70435441408566
End Time: 2019-03-26T08:54:24Z
Finish Point: 2 Villars-sur-Glâne,Villars-Vert, Distance from stop:26.994024261590894 

EDGE
Start Time: 2019-03-26T08:54:24Z
Start Point: 2 Villars-sur-Glâne,Villars-Vert, Distance from stop:26.994024261590894
Speed: 3.66594217108648
Duration: 30.325999975204468
Distance: 111.1733621894696
End Time: 2019-03-26T08:54:54.326000Z
Fin

Start Time: 2019-03-26T09:51:45Z
Start Point: 178 Fribourg, Midi, Distance from stop:25.209699206380968
Speed: 8.146310135201931
Duration: 4.0
Distance: 32.585240540807725
End Time: 2019-03-26T09:51:49Z
Finish Point: 179 Fribourg, Midi, Distance from stop:9.897318120540987 

EDGE
Start Time: 2019-03-26T09:51:49Z
Start Point: 179 Fribourg, Midi, Distance from stop:9.897318120540987
Speed: 2.0584658373635105
Duration: 10.0
Distance: 20.584658373635104
End Time: 2019-03-26T09:51:59Z
Finish Point: 180 Fribourg, Midi, Distance from stop:29.526369962026674 

EDGE
Start Time: 2019-03-26T09:51:59Z
Start Point: 180 Fribourg, Midi, Distance from stop:29.526369962026674
Speed: 1.1879585825954182
Duration: 13.0
Distance: 15.443461573740436
End Time: 2019-03-26T09:52:12Z
Finish Point: 181 Fribourg, Midi, Distance from stop:44.16755126127349 

EDGE
Start Time: 2019-03-26T09:52:12Z
Start Point: 181 Fribourg, Midi, Distance from stop:44.16755126127349
Speed: 6.418426721204043
Duration: 4.0
Distance: 2

Duration: 4.0
Distance: 24.344655045366817
End Time: 2019-03-26T10:26:02.003000Z
Finish Point: 302 Fribourg, Neuveville, Distance from stop:4.227254175647462 

EDGE
Start Time: 2019-03-26T10:26:02.003000Z
Start Point: 302 Fribourg, Neuveville, Distance from stop:4.227254175647462
Speed: 1.3637142067642403
Duration: 18.0
Distance: 24.546855721756323
End Time: 2019-03-26T10:26:20.003000Z
Finish Point: 303 Fribourg, Neuveville, Distance from stop:26.024075804706037 

EDGE
Start Time: 2019-03-26T10:26:20.003000Z
Start Point: 303 Fribourg, Neuveville, Distance from stop:26.024075804706037
Speed: 2.926007300928726
Duration: 5.0
Distance: 14.63003650464363
End Time: 2019-03-26T10:26:25.003000Z
Finish Point: 304 Fribourg, Neuveville, Distance from stop:40.1458509463497 

EDGE
Start Time: 2019-03-26T10:26:25.003000Z
Start Point: 304 Fribourg, Neuveville, Distance from stop:40.1458509463497
Speed: 0.7326207055267187
Duration: 7.0
Distance: 5.128344938687031
End Time: 2019-03-26T10:26:32.003000Z


End Time: 2019-03-26T10:58:56.149000Z
Finish Point: 417 Fribourg, gare CFF, Distance from stop:37.74631045908596 



# Map plot

In [28]:
def edge_to_point_id(df, walk_segs, non_walk_segs):
    """Convert segments of edge ids into segments of GPS point ids.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with integer ``index_s`` / ``index_f`` columns (the point
        ids of each edge's start and finish).
    walk_segs, non_walk_segs : list of list
        Segments expressed as index labels of ``df``. ``walk_segs`` is only
        checked for emptiness; walk points are derived as the gaps between
        non-walk segments.

    Returns
    -------
    (walk_points, non_walk_points)
        Two lists of lists of point ids. Non-walk segments include both of
        their end points; walk segments take the points strictly between two
        non-walk segments, plus the leading / trailing points of the trip.
        If one kind of segment is absent, its entry is ``None`` and the other
        entry is a single segment covering every point.

    A leading walk segment is now added as ONE list. Previously its point ids
    were spliced into ``walk_points`` as loose integers, mixing ints with
    lists.
    """
    every_point = list(range(0, df.shape[0]+1))

    if len(non_walk_segs)==0:
        return [every_point], None

    if len(walk_segs)==0:
        return None, [every_point]

    # Generate gps point indices from edge indices.
    # Non-walk segments' extremities are inclusive.
    non_walk_points = []
    for nws in non_walk_segs:
        ids = df.loc[nws, ['index_s', 'index_f']].values.reshape(-1)
        non_walk_points.append(sorted(set(int(v) for v in ids)))

    # Every point between two non-walk segments is a walk point
    # (walk segments' extremities are exclusive).
    walk_points = [list(range(before[-1]+1, after[0]))
                   for before, after in zip(non_walk_points[:-1], non_walk_points[1:])]

    # If the trip starts with a walk segment, prepend it as a single segment.
    first_non_walk_point = non_walk_points[0][0]
    if first_non_walk_point != 0:
        walk_points = [list(range(0, first_non_walk_point))] + walk_points

    # If the trip ends with a walk segment, append it (last point included).
    trip_max_id = int(df.index_f.max())
    last_non_walk_point = non_walk_points[-1][-1]
    if last_non_walk_point != trip_max_id:
        walk_points.append(list(range(last_non_walk_point+1, trip_max_id+1)))

    return walk_points, non_walk_points

In [30]:
# Translate edge-based segments into lists of GPS point ids for plotting.
walk_points, non_walk_points = edge_to_point_id(edge_df, walk_segments, non_walk_segments)

In [31]:
# Attach each edge's is_walk label to the GPS point sharing its index.
# The join is on the index, so points without a matching edge row get a missing is_walk.
latlon = user_gps_df.join(edge_df[['is_walk']])

locationOfFribourg = [46.805498, 7.142255]
mapFribourg = folium.Map(location=locationOfFribourg, zoom_start=15)

# One circle per GPS point: green when is_walk is truthy, red otherwise.
for i, coord in latlon.iterrows():
    if coord.is_walk:
        color = 'green'
    else:
        color = 'red'
    #mapFribourg.add_child(folium.Marker(location=[coord.latitude, coord.longitude], popup=i, icon=folium.Icon(color=color)))
    mapFribourg.add_child(folium.CircleMarker(location=[coord.latitude, coord.longitude], popup=i, radius=3, color=color))
    
# Interleave non-walk and walk segments (non-walk first); the trailing None pads
# walk_points by one entry and is filtered out again on the next line.
# NOTE(review): zip stops at the shorter list, so this assumes the trip starts
# with a non-walk segment and the two lists differ in length by at most one -- confirm.
plot_segments = list(zip(non_walk_points, walk_points+[None]))
plot_segments = [x for a in plot_segments for x in a if x!=None]

# Draw each segment as a polyline through its GPS points, coloured like the markers.
for s in plot_segments:
    points = user_gps_df.loc[s][['latitude', 'longitude']].values
    
    if s in walk_points:
        color="green"
    else:
        color="red"
        
    folium.PolyLine(points, color=color, weight=2.5, opacity=1).add_to(mapFribourg)

mapFribourg

# Transport Public Fribourgeois - OpenStreetMap Data

In [32]:
# Load the route GeoJSON. Note: this rebinds `data`, which earlier held the GPS trace JSON.
with open('fribourg_routes.geojson') as f:
    data = json.load(f)

In [33]:
# taken from OSM documentation: values of the 'route' property that are kept
# as public transport when filtering the GeoJSON features below.
transport_types = ["train", "subway", "monorail", "tram", "light_rail", "bus", "trolleybus", "railway"]

In [34]:
# Does not contain bus line 8, verified on http://www.xn--pnvkarte-m4a.de/?lat=46.8&lon=7.15&zoom=10#7.1501;46.801;14
# Collect every non-point feature whose 'route' is a public transport type and
# whose network is not Flixbus, keyed by a running integer.
fribourg_routes = {}
i = 0
for route in data['features']:
    is_wanted = (route['geometry']['type'] != 'Point'
                 and route['properties']['route'] in transport_types
                 and route['properties'].get('network') != 'Flixbus')
    if not is_wanted:
        continue

    props = route['properties']
    route_dict = {
        'name': props.get('name'),
        'description': props.get('description'),
        'network': props.get('network'),
        'operator': props.get('operator'),
        'ref': props.get('ref'),
        'type': props.get('route'),
        'line': route['geometry']['coordinates'],
    }

    fribourg_routes[i] = route_dict
    i += 1

In [35]:
# Visual check of one route: flatten the last 34 entries of route 21's 'line'
# into individual coordinates and drop a marker on each.
latlon = [x for a in fribourg_routes.get(21).get('line')[-34:] for x in a]
#latlon = fribourg_routes.get(21).get('line')[-1]

locationOfFribourg = [46.805498, 7.142255]
mapFribourg = folium.Map(location=locationOfFribourg, zoom_start=15)

# Coordinates are indexed [0]=x, [1]=y here and passed to folium as [coord[1], coord[0]].
for coord in latlon:
    mapFribourg.add_child(folium.Marker(location=[coord[1], coord[0]]))

mapFribourg

In [36]:
# For every route, build a list of its coordinates with the route key appended
# as a trailing label element.
labeled_points = []
for r in fribourg_routes:
    coordinates = fribourg_routes.get(r).get('line')
    # When the first element of the first entry is itself a list, the geometry is
    # nested one level deeper (presumably a multi-line geometry -- verify) and is flattened.
    if type(coordinates[0][0]) == list:
        labeled_points.append([x+[r] for a in coordinates for x in a])
    else:
        labeled_points.append([x+[r] for x in coordinates])

In [37]:
# Flatten the per-route lists into one array with one row per labelled coordinate.
labeled_points = np.array([np.array(x) for a in labeled_points for x in a])

In [38]:
# Spatial index over the first two columns (the coordinates) of the labelled points.
fbg_tree = scipy.spatial.KDTree(labeled_points[:, :2])

In [39]:
def get_closest_pt_lines(tree, segments, user_gps, labeled_points, lat=True, lon=True, dist_threshold=70):
    """Match GPS points of each segment to nearby labelled route points.

    Parameters
    ----------
    tree :
        Spatial index supporting ``query_ball_point`` over the
        (longitude, latitude) columns of ``labeled_points``.
    segments : list of list
        Each inner list holds index labels of ``user_gps``.
    user_gps : pandas.DataFrame
        GPS points with ``latitude`` and ``longitude`` columns.
    labeled_points : array-like
        Rows of ``[longitude, latitude, ..., route label]``; the last element
        is used as ``transport_id``.
    lat, lon : bool
        Which user coordinate(s) enter the distance. With only one enabled,
        the other coordinate is taken from the route point.
    dist_threshold : number
        Search radius in metres; converted to degrees with 111139 m per degree.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``path_id, gps_id, transport_id, dist`` with the minimum
        distance per combination.
        NOTE(review): ``None`` is returned as soon as ONE segment has no
        route point within the radius, discarding matches already found for
        earlier segments -- confirm this is intended.

    Raises
    ------
    ValueError
        If both ``lat`` and ``lon`` are false (raised when the first
        candidate point is evaluated).
    """
    # Each degree of the radius line of the earth corresponds to 111,139 meters
    radius_deg = dist_threshold / 111139

    matches = []
    for path_id, path in enumerate(segments):
        query_xy = user_gps.loc[path, ['longitude', 'latitude']].values
        neighbours = tree.query_ball_point(query_xy, radius_deg)

        if not any(len(hits) for hits in neighbours):
            return None

        for gps_id, hits in zip(path, neighbours):
            for p in hits:
                fix = user_gps.loc[gps_id]
                user_lat, user_lon = fix.latitude, fix.longitude

                route_lat, route_lon = labeled_points[p][1], labeled_points[p][0]

                if lat and lon:
                    dist = calc_dist(user_lat, user_lon, route_lat, route_lon)
                elif lat:
                    dist = calc_dist(user_lat, route_lon, route_lat, route_lon)
                elif lon:
                    dist = calc_dist(route_lat, user_lon, route_lat, route_lon)
                else:
                    raise ValueError('Need to use at least one of latitude/longitude')

                matches.append([path_id, gps_id, labeled_points[p][-1], dist])

    key_cols = ['path_id', 'gps_id', 'transport_id']
    pt_overlap = pd.DataFrame(matches, columns=key_cols + ['dist'])
    pt_overlap = pt_overlap.sort_values(key_cols + ['dist'])
    pt_overlap = pt_overlap.groupby(key_cols, as_index=False).agg({'dist': 'min'})

    return pt_overlap

In [40]:
def score_lines(overlap, nw_points, threshold=0.65, discount_factor=0.99, route_dict=fribourg_routes):
    """Score candidate transport lines for each non-walk path.

    For every (gps point, line) pair the point score is
    ``discount_factor ** ((dist - best) / best)``, where ``best`` is the
    smallest distance from that gps point to any line. Point scores are
    multiplied per (path, line) and then weighted by the share of the path's
    points that the line matched. Lines scoring at or below ``threshold``
    are dropped.

    Side effect: a ``score`` column is added to the ``overlap`` frame that
    was passed in.

    Returns a DataFrame with columns ``path_id, transport_id, score,
    first_point_id, last_point_id, ref``, ordered by path and descending
    score; ``ref`` is looked up in ``route_dict``.
    """
    nearest_by_gps = dict(overlap.groupby('gps_id', as_index=False).agg({'dist': 'min'}).values)
    path_lengths = {idx: len(points) for idx, points in enumerate(nw_points)}

    def point_score(row):
        best = nearest_by_gps[row.gps_id]
        return discount_factor**((row.dist - best)/best)

    overlap['score'] = overlap.apply(point_score, axis=1)

    per_line = overlap.groupby(['path_id', 'transport_id'], as_index=False)\
    .agg({'score': 'prod', 'gps_id': ['count', 'first', 'last']})
    per_line.columns = ['path_id', 'transport_id', 'score', 'overlap', 'first_point_id', 'last_point_id']

    # share of the path's points that this line matched
    per_line['overlap'] = per_line.apply(lambda x: x.overlap / path_lengths[x.path_id], axis=1)

    per_line['score'] = per_line['score'] * per_line['overlap']
    per_line = per_line.drop(columns=['overlap'])
    per_line = per_line[per_line.score>threshold]

    per_line = per_line.sort_values(['path_id', 'score'], ascending=[True, False]).reset_index(drop=True)

    per_line['ref'] = per_line.apply(lambda x: route_dict[x.transport_id].get('ref'), axis=1)

    return per_line

# GTFS - Timetable data

In [41]:
# Calendar day (naive midnight) onto which the timetable's clock times are projected below.
travel_day = datetime(2019,3,26)

In [42]:
def _clock_to_unix(hms):
    """Unix timestamp of an 'HH:MM:SS' timetable time on ``travel_day`` in the notebook timezone."""
    offset = timedelta(hours=int(hms[:2]), minutes=int(hms[3:5]), seconds=int(hms[6:]))
    return timezone.localize(travel_day + offset).timestamp()

fribourg_sbb['arrival_timestamp'] = fribourg_sbb['arrival_time'].apply(_clock_to_unix)
fribourg_sbb['departure_timestamp'] = fribourg_sbb['departure_time'].apply(_clock_to_unix)

In [43]:
def check_direction(trip_id, source_stop_ID, min_start_dist, seg_start_id, seg_end_id, route_name, trips, user_df=None):
    """Check that a candidate trip runs in the direction the user travelled.

    The stop of ``trip_id`` closest to the segment's last GPS point is taken
    as the destination. The trip is accepted only when that stop's
    ``stop_sequence`` is greater than ``source_stop_ID``.

    Parameters
    ----------
    trip_id :
        Trip to examine.
    source_stop_ID :
        ``stop_sequence`` of the boarding stop.
    min_start_dist :
        Distance from the segment start to the boarding stop (passed through).
    seg_start_id, seg_end_id :
        Index labels of the segment's first and last GPS points.
    route_name :
        Unused; kept for interface compatibility.
    trips : pandas.DataFrame
        Timetable rows with ``trip_id``, ``stop_sequence``, ``stop_lat`` and
        ``stop_lon`` columns.
    user_df : pandas.DataFrame, optional
        GPS points with ``latitude`` / ``longitude``; defaults to the
        notebook-level ``user_gps_df``.

    Returns
    -------
    list or None
        ``[start, end]`` where each is
        ``[trip_id, stop_sequence, gps_id, dist_to_stop, is_start]``, or
        ``None`` when the destination does not come after the source stop.
    """
    if user_df is None:
        user_df = user_gps_df

    trip = trips[trips.trip_id==trip_id].copy()

    end_lat, end_lon = user_df.loc[seg_end_id][['latitude', 'longitude']].values

    # Read the stop coordinates by column name rather than by tuple position,
    # so that adding or reordering timetable columns cannot silently pick the
    # wrong fields.
    trip['dist2end'] = [calc_dist(stop_lat, stop_lon, end_lat, end_lon)
                        for stop_lat, stop_lon in zip(trip['stop_lat'], trip['stop_lon'])]
    min_end_dist = trip.dist2end.min()

    destination_stop_ID = trip[trip.dist2end==min_end_dist].stop_sequence.values[0]

    start = [trip_id, source_stop_ID, seg_start_id, min_start_dist, True]
    end = [trip_id, destination_stop_ID, seg_end_id, min_end_dist, False]

    if destination_stop_ID > source_stop_ID:
        return [start, end]
    return None

In [44]:
def gen_candidates(scores_df, user_df, transport_df, lat=True, lon=True):
    """Build candidate (trip, boarding stop, alighting stop) rows for every scored line.

    For each row of ``scores_df`` the timetable is narrowed to trips of that
    line departing around the time of the segment's first GPS point, the stop
    closest to that point is picked per trip, and ``check_direction`` keeps
    only trips heading towards the segment's end.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Read positionally via itertuples: tup[1], tup[4], tup[5], tup[6].
        Presumably path_id, first_point_id, last_point_id and ref as produced
        by score_lines -- verify against the caller.
    user_df : pandas.DataFrame
        GPS points with ``timestamp_unix``, ``latitude``, ``longitude``.
    transport_df : pandas.DataFrame
        Timetable rows with ``departure_timestamp``, ``trip_id``,
        ``route_short_name`` and ``stop_sequence``; stop coordinates are read
        positionally (tuple positions 14 and 15).
    lat, lon : bool
        Which user coordinate(s) enter the distance to a stop.

    Returns
    -------
    pandas.DataFrame
        Candidate rows merged with the notebook-level ``fribourg_sbb`` table.

    Raises
    ------
    ValueError
        If both ``lat`` and ``lon`` are false.
    """
    candidates = []

    for tup in scores_df.itertuples():

        # tup[4] is used as the label of the segment's first GPS point.
        start_time = user_df.loc[tup[4]].timestamp_unix
        
        # filter to trips that could be up to 15 minutes late or up to 5 minutes early
        trip_ids = transport_df[(transport_df.departure_timestamp>start_time-900)
                                &(transport_df.departure_timestamp<start_time+300)].trip_id.unique()
        
        # tup[6] is compared against route_short_name, i.e. it holds the line reference.
        line_trips = transport_df[(transport_df.route_short_name==tup[6])&(transport_df.trip_id.isin(trip_ids))].copy()
        
        # x[14] / x[15] are positional itertuples fields -- presumably stop_lat / stop_lon;
        # verify against transport_df's column order.
        if lat and lon:
            start_loc = user_df.loc[tup[4]][['latitude', 'longitude']].values
            distances = [calc_dist(start_loc[0], start_loc[1], x[14], x[15]) for x in line_trips.itertuples()]
        elif lat:
            start_loc = user_df.loc[tup[4]][['latitude']].values
            distances = [calc_dist(start_loc[0], x[15], x[14], x[15]) for x in line_trips.itertuples()]
        elif lon:
            start_loc = user_df.loc[tup[4]][['longitude']].values
            distances = [calc_dist(x[14], start_loc[0], x[14], x[15]) for x in line_trips.itertuples()]
        else:
            raise ValueError('Need to use at least one of latitude/longitude')

        line_trips['dist2start'] = distances

        # After sorting, the first row of each trip is its stop closest to the segment start.
        line_trips = line_trips.sort_values(['trip_id', 'dist2start'])

        candidate_line_trips = line_trips.groupby('trip_id', as_index=False).agg({'stop_sequence':'first', 
                                                                                  'departure_timestamp':'first',
                                                                                  'dist2start': 'first'})

        # Re-apply the time window, this time to the departure at the closest stop itself.
        candidate_line_trips = candidate_line_trips[(candidate_line_trips.departure_timestamp>start_time-900)
                                                    &(candidate_line_trips.departure_timestamp<start_time+300)]

        candidate_line_trips = candidate_line_trips[['trip_id', 'stop_sequence', 'dist2start']].values

        # tup[5] is passed as the label of the segment's last GPS point.
        direction = [check_direction(x[0], x[1], x[2], tup[4], tup[5], tup[6], line_trips) for x in candidate_line_trips]

        # Drop rejected trips and flatten the [start, end] pairs into single rows.
        candidate_line_trips = [x for a in direction if a !=None for x in a]
        # tup[1] is appended as the path_id column.
        # NOTE(review): np.append on a list mixing strings and numbers presumably yields a
        # string array, so gps_id, is_start and path_id may end up as strings -- confirm.
        candidate_line_trips = [np.append(x, tup[1]) for x in candidate_line_trips]

        candidates = candidates + candidate_line_trips
        
    candidates_df = pd.DataFrame(candidates, columns=['trip_id', 'stop_sequence', 'gps_id', 'dist_to_stop', 'is_start', 'path_id'])
    candidates_df = candidates_df.astype(dtype={'stop_sequence': int, 'dist_to_stop': float})
    # NOTE: merges with the notebook-level fribourg_sbb, not with the transport_df argument.
    candidates_df = pd.merge(fribourg_sbb, candidates_df, on=['trip_id', 'stop_sequence'])
    candidates_df = candidates_df.sort_values(['path_id', 'trip_id', 'arrival_timestamp', 'stop_sequence'])
    candidates_df = candidates_df.reset_index(drop=True)
    
    return candidates_df

In [45]:
def top_candidates(candidates_df, user_df):
    """Pick, per path and line, the candidate trip closest in space and then in time.

    For every candidate row the time difference is
    ``|departure_timestamp - user time|`` for a boarding row (``is_start``
    true) and ``|arrival_timestamp - user time|`` for an alighting row, where
    the user time is ``timestamp_unix`` of the GPS point ``gps_id``. Both the
    time difference and ``dist_to_stop`` are averaged per trip and rounded to
    two decimals; the trip with the smallest ``dist_to_stop`` (ties broken by
    ``time_diff``) is kept for each (path, line).

    Parameters
    ----------
    candidates_df : pandas.DataFrame
        Needs ``path_id``, ``route_short_name``, ``trip_id``,
        ``arrival_timestamp``, ``departure_timestamp``, ``gps_id``,
        ``dist_to_stop`` and ``is_start``. ``gps_id`` may be an int or a
        numeric string; ``is_start`` may be a bool or the strings
        'True' / 'False'.
    user_df : pandas.DataFrame
        GPS points with a ``timestamp_unix`` column, indexed by integer point id.

    Returns
    -------
    pandas.DataFrame
        Columns ``path_id, route_short_name, time_diff, dist_to_stop``.

    ``is_start`` is parsed explicitly because the string 'False' is truthy:
    testing it directly compared alighting rows against the departure time
    instead of the arrival time. Columns are read by name rather than by
    tuple position.
    """
    def _is_true(flag):
        # Accept real booleans as well as their string renderings.
        if isinstance(flag, str):
            return flag.strip().lower() == 'true'
        return bool(flag)

    candidates_df = candidates_df.copy()

    time_diffs = []
    rows = zip(candidates_df['is_start'], candidates_df['gps_id'],
               candidates_df['arrival_timestamp'], candidates_df['departure_timestamp'])
    for is_start, gps_id, arrival, departure in rows:
        user_time = user_df.loc[int(gps_id)].timestamp_unix
        scheduled = departure if _is_true(is_start) else arrival
        time_diffs.append(abs(scheduled - user_time))

    candidates_df['time_diff'] = time_diffs

    candidates_df = candidates_df.groupby(['path_id', 'route_short_name', 'trip_id'], as_index=False)\
    .agg({'time_diff': 'mean', 'dist_to_stop': 'mean'})

    candidates_df = candidates_df.round(2)

    candidates_df = candidates_df.sort_values(['path_id', 'dist_to_stop', 'time_diff'])

    candidates_df = candidates_df.groupby(['path_id', 'route_short_name'], as_index=False)\
    .agg({'time_diff': 'first', 'dist_to_stop': 'first'})

    return candidates_df

# Transport Public Fribourgeois - Live Data

In [46]:
import os
from datetime import timedelta 

In [47]:
def read_tpc(path, date=None):
    """Read every ';'-separated file of a directory into one DataFrame.

    Parameters
    ----------
    path : str
        Directory to scan (with or without a trailing separator).
    date : str, optional
        When given, only files whose name contains this substring are read.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all readable files, in sorted file-name order.

    Raises
    ------
    ValueError
        If no file could be loaded.

    Only genuinely empty files are skipped (and reported); any other read
    error propagates instead of being silently mislabelled as "Empty file".
    """
    # sorted() makes the row order reproducible; os.listdir order is arbitrary.
    filenames = sorted(os.listdir(path))

    if date is not None:
        filenames = [x for x in filenames if date in x]

    files = []
    for f in filenames:
        full_path = os.path.join(path, f)
        if not os.path.isfile(full_path):
            continue
        try:
            files.append(pd.read_csv(full_path, sep=';'))
        except pd.errors.EmptyDataError:
            print("Empty file: ", f)

    if not files:
        raise ValueError('No readable data files found in ' + str(path))

    return pd.concat(files)

In [48]:
# Travel date of the first GPS fix, rendered in the notebook timezone
# (Europe/Zurich) rather than in whatever zone the executing machine uses.
date = datetime.fromtimestamp(user_gps_df.iloc[0].timestamp_unix, tz=timezone).strftime("%Y-%m-%d")
print(date)

2019-03-26


In [49]:
# Load the live city and regional line files for the travel date computed in
# the previous cell, instead of repeating it as a string literal.
citylines_df = read_tpc('./TPF/CityLine_2019-03/', date)
regionallines_df = read_tpc('./TPF/RegionalLine 2019-03/', date)

Empty file:  2019-03-26_1002_1.plc


In [50]:
# Stack city and regional line records into a single frame.
lines_df = pd.concat([citylines_df, regionallines_df])

In [51]:
# Keep only the columns used below; .copy() so later assignments do not touch lines_df.
tpf_df = lines_df[['Vehicle', 'Day', 'Arrival', 'Departure', 'Latitude', 'Longitude', 'Line', 
                   'Journey', 'Block', 'JourneySeq', 'Stop', 'StopName', 'StopSeq']].copy()

In [52]:
# Parse 'Day' ("%Y-%m-%d" strings) into timezone-aware datetimes at midnight in the notebook timezone.
tpf_df['Day'] = tpf_df['Day'].apply(lambda x: timezone.localize(datetime.strptime(x, "%Y-%m-%d")))

In [53]:
# Combine each record's day with its arrival / departure clock time.
actual_arrivals = []
actual_arrival_timestamps = []
actual_departures = []
actual_departure_timestamps = []

# Positional itertuples fields: tup[2] is 'Day', tup[3] is 'Arrival', tup[4] is 'Departure'
# (tup[0] is the index, tup[1] is 'Vehicle'). The clock strings are sliced as HH:MM:SS.
for tup in tpf_df.itertuples():
    actual_arrival = tup[2] + timedelta(hours=int(tup[3][:2]), minutes=int(tup[3][3:5]), seconds=int(tup[3][6:8]))
    actual_arrivals.append(actual_arrival)
    actual_arrival_timestamps.append(actual_arrival.timestamp())
    
    actual_departure = tup[2] + timedelta(hours=int(tup[4][:2]), minutes=int(tup[4][3:5]), seconds=int(tup[4][6:8]))
    actual_departures.append(actual_departure)
    actual_departure_timestamps.append(actual_departure.timestamp())
    
# Datetime versions of the actual arrival / departure ...
tpf_df.loc[: ,'actual_arrival'] = actual_arrivals
tpf_df.loc[: ,'actual_departure'] = actual_departures

# ... and their Unix-timestamp counterparts.
tpf_df.loc[:, 'arrival_timestamp'] = actual_arrival_timestamps
tpf_df.loc[:, 'departure_timestamp'] = actual_departure_timestamps

In [55]:
# Mean recorded position per stop, as {Stop: {'Latitude': ..., 'Longitude': ...}}.
stop_dict = tpf_df[~tpf_df.Stop.isnull()][['Stop', 'Latitude', 'Longitude']]
stop_dict = stop_dict.groupby('Stop').agg({'Latitude': 'mean', 'Longitude': 'mean'})
stop_dict = stop_dict.to_dict('index')

In [56]:
# Fill missing coordinates of records that have a stop with that stop's mean position.
tpf_df.loc[(tpf_df.Latitude.isnull())&(~tpf_df.Stop.isnull()), 'Latitude'] = tpf_df[(tpf_df.Latitude.isnull())&(~tpf_df.Stop.isnull())].apply(lambda x: stop_dict.get(x.Stop)['Latitude'], axis=1)
tpf_df.loc[(tpf_df.Longitude.isnull())&(~tpf_df.Stop.isnull()), 'Longitude'] = tpf_df[(tpf_df.Longitude.isnull())&(~tpf_df.Stop.isnull())].apply(lambda x: stop_dict.get(x.Stop)['Longitude'], axis=1)
# Drop whatever still lacks a latitude or a longitude.
tpf_df = tpf_df[(~tpf_df.Latitude.isnull())&(~tpf_df.Longitude.isnull())]

In [57]:
def dtw(s, t, window):
    """Dynamic-time-warping distance between two sequences, restricted to a band.

    Parameters
    ----------
    s, t : sequence
        Lists or arrays whose elements are either scalars or equal-length coordinate
        vectors (e.g. rows of an (n, 2) array). The per-pair cost is the sum of the
        absolute element-wise differences.
    window : int
        Largest index offset |i - j| that may be aligned. It is widened to
        |len(s) - len(t)| when smaller, so that a full alignment is always possible.

    Returns
    -------
    float
        Accumulated cost of the best alignment; 0 when both sequences are empty and
        inf when exactly one of them is empty.
    """
    n, m = len(s), len(t)
    w = max(window, abs(n - m))

    # Cells outside the band stay at inf so they can never be chosen as a predecessor
    dtw_matrix = np.full((n + 1, m + 1), np.inf)
    dtw_matrix[0, 0] = 0

    for i in range(1, n + 1):
        for j in range(max(1, i - w), min(m, i + w) + 1):
            # np.sum (unlike the builtin sum) also accepts scalar elements
            cost = np.sum(np.abs(np.subtract(s[i-1], t[j-1])))
            # cheapest of the three neighbours: insertion, deletion, match
            last_min = min(dtw_matrix[i-1, j], dtw_matrix[i, j-1], dtw_matrix[i-1, j-1])
            dtw_matrix[i, j] = cost + last_min
    return dtw_matrix[-1][-1]

In [58]:
def get_top_scorers(nw_points, transport_df, user_df, max_dist=0.2, threshold=1.25, lat=True, lon=True):
    """Match each non-walk segment of the user trace to the vehicles whose GPS trace is closest.

    Parameters
    ----------
    nw_points : iterable of sequences
        One sequence of `user_df` index labels per non-walk segment, in time order.
    transport_df : DataFrame
        Vehicle records; the columns read here are Vehicle, Line, Latitude, Longitude,
        arrival_timestamp and departure_timestamp.
    user_df : DataFrame
        User trace; the columns read here are latitude, longitude and timestamp_unix.
    max_dist : float
        Upper bound (exclusive) on the DTW score for a vehicle to be listed in `top`.
    threshold : float
        A vehicle is listed in `top` only if its score is below threshold * best score.
    lat, lon : bool
        Which coordinates to compare; at least one must be True.

    Returns
    -------
    first : list
        Per segment, the (vehicle, score, line) tuple with the lowest score, or None when
        no vehicle qualified for that segment.
    top : list of lists
        Per segment, all (vehicle, score, line) tuples passing both the max_dist and the
        threshold test. Both comparisons are strict, so a best score of exactly 0 leaves
        the list empty.

    Raises
    ------
    ValueError
        If both `lat` and `lon` are False.
    """
    if not (lat or lon):
        raise ValueError('Need to make use of at least one of latitude or longitude')

    # Resolve the coordinate columns once instead of branching for every segment/vehicle
    user_cols = [col for col, use in (('latitude', lat), ('longitude', lon)) if use]
    bus_cols = [col for col, use in (('Latitude', lat), ('Longitude', lon)) if use]

    first = []
    top = []
    for s in nw_points:
        # restrict vehicle records to the segment's time window, with a 30s buffer on each end
        window_start = user_df.loc[s[0]].timestamp_unix - 30
        window_end = user_df.loc[s[-1]].timestamp_unix + 30
        candidates = transport_df[(transport_df.departure_timestamp > window_start)
                                  & (transport_df.arrival_timestamp < window_end)]

        user_coords = user_df.loc[s][user_cols].values

        # (vehicle, score, line) for every vehicle that can be scored in this window
        results = []
        for v in candidates.Vehicle.unique():
            bus = candidates[(candidates.Vehicle == v)]
            line = bus[~bus.Line.isnull()].Line.unique()

            # skip vehicles that serve several (or no) lines in the window, or that have
            # only a single record in it
            if len(line) != 1 or len(bus) == 1:
                continue

            score = dtw(user_coords, bus[bus_cols].values, 2)
            results.append((v, score, line[0]))

        # No vehicle qualified: report that explicitly instead of failing on min([])
        if not results:
            first.append(None)
            top.append([])
            continue

        min_score = min(r[1] for r in results)

        # best match, plus every vehicle close enough in absolute terms (max_dist) and not
        # significantly worse than the best (threshold)
        first.append(next(r for r in results if r[1] == min_score))
        top.append([r for r in results if r[1] < max_dist and r[1] < threshold * min_score])

    return first, top

# Testing

### OSM

In [59]:
# Run get_closest_pt_lines (defined elsewhere in this notebook) on the non-walk segments in
# three variants: both coordinates, latitude only (lon=False) and longitude only (lat=False).
# The two single-coordinate variants pass dist_threshold=50; the first call uses the default.
overlap_lat_lon_df = get_closest_pt_lines(fbg_tree, non_walk_points, user_gps_df, labeled_points)
overlap_lat_df = get_closest_pt_lines(fbg_tree, non_walk_points, user_gps_df, labeled_points, 
                                      lon=False, dist_threshold=50)
overlap_lon_df = get_closest_pt_lines(fbg_tree, non_walk_points, user_gps_df, labeled_points, 
                                      lat=False, dist_threshold=50)

In [60]:
# Score the overlaps of each variant with score_lines (defined elsewhere in this notebook).
# The single-coordinate variants override discount_factor (1) and threshold (0.5); the
# combined variant relies on the function's defaults.
overlap_lat_lon_scores_df = score_lines(overlap_lat_lon_df, non_walk_points)
overlap_lat_scores_df = score_lines(overlap_lat_df, non_walk_points, discount_factor=1, threshold=0.5)
overlap_lon_scores_df = score_lines(overlap_lon_df, non_walk_points, discount_factor=1, threshold=0.5)

In [61]:
overlap_lat_lon_scores_df

Unnamed: 0,path_id,transport_id,score,first_point_id,last_point_id,ref
0,0,17.0,1.0,0,8,2
1,0,21.0,1.0,0,8,338
2,0,25.0,1.0,0,8,340
3,0,29.0,1.0,0,8,339
4,1,1.0,0.981308,86,192,5
5,2,1.0,1.0,198,220,5
6,2,17.0,1.0,198,220,2
7,2,22.0,1.0,198,220,3
8,2,23.0,1.0,198,220,1
9,2,27.0,1.0,198,220,6


In [62]:
overlap_lon_scores_df

Unnamed: 0,path_id,transport_id,score,first_point_id,last_point_id,ref
0,0,17.0,1.0,0,8,2
1,0,21.0,1.0,0,8,338
2,0,25.0,1.0,0,8,340
3,0,29.0,1.0,0,8,339
4,1,1.0,0.953271,86,192,5
5,2,1.0,1.0,198,220,5
6,2,17.0,1.0,198,220,2
7,2,22.0,1.0,198,220,3
8,2,23.0,1.0,198,220,1
9,2,27.0,1.0,198,220,6


### Timetable

In [63]:
# Build candidates from each scored variant with gen_candidates (defined elsewhere), passing
# the same lat/lon switches that were used to produce the scores.
# NOTE(review): fribourg_sbb is presumably the timetable data — confirm where it is loaded.
candidates_lat_lon_df = gen_candidates(overlap_lat_lon_scores_df, user_gps_df, fribourg_sbb)
candidates_lat_df = gen_candidates(overlap_lat_scores_df, user_gps_df, fribourg_sbb, lon=False)
candidates_lon_df = gen_candidates(overlap_lon_scores_df, user_gps_df, fribourg_sbb, lat=False)

In [65]:
# Reduce each candidate set with top_candidates (defined elsewhere in this notebook); all
# three variants are evaluated against the same unmodified user trace.
top_candidates_lat_lon_df = top_candidates(candidates_lat_lon_df, user_gps_df)
top_candidates_lat_df = top_candidates(candidates_lat_df, user_gps_df)
top_candidates_lon_df = top_candidates(candidates_lon_df, user_gps_df)

In [66]:
top_candidates_lat_lon_df

Unnamed: 0,path_id,route_short_name,time_diff,dist_to_stop
0,0,2,37.5,92.4
1,1,5,29.0,33.09
2,2,1,146.0,23.14
3,2,2,26.0,23.14
4,2,3,266.0,23.14
5,2,5,154.0,23.14
6,2,6,154.0,23.14
7,3,3,54.5,10.87
8,3,5,54.5,10.87
9,4,4,136.5,98.32


In [67]:
top_candidates_lat_df

Unnamed: 0,path_id,route_short_name,time_diff,dist_to_stop
0,0,2,37.5,84.66
1,0,340,560.5,1639.42
2,1,5,269.0,30.37
3,2,1,146.0,20.45
4,2,2,26.0,20.45
5,2,3,266.0,20.45
6,2,5,154.0,20.45
7,2,6,154.0,20.45
8,3,3,54.5,10.53
9,3,5,54.5,10.53


In [68]:
top_candidates_lon_df

Unnamed: 0,path_id,route_short_name,time_diff,dist_to_stop
0,0,2,37.5,91.62
1,1,5,29.0,33.05
2,2,1,146.0,21.88
3,2,2,26.0,21.88
4,2,3,266.0,21.88
5,2,5,154.0,21.88
6,2,6,154.0,21.88
7,3,3,725.5,9.4
8,3,5,54.5,9.78
9,4,4,136.5,73.5


### Bus GPS

In [69]:
# Match every non-walk segment against the recorded vehicle positions in tpf_df using DTW,
# once with both coordinates, once with latitude only and once with longitude only.
first_lat_lon, top_lat_lon = get_top_scorers(non_walk_points, tpf_df, user_gps_df)
first_lat, top_lat = get_top_scorers(non_walk_points, tpf_df, user_gps_df, lon=False)
first_lon, top_lon = get_top_scorers(non_walk_points, tpf_df, user_gps_df, lat=False)

In [70]:
first_lat_lon

[(523, 0.045611639999983744, 2.0),
 (557, 0.12298649999999434, 5.0),
 (513, 0.026181810000018402, 2.0),
 (590, 0.03828858999998097, 5.0),
 (354, 0.07024600999997421, 4.0),
 (352, 0.08015671000003977, 4.0)]

In [71]:
first_lat

[(152, 0.008447480000000951, 9.0),
 (557, 0.04131083999999419, 5.0),
 (513, 0.005876129999975888, 2.0),
 (561, 0.009034050000011007, 6.0),
 (354, 0.01980236444445893, 4.0),
 (562, 0.016072079999993605, 6.0)]

In [72]:
first_lon

[(108, 0.02638009000000263, 9.0),
 (557, 0.06342796000000916, 5.0),
 (530, 0.01383340000000377, 3.0),
 (530, 0.007549339999999738, 3.0),
 (354, 0.04511093888888684, 4.0),
 (558, 0.025735729999997403, 1.0)]

### GPS Noise

In [73]:
def blur_location(lat, lon, radius_m, fixed=True):
    """Return (latitude, longitude) of the input point moved in a random direction.

    With ``fixed=True`` the point is moved exactly ``radius_m`` metres; otherwise the
    displacement is drawn uniformly from [0, radius_m]. The direction is drawn uniformly
    from [0, 360] and passed to geopy as the bearing.
    """
    start = geopy.Point(lat, lon)

    # Draw order (radius first, then bearing) is kept so a seeded RNG gives the same offsets
    offset_m = radius_m if fixed else random.uniform(0, radius_m)
    bearing = random.uniform(0, 360)

    moved = distance(meters=offset_m).destination(start, bearing)
    return moved.latitude, moved.longitude

In [74]:
# Noisy copy of the user trace: every fix is displaced by exactly 100 m in a random direction.
# NOTE: `random` is not seeded in this cell, so each run produces different offsets.
blurred_user_gps_df = user_gps_df.copy()

# Read the coordinates by column name rather than by tuple position, so the result does not
# depend on the column order of the loaded trace.
blurred_coords = [
    blur_location(lat, lon, radius_m=100, fixed=True)
    for lat, lon in zip(blurred_user_gps_df['latitude'], blurred_user_gps_df['longitude'])
]

blurred_user_gps_df[['latitude', 'longitude']] = blurred_coords

In [75]:
# GPS
first_blurred, top_blurred = get_top_scorers(non_walk_points, tpf_df, blurred_user_gps_df)
first_blurred

[(523, 0.05188740273533465, 2.0),
 (557, 0.18630421121611107, 5.0),
 (513, 0.038094349817748174, 2.0),
 (590, 0.04413520036877383, 5.0),
 (354, 0.09840392828920574, 4.0),
 (554, 0.09065729058839711, 5.0)]

In [76]:
# OSM
overlap_blur_df = get_closest_pt_lines(fbg_tree, non_walk_points, blurred_user_gps_df, labeled_points, 
                                       dist_threshold=100)

overlap_blur_scores_df = score_lines(overlap_blur_df, non_walk_points, threshold=0.5)

overlap_blur_scores_df

Unnamed: 0,path_id,transport_id,score,first_point_id,last_point_id,ref
0,0,17.0,1.0,0,8,2
1,0,21.0,1.0,0,8,338
2,0,25.0,1.0,0,8,340
3,0,29.0,1.0,0,8,339
4,2,22.0,0.677585,198,220,3
5,2,23.0,0.65715,198,220,1
6,2,1.0,0.591366,200,220,5
7,2,17.0,0.571762,200,220,2
8,2,27.0,0.571762,200,220,6
9,2,8.0,0.571,200,220,181


In [78]:
# Timetable
candidates_blur_df = gen_candidates(overlap_blur_scores_df, blurred_user_gps_df, fribourg_sbb)

top_candidates_blur_df = top_candidates(candidates_blur_df, blurred_user_gps_df)

top_candidates_blur_df

Unnamed: 0,path_id,route_short_name,time_diff,dist_to_stop
0,0,2,37.5,130.42
1,2,1,146.0,71.49
2,2,2,18.5,72.44
3,2,3,266.0,71.49
4,2,5,161.5,72.44
5,2,6,161.5,72.44
6,3,3,54.5,60.64
7,3,5,54.5,60.64
8,4,4,128.0,150.41
9,5,4,68.66,126.76


### Time split

In [79]:
# Cut the user trace into `splits` consecutive fragments and run the segment detection,
# line matching and candidate selection on each fragment independently. Every result row is
# tagged with the id of the fragment it came from.
splits = 10
total_rows = user_gps_df.shape[0]
frag_size = round(total_rows/splits)
frag_results = []

start_index = 0

for s in range(splits):
    # round() may undershoot total_rows/splits; the last fragment runs to the end of the
    # trace so that trailing GPS points are never silently left out.
    end_index = total_rows if s == splits - 1 else start_index + frag_size
    frag_user_gps_df = user_gps_df.iloc[start_index:end_index].reset_index(drop=True)
    start_index = end_index

    # An empty fragment (possible for very short traces) has nothing to match
    if frag_user_gps_df.empty:
        continue

    frag_edge_df = points_to_edges(frag_user_gps_df)
    frag_edge_df, frag_walk_points, frag_non_walk_points = generate_segment_ids(frag_edge_df)
    frag_walk_points, frag_non_walk_points = edge_to_point_id(frag_edge_df, frag_walk_points, frag_non_walk_points)

    # Fragments without any non-walk segment contribute no rows
    if frag_non_walk_points is None:
        continue

    overlap_frag_df = get_closest_pt_lines(fbg_tree, frag_non_walk_points, frag_user_gps_df, labeled_points,
                                           dist_threshold=70)

    overlap_frag_scores_df = score_lines(overlap_frag_df, frag_non_walk_points, threshold=0.65)
    candidates_frag_df = gen_candidates(overlap_frag_scores_df, frag_user_gps_df, fribourg_sbb)
    top_candidates_frag_df = top_candidates(candidates_frag_df, frag_user_gps_df)

    top_candidates_frag_df['frag_id'] = s
    frag_results.append(top_candidates_frag_df)

# Concatenate once at the end; res_df stays None when no fragment produced a result
res_df = pd.concat(frag_results) if frag_results else None