In [25]:
from os import getcwd
from os.path import join, abspath, pardir
from json import dump
import geopandas as gp
from shapely.geometry import LineString
import pandas as pd
import numpy as np
from collections import namedtuple
import matplotlib.pyplot as plt
from shapely.ops import nearest_points
from IPython.display import display
from ipywidgets import interact, interactive, fixed, interact_manual, FloatSlider, Dropdown, Text, GridBox, VBox, HBox, Accordion, Output, Checkbox, Layout, IntProgress, Tab, Image, Button

##### Configs

In [2]:
# Project root: the parent of the current working directory.
parent_dir = abspath(join(getcwd(), pardir))

# Input folders located directly under the project root.
shape_files_dir, gps_files_dir = (
    join(parent_dir, folder_name) for folder_name in ("2109_STIB_MIVB_Network", "GPS")
)

# Shapefiles (lines, then stops) inside the network folder.
line_shape_file, stops_shape_file = (
    join(shape_files_dir, file_name) for file_name in ("ACTU_LINES.shp", "ACTU_STOPS.shp")
)

##### Load files

In [3]:
# Read the two network shapefiles (lines first, then stops) with geopandas.
line_df, stops_df = (
    gp.read_file(shape_path) for shape_path in (line_shape_file, stops_shape_file)
)

# The GPS tracks are stored as a CSV inside the GPS folder and loaded with pandas.
gps_df = pd.read_csv(join(gps_files_dir, "GPStracks.csv"))

##### Helper functions for predicting the transport mode

In [4]:
def get_list_threshold(min=1, max=100):
    """Build the ascending list of candidate distance thresholds.

    Both bounds are truncated with ``int``. The result contains every integer
    ``x`` in ``[int(min), int(max))`` that has no divisor in ``range(2, x)``,
    i.e. the prime numbers of that range, plus any value below 2 (such as 1)
    when the range includes it. The upper bound itself is never included.

    Args:
        min: inclusive lower bound (truncated to an integer).
        max: exclusive upper bound (truncated to an integer).

    Returns:
        list[int]: the selected thresholds, in increasing order.
    """
    lower_bound, upper_bound = int(min), int(max)

    thresholds = []
    for candidate in range(lower_bound, upper_bound):
        has_divisor = any(candidate % divisor == 0 for divisor in range(2, candidate))
        if not has_divisor:
            thresholds.append(candidate)
    return thresholds

def get_spatial_join(df1, df2, threshold_list, column=None, **kwargs):
    """Nearest-join ``df2`` against ``df1`` using the first threshold that matches.

    ``df1`` is re-projected to the CRS of ``df2`` when the two differ. The
    thresholds are then tried in the given order as ``max_distance`` of
    ``df2.sjoin_nearest(df1, ...)``; the search stops at the first threshold
    whose join result is not empty. The join distances are stored in a
    ``"distances"`` column.

    Args:
        df1: GeoDataFrame joined onto ``df2`` (right side of the join).
        df2: GeoDataFrame whose rows are kept (left side of the join).
        threshold_list: non-empty list of maximum distances to try, in order.
        column: optional column of the join result whose unique values are returned.
        **kwargs: forwarded unchanged to ``sjoin_nearest``.

    Returns:
        tuple: ``(threshold, values, joined)`` where ``threshold`` is the value
        that produced a match (``0.0`` if none did), ``values`` is the list of
        unique entries of ``column`` (empty when ``column`` is ``None`` or
        missing from the result) and ``joined`` is the last join result.

    Raises:
        ValueError: if a dataframe is not a GeoDataFrame or the threshold list
            is not a non-empty list.
    """
    # Validate the two frames in argument order, then the threshold list.
    for frame in (df1, df2):
        if not isinstance(frame, gp.GeoDataFrame):
            raise ValueError("Kindly, pass a valid geo pandas dataframe.")
    if not (isinstance(threshold_list, list) and len(threshold_list)):
        raise ValueError("Kindly, pass a valid threshold list.")

    # Distances are only meaningful when both frames share one CRS.
    if df1.crs != df2.crs:
        df1 = df1.to_crs(df2.crs)

    matched_threshold = float(0)
    joined = None
    for candidate in threshold_list:
        joined = df2.sjoin_nearest(df1, max_distance=float(candidate), distance_col="distances", **kwargs)
        if not joined.empty:
            matched_threshold = candidate
            break

    if column is not None and column in joined.columns.to_list():
        return matched_threshold, list(joined[column].unique()), joined
    return matched_threshold, list(), joined
    
def get_intersection(list_1, list_2):
    """Return the distinct values present in both lists.

    The order of the returned values is not guaranteed because the
    computation goes through sets.

    Raises:
        ValueError: if either argument is not a ``list``.
    """
    both_are_lists = isinstance(list_1, list) and isinstance(list_2, list)
    if not both_are_lists:
        raise ValueError("Kindly, pass a valid list for intersection.")

    common_values = set(list_1) & set(list_2)
    return list(common_values)

def plot_tracks(track_df, df, modes, col='LIGNE', title=str()):
    """Plot a GPS track together with the network geometries of the given modes.

    Args:
        track_df: GeoDataFrame with the track; it is re-projected to ``df.crs``
            before being drawn in pink.
        df: GeoDataFrame with the network geometries and a ``col`` column.
        modes: iterable of ``col`` values to draw; each one gets its own colormap.
        col: name of the column of ``df`` matched against ``modes``.
        title: text used as the figure's super-title.

    Returns:
        tuple: the matplotlib ``(fig, ax)`` pair the plot was drawn on.
    """
    colors = ['Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds', 'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu', 'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn']
    fig, ax = plt.subplots(figsize=(15, 15))
    track_df.to_crs(df.crs).plot(ax=ax, alpha=0.7, color="pink")
    for i, mode in enumerate(modes):
        # Select only the rows of the current mode, so each mode really is drawn
        # with its own colormap (filtering on all modes redrew everything each pass).
        mode_df = df.loc[df[col].isin([mode])]
        if mode_df.empty:
            # Nothing to draw for this mode.
            continue
        # Cycle through the palette so more modes than colormaps cannot raise IndexError.
        mode_df.plot(ax=ax, cmap=colors[i % len(colors)])
    fig.suptitle(title)
    return fig, ax

##### Predict the mode of transport for each GPS track

In [8]:
def predict_transport_mode(gps_df, line_df, stops_df, max_dist_threshold):
    """Predict, for every GPS track, the network line it most likely followed.

    For each distinct ``TrackId`` of ``gps_df`` the function:

    1. builds a point GeoDataFrame (EPSG:4326) from the ``lon``/``lat`` columns;
    2. nearest-joins the track with the lines (column ``LIGNE``) and with the
       stops (column ``Code_Ligne``), using increasing distance thresholds;
    3. keeps the line codes found by both joins as candidate modes;
    4. assigns every track point to the closest candidate line, or to
       ``"others"`` when that distance exceeds ``max_dist_threshold``;
    5. converts the counts to percentages and picks the highest one.

    Args:
        gps_df: pandas DataFrame with at least ``TrackId``, ``lon`` and ``lat``.
        line_df: GeoDataFrame of network lines with ``LIGNE`` and ``VARIANTE``.
        stops_df: GeoDataFrame of network stops with ``Code_Ligne``.
        max_dist_threshold: maximum point-to-line distance, compared against
            distances computed in the CRS of ``line_df`` (the original notes
            describe it as meters). It is also the exclusive upper bound
            of the join thresholds (lines start at 1, stops at 5).

    Returns:
        dict: track id -> ``Result(track_id, mode, nearest, modes, fig, ax)``.
        ``mode`` is ``"bus"``, ``"tram"``, ``"metro"`` or ``"others"``;
        ``nearest`` is the winning line code (or ``"others"``); ``modes`` is a
        dict of percentages per candidate (plus ``"others"``). When no common
        candidate exists, ``modes`` is an empty list and ``fig``/``ax`` are ``None``.

    Raises:
        ValueError: on wrongly typed inputs, or when ``max_dist_threshold`` is
            too small to generate any join threshold.
    """
    if not isinstance(gps_df, pd.DataFrame): raise ValueError("Kindly, pass the gps track(s) data as a pandas dataframe.")
    if not isinstance(stops_df, gp.GeoDataFrame): raise ValueError("Kindly, pass the stops data as a geo pandas dataframe.")
    if not isinstance(line_df, gp.GeoDataFrame): raise ValueError("Kindly, pass the lines data as a geo pandas dataframe.")

    # Get list of all the tracks in the GPS data
    tracks = list(gps_df['TrackId'].unique())

    # List of thresholds for lines and stops
    lines_max_dist_threshold = get_list_threshold(1, max_dist_threshold)
    stops_max_dist_threshold = get_list_threshold(5, max_dist_threshold)

    # An empty threshold list would make the spatial join fail with a generic
    # message on the first track; report the actual cause instead.
    if tracks and not (lines_max_dist_threshold and stops_max_dist_threshold):
        raise ValueError(
            "max_dist_threshold={} is too small: no candidate join threshold could be "
            "generated for the lines and/or the stops.".format(max_dist_threshold)
        )

    # For result
    Result = namedtuple('Result', ['track_id', 'mode', 'nearest', 'modes', 'fig', 'ax'])
    results = dict()

    # For each GPS track
    for track in tracks:

        # 1. Get 'track' data from gps_df
        track_df = gps_df.loc[gps_df['TrackId'].isin([track])]

        # 2. Convert into geo-pandas dataframe with coordinate reference system (crs) of '4326' (WGS 84)
        track_df = gp.GeoDataFrame(track_df, geometry=gp.points_from_xy(track_df['lon'], track_df['lat']), crs=4326)

        # 3. Get spatial join with lines (the joined frame is needed later for the variants)
        _, closest_lines, track_line_df = get_spatial_join(track_df, line_df, threshold_list=lines_max_dist_threshold, column="LIGNE")

        # 4. Get spatial join with stops (only the list of line codes is used)
        _, closest_stops, _ = get_spatial_join(track_df, stops_df, threshold_list=stops_max_dist_threshold, column="Code_Ligne")

        # 5. Get intersection of common modes
        modes = get_intersection(closest_lines, closest_stops)

        # Sanity check (if no modes) -> probably the mode is "others"
        if not len(modes):
            results[track] = Result(track_id=track, mode="others", nearest="others", modes=list(), fig=None, ax=None)
            continue

        # 6. For each candidate line, keep the geometry of the first variant that
        # was matched by the lines join, and start its counter at zero.
        # Also, change the track's crs from '4326' to the one of 'line_df'.
        modes_line_geo = dict()
        track_result = dict(others=0)
        track_df = track_df.to_crs(line_df.crs)
        for mode in modes:
            line_t = line_df.loc[line_df['LIGNE'].isin([mode])]
            variant_t = list(track_line_df.loc[track_line_df['LIGNE'].isin([mode])]['VARIANTE'].unique())
            line_t = line_t.loc[line_t['VARIANTE'].isin(variant_t)]

            modes_line_geo[mode] = list(line_t['geometry'])[0]
            track_result[mode] = 0

        # 7. For each point of the track, find the closest candidate line and
        # increment its counter. If even the closest line is farther away than
        # "max_dist_threshold", increment the "others" counter instead.
        track_points = list(track_df['geometry'])
        for track_point in track_points:
            distance = {
                mode: nearest_points(line_geo, track_point)[0].distance(track_point)
                for mode, line_geo in modes_line_geo.items()
            }
            closest_mode = min(distance, key=distance.get)
            if distance[closest_mode] > max_dist_threshold: closest_mode = 'others'
            track_result[closest_mode] += 1

        # 8. Convert the counters to percentages of the track's points
        for key in list(track_result.keys()):
            track_result[key] = (float(track_result[key]) / len(track_points)) * 100

        # 9. Pick the entry with the highest percentage (first one wins on ties)
        _max_probability_ = max(track_result, key=track_result.get)

        # 10. Derive the transport mode from the letter found in the line code,
        # checked in the order b, t, m (presumably codes look like '060b' — TODO confirm).
        _mode_ = "others"
        if _max_probability_ != "others":
            if "b" in _max_probability_:
                _mode_ = "bus"
            elif "t" in _max_probability_:
                _mode_ = "tram"
            elif "m" in _max_probability_:
                _mode_ = "metro"

        # 11. Plot only the winning line, or every candidate when "others" won
        if _max_probability_ != "others": modes = [_max_probability_]
        fig, ax = plot_tracks(track_df, line_df, modes, 'LIGNE', "Track {}".format(track))
        results[track] = Result(track_id=track, mode=_mode_, nearest=_max_probability_, modes=track_result, fig=fig, ax=ax)

    return results

In [46]:
# Track choices: a "None" placeholder followed by every distinct track id.
tracks = ["None", *gps_df['TrackId'].unique()]

# Default settings: plots disabled and a maximum distance threshold of 20.0.
show_plot, max_dist_threshold = False, float(20)

##### Interactive Widget component(s)

In [54]:
# Input widgets: track selector, plot toggle and distance-threshold slider.
w_tracks = Dropdown(options=tracks, value=tracks[0], description='Track ID ', disabled=False)
w_show_plot = Checkbox(value=show_plot, description='Show Plot(s)', disabled=False)
w_max_dist_threshold = FloatSlider(value=max_dist_threshold, min=float(0), max=float(100), step=float(0.1), description='Max. distance threshold ', disabled=False, continuous_update=False, orientation='horizontal', readout=True, readout_format='.1f')

# Output area the prediction report is written to, and the button that triggers it.
w_output = Output()
w_button = Button(description="Let's Predict")

def on_button_clicked(b):
    """Run the prediction for the selected track and print the report into ``w_output``.

    Args:
        b: the clicked button (passed by ipywidgets, not used).
    """
    with w_output:
        w_output.clear_output()

        # The first dropdown entry is the "nothing selected" placeholder.
        if w_tracks.value == tracks[0]:
            print("Sorry! Please select a track ID first")
            return

        track_gps_df = gps_df.loc[gps_df['TrackId'].isin([w_tracks.value])]
        try:
            results = predict_transport_mode(track_gps_df, line_df, stops_df, w_max_dist_threshold.value)
        except ValueError as error:
            # Invalid settings (e.g. a threshold too small to build any join
            # threshold) are reported as a message instead of a traceback.
            print("Sorry! {}".format(error))
            return

        for result in results.values():
            print("Track: {}\n\nMode: '{}'\n\nPredicted: '{}'\n".format(result.track_id, result.mode, result.nearest))

            # When no candidate line was found, 'modes' is an empty list rather
            # than a dict of percentages; treat it as "no probabilities".
            probabilities = result.modes if isinstance(result.modes, dict) else dict()
            _p_ = dict(sorted(probabilities.items(), key=lambda item: item[1], reverse=True))

            print("Probabilities: {}".format(_p_))

            # There is no figure for tracks without any candidate line.
            if w_show_plot.value and result.fig is not None:
                display(result.fig)

w_button.on_click(on_button_clicked)

meta_data = [ w_tracks, w_max_dist_threshold, w_show_plot, w_button, w_output ]
grid_box = GridBox(meta_data, layout=Layout(grid_template_columns="repeat(2, 50%)"))
grid_box

GridBox(children=(Dropdown(description='Track ID ', options=('None', 1, 3, 4, 5, 6, 7, 8, 10, 11), value='None…

In [31]:
# Minimal button/output demo, independent from the prediction widgets.
button = Button(description="Click Me!")
output = Output()

display(button, output)

# Named differently from the prediction callback so that running this cell
# does not rebind the notebook-level name `on_button_clicked`.
def on_demo_button_clicked(b):
    """Print a confirmation message into ``output`` when the demo button is clicked."""
    with output:
        print("Button clicked.")

button.on_click(on_demo_button_clicked)

Button(description='Click Me!', style=ButtonStyle())

Output()

In [None]:
# predict_transport_mode requires the distance threshold as its fourth argument;
# pass the notebook-level max_dist_threshold setting (omitting it raises TypeError).
results = predict_transport_mode(gps_df, line_df, stops_df, max_dist_threshold)

In [None]:
results

In [None]:
# One tab-separated summary line per track: id, mode, predicted line and percentages
# (the first four fields of each result).
for key, result in results.items():
    print("Track: {}\tMode: {}\tPredicted: {}\tProbabilities: {}".format(*result[:4]))

##### Get the GPS Tracks

In [None]:
# Read the GPS tracks CSV again for the step-by-step walkthrough below.
gps_df = pd.read_csv(join(gps_files_dir, "GPStracks.csv"))

# NOTE: this rebinds the notebook-level max_dist_threshold to 100.0.
max_dist_threshold = float(100)

# Candidate join thresholds: lines start at 1, stops start at 5.
lines_max_dist_threshold, stops_max_dist_threshold = (
    get_list_threshold(first_threshold, max_dist_threshold) for first_threshold in (1, 5)
)

In [None]:
gps_df.head()

In [None]:

tracks

In [None]:
track = 10

In [None]:
track_df = gps_df.loc[gps_df['TrackId'].isin([track])]

In [None]:
track_df = gp.GeoDataFrame(track_df, geometry=gp.points_from_xy(track_df['lon'], track_df['lat']), crs=4326)

In [None]:
line_max_dist_threshold, closest_lines, track_line_df = get_spatial_join(track_df, line_df, threshold_list=lines_max_dist_threshold, column="LIGNE")

In [None]:
stop_max_dist_threshold, closest_stops, track_stop_df = get_spatial_join(track_df, stops_df, threshold_list=stops_max_dist_threshold, column="Code_Ligne")

In [None]:
modes = get_intersection(closest_lines, closest_stops)

In [None]:
lines = line_df.loc[line_df['LIGNE'].isin(modes)]
stops = stops_df.loc[stops_df['Code_Ligne'].isin(modes)]

In [None]:
track_df = track_df.to_crs(line_df.crs)

In [None]:
fig, ax = plot_tracks(track_df, line_df, modes, col='LIGNE', title="Track {}".format(track))

In [None]:
line_df['LIGNE'].unique()

In [None]:
mode = modes[0]

In [None]:
line_t = line_df.loc[line_df['LIGNE'].isin([mode])]
variant_t = list(track_line_df.loc[track_line_df['LIGNE'].isin([mode])]['VARIANTE'].unique())
line_t = line_t.loc[line_t['VARIANTE'].isin(variant_t)]

In [None]:
'060b'.find('t')

In [None]:
line_geo = list(line_t['geometry'])[0]

In [None]:
list(line_t['geometry'])

In [None]:
type(line_geo)

In [None]:
# For every candidate line: keep the geometry of the first matched variant
# and start its counter at zero ("others" collects the unmatched points).
modes_line_geo = {}
track_result_mode = {"others": 0}
for mode in modes:
    # Variants of this line that were matched by the lines join.
    variant_t = list(track_line_df.loc[track_line_df['LIGNE'].isin([mode]), 'VARIANTE'].unique())
    # Rows of the network restricted to this line and those variants.
    line_t = line_df.loc[line_df['LIGNE'].isin([mode]) & line_df['VARIANTE'].isin(variant_t)]

    modes_line_geo[mode] = line_t['geometry'].iloc[0]
    track_result_mode[mode] = 0

In [None]:
track_result_mode.keys()

In [None]:
track_points = list(track_df['geometry'])

In [None]:
# NOTE: this rebinds the notebook-level max_dist_threshold to 10.0.
max_dist_threshold = float(10)

# Reset the counters first so the cell is idempotent: without this, running it
# a second time would add new counts on top of the percentages computed below.
for key in list(track_result_mode.keys()):
    track_result_mode[key] = 0

# Assign every track point to its closest candidate line, or to "others"
# when even the closest line is farther away than max_dist_threshold.
for track_point in track_points:
    distance = dict()
    for mode in modes:
        line_geo = modes_line_geo[mode]
        nearest = nearest_points(line_geo, track_point)[0]
        nearest_distance = nearest.distance(track_point)
        distance[mode] = nearest_distance
    nearest = min(distance, key=lambda k: distance[k])
    if distance[nearest] > max_dist_threshold: nearest = 'others'
    track_result_mode[nearest] = track_result_mode[nearest] + 1

# convert the counts to percentages of the track's points
for key in list(track_result_mode.keys()):
    track_result_mode[key] = (float(track_result_mode[key]) / len(track_points)) * 100

In [None]:
track_result_mode

In [None]:
len(track_points)

In [None]:
290 + 24