# Super Ultra Alpha Omega RDP

In [1]:
import pandas as pd # To structure and manipulated data in a DataFrame format
import geopandas as gpd # To work with spatial data in a DataFrame
from geopandas import GeoDataFrame # To create a GeoDataFrame from a DataFrame

from shapely.geometry import shape, Point, LineString # To create line geometries that can be used in a GeoDataFrame

import matplotlib.pyplot as plt
plt.rcParams['axes.xmargin'] = 0.1
plt.rcParams['axes.ymargin'] = 0.1
%matplotlib inline


## INPUT DATA

In [2]:
# Load the pre-indexed GPS records; each row is one point and carries a `Trip` label
df= pd.read_csv('data/trip_index.csv')
df.head()


Unnamed: 0,vehicle,level_1,datetime,speed,y,x,heading,vehicleType,time_interval,distance,heading_interval,Trip
0,15C01678,0,2018-04-24 17:56:31,8.0,106.681921,10.765486,30.579252,500,11.0,32.364445,0.0,0
1,15C01678,1,2018-04-24 17:56:42,13.0,106.682066,10.765734,45.469121,500,8.0,33.819979,14.889869,0
2,15C01678,2,2018-04-24 17:56:50,18.0,106.68229,10.765952,43.870221,500,6.0,33.21313,1.5989,0
3,15C01678,3,2018-04-24 17:56:56,20.0,106.682498,10.766169,43.404318,500,6.0,35.094524,0.465903,0
4,15C01678,4,2018-04-24 17:57:02,22.0,106.682722,10.7664,40.975231,500,5.0,32.605395,2.429086,0


**Chuyển sang dạng GeoDataframe:**

In [3]:
# Build one shapely Point per row from the `x` and `y` columns.
# NOTE(review): in the preview above `x` holds ~10.77 and `y` holds ~106.68, which look
# like latitude and longitude respectively, so Point(x, y) is stored as (lat, lon) even
# though EPSG:4326 tooling normally expects (lon, lat). The folium helpers further down
# pass (geometry.x, geometry.y) as map locations, so they depend on this ordering —
# confirm and change both places together if the axis order is ever corrected.
geometry = [Point(xy) for xy in zip(df.x, df.y)]
gdf = GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

# Parse the timestamps, then order the points chronologically within each vehicle
gdf['datetime'] = pd.to_datetime(gdf['datetime'])
gdf = gdf.sort_values(['vehicle', 'datetime'], ascending=[True, True])
gdf.reset_index(drop=True, inplace=True)
# show gdf data
gdf.head(100)

Unnamed: 0,vehicle,level_1,datetime,speed,y,x,heading,vehicleType,time_interval,distance,heading_interval,Trip,geometry
0,15C01678,0,2018-04-24 17:56:31,8.0,106.681921,10.765486,30.579252,500,11.0,32.364445,0.000000,0,POINT (10.765 106.682)
1,15C01678,1,2018-04-24 17:56:42,13.0,106.682066,10.765734,45.469121,500,8.0,33.819979,14.889869,0,POINT (10.766 106.682)
2,15C01678,2,2018-04-24 17:56:50,18.0,106.682290,10.765952,43.870221,500,6.0,33.213130,1.598900,0,POINT (10.766 106.682)
3,15C01678,3,2018-04-24 17:56:56,20.0,106.682498,10.766169,43.404318,500,6.0,35.094524,0.465903,0,POINT (10.766 106.682)
4,15C01678,4,2018-04-24 17:57:02,22.0,106.682722,10.766400,40.975231,500,5.0,32.605395,2.429086,0,POINT (10.766 106.683)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,15C01678,28,2018-04-24 18:08:26,31.0,106.673603,10.782137,298.342905,500,4.0,35.423325,0.258614,3,POINT (10.782 106.674)
96,15C01678,29,2018-04-24 18:08:30,29.0,106.673322,10.782293,299.085772,500,4.0,32.548663,0.742867,3,POINT (10.782 106.673)
97,15C01678,30,2018-04-24 18:08:34,29.0,106.673057,10.782448,299.790638,500,4.0,33.407999,0.704866,3,POINT (10.782 106.673)
98,15C01678,31,2018-04-24 18:08:38,29.0,106.672790,10.782584,298.141166,500,8.0,64.508555,1.649472,3,POINT (10.783 106.673)


https://www.youtube.com/watch?v=9Q8nEA_0ccg
https://github.com/FlorianWilhelm/gps_data_with_python

## EXTRACTING POINTS USING THE `RAMER-DOUGLAS-PEUCKER` ALGORITHM

Extract subset for testing

In [4]:
# gdf = gdf[:1000]

In [5]:
from rdp import rdp
import numpy as np

Get the list of trip (trajectory) ids

In [6]:
def get_trajecs(gdf):
    """Return the distinct trip ids found in ``gdf``.

    input:
        gdf: (geo)dataframe with a 'Trip' column
    output:
        numpy array (dtype=object) holding each 'Trip' value once,
        in order of first appearance
    """
    trip_ids = gdf['Trip'].unique()
    return np.array(trip_ids, dtype=object)

Get points for RDP algorithm

In [7]:
def get_points(gdf):
    """Collect the point coordinates of ``gdf`` for the RDP algorithm.

    input:
        gdf: geodataframe whose 'geometry' column exposes ``.x`` and ``.y``
    output:
        numpy array of shape (n, 2); column 0 is geometry.x, column 1 is geometry.y
    """
    geom = gdf['geometry']
    # column_stack turns the two 1-D coordinate vectors into the columns of one array
    return np.column_stack((geom.x.to_numpy(), geom.y.to_numpy()))

Apply the RDP algorithm to every single trajectory

In [8]:
def run_rdp(gdf, eps, min_points=10):
    """Simplify every trajectory in ``gdf`` with the Ramer-Douglas-Peucker algorithm.

    Rows are grouped by their 'Trip' value. Each group with at least
    ``min_points`` rows is passed through ``rdp`` and only the rows the
    algorithm keeps are retained; shorter groups are left out of the result.

    params:
        gdf: geodataframe with a 'Trip' column and point geometries
        eps: RDP epsilon (distance tolerance), in the same units as the
             geometry coordinates
        min_points: minimum number of points a trajectory needs in order to
                    be processed (default 10)
    returns:
        geodataframe with the kept rows of all processed trajectories, carrying
        their original index labels; an empty slice of ``gdf`` (same columns)
        when no trajectory qualifies

    NOTE(review): rows are grouped on 'Trip' alone, so this assumes trip ids are
    unique across vehicles — confirm against how 'Trip' was generated.

    source: `https://rdp.readthedocs.io/en/latest/?badge=latest` (RDP docs)
    """
    kept_parts = []
    # sort=False keeps the trips in order of first appearance; a single groupby
    # pass replaces one full-frame boolean scan per trip.
    for _, trip_gdf in gdf.groupby('Trip', sort=False):
        # ignore trajectories that are too short
        if len(trip_gdf) < min_points:
            continue
        points = get_points(trip_gdf)
        # rdp() takes an (n, d) array; algo="iter" selects the iterative variant and
        # return_mask=True yields a boolean mask over the input rows instead of
        # the simplified array, so the mask can index the geodataframe directly.
        mask = rdp(points, epsilon=eps, algo="iter", return_mask=True)
        kept_parts.append(trip_gdf[mask])

    if not kept_parts:
        # Nothing qualified: hand back an empty frame rather than None so callers
        # can keep using dataframe methods on the result.
        return gdf.iloc[0:0]

    # Concatenate once at the end instead of re-copying the result on every iteration
    return pd.concat(kept_parts)
        

In [9]:
%%time
# eps is expressed in coordinate units (degrees here, as the geometry is EPSG:4326);
# 1e-5 degrees is roughly on the order of one metre.
gdf_rdp = run_rdp(gdf, eps=0.00001)

Wall time: 1min 22s


In [10]:
# Number of points before simplification
len(gdf)

140189

## Visualizing data into folium map

In [11]:
import folium 

### helper functions: visualize into folium map

In [12]:
# Palette cycled through when drawing trips (one colour per trip, wrapping around).
# Every entry must be a valid CSS colour name: the values are handed to
# folium.CircleMarker / folium.PolyLine as `color`. 'lightred' is a folium *icon*
# colour, not a CSS name, so it is replaced with 'lightcoral'.
colors = [
    'red',
    'yellow',
    'blue',
    'lightcoral',
    'orange',
    'green',
    'lightgreen',
    'purple',
    'pink']

def add_point(mapobj, gdf, colors):
    """Draw every point of ``gdf`` on ``mapobj`` as a small circle marker.

    mapobj: folium object the markers are added to
    gdf: geodataframe with point geometries; each marker is placed at
         (geometry.x, geometry.y)
    colors: colour applied to the markers (a single value, despite the plural name)
    """
    geom = gdf["geometry"]
    # One marker per (x, y) pair, added straight to the map object
    for location in zip(geom.x, geom.y):
        marker = folium.CircleMarker(
            location=location,
            radius=1.5,
            fill=True,
            fill_opacity=0.75,
            color=colors,
            weight=0.01,
        )
        marker.add_to(mapobj)
           
def add_lines(mapobj, gdf, color):
    """Draw the points of ``gdf``, in row order, as one polyline on ``mapobj``.

    mapobj: folium object the line is added to
    gdf: geodataframe with point geometries; vertices are (geometry.x, geometry.y)
    color: colour of the line
    """
    geom = gdf["geometry"]
    path = [(px, py) for px, py in zip(geom.x, geom.y)]
    line = folium.PolyLine(path, color=color, weight=1, opacity=1)
    line.add_to(mapobj)
       
    
def show_n_route(mapobj, gdf, mask_type):
    """Draw every trip of ``gdf`` on ``mapobj`` and return the map.

    Options:
    - mapobj: base map; it is attached to a new folium Figure of height 400
    - gdf: geodataframe with a 'Trip' column
    - mask_type: 0 draws the trips as points, any other value draws them as lines

    Each trip is drawn with the next entry of the module-level ``colors`` list,
    wrapping around when there are more trips than colours.
    """
    # Attach the base map to a figure
    figure = folium.Figure(height=400)
    mapobj.add_to(figure)

    # Choose the drawing helper once: points for 0, lines otherwise
    draw = add_point if mask_type == 0 else add_lines

    # Draw each trip separately so it gets its own colour
    for position, trip_id in enumerate(get_trajecs(gdf)):
        trip_gdf = gdf[gdf['Trip'] == trip_id]
        draw(mapobj, trip_gdf, colors[position % len(colors)])

    return mapobj

### BEFORE APPLYING RDP ALGORITHM

In [None]:
# map
mapobj1 = folium.Map([10.783284, 106.682347], zoom_start = 15, tiles='Cartodb dark_matter')

# draw every trip of the original data as points (mask_type 0)
show_n_route(mapobj1, gdf, 0)


### AFTER

In [None]:
# map
mapobj2 = folium.Map([10.783284, 106.682347], zoom_start = 15, tiles='Cartodb dark_matter')

# draw every trip of the simplified data as points (mask_type 0)
show_n_route(mapobj2, gdf_rdp, 0)
# to draw lines instead, use mask_type 1:
#show_n_route(mapobj2, gdf_rdp, 1)

Save the maps as HTML files

In [None]:
# Write the "before" map (original points) to an HTML file
fname1 = "before.html"
mapobj1.save(fname1)

In [None]:
# Write the "after" map (RDP-simplified points) to an HTML file
fname2 = "after.html"
mapobj2.save(fname2)

Save the simplified points as CSV

In [None]:
# Export the RDP-simplified points to a CSV file
gdf_rdp.to_csv("points_smoothing_rdp.csv")

In [None]:
# Preview the first rows of the full (unsimplified) data
gdf.head()