# Super Ultra Alpha Omega RDP

In [None]:
import pandas as pd # To structure and manipulated data in a DataFrame format
import geopandas as gpd # To work with spatial data in a DataFrame
from geopandas import GeoDataFrame # To create a GeoDataFrame from a DataFrame

from shapely.geometry import shape, Point, LineString # To create line geometries that can be used in a GeoDataFrame

import matplotlib.pyplot as plt
# Set matplotlib's default x- and y-axis margins to 0.1 for the plots in this notebook.
plt.rcParams['axes.xmargin'] = 0.1
plt.rcParams['axes.ymargin'] = 0.1
%matplotlib inline


## INPUT DATA

In [None]:
# Load the raw trip records. Later cells read the x, y, datetime, vehicle and
# Trip columns of this table.
df= pd.read_csv('input/trip_index.csv')
# Preview the first rows.
df.head()


**Chuyển sang dạng GeoDataframe:**

In [None]:
# One shapely Point per input row, built from the (x, y) column pairs.
geometry = list(map(Point, zip(df.x, df.y)))
gdf = GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# Parse the timestamps so the sort below is chronological rather than lexical.
gdf['datetime'] = pd.to_datetime(gdf['datetime'])

# Order rows by vehicle, then by time, and renumber them from 0.
gdf = gdf.sort_values(['vehicle', 'datetime']).reset_index(drop=True)

# Preview the first 100 rows.
gdf.head(100)

https://www.youtube.com/watch?v=9Q8nEA_0ccg
https://github.com/FlorianWilhelm/gps_data_with_python

## EXTRACTING POINTS USING THE `RAMER-DOUGLAS-PEUCKER` ALGORITHM

Extract subset for testing

In [None]:
# gdf = gdf[:1000]

In [None]:
from rdp import rdp
import numpy as np

Get the list of trip (trajectory) ids

In [None]:
def get_trajecs(gdf):
    """Return the distinct values of the ``Trip`` column.

    Args:
        gdf: table (GeoDataFrame) with a ``Trip`` column.

    Returns:
        A 1-D numpy array of dtype ``object`` holding each trip id once,
        in order of first appearance in ``gdf``.
    """
    unique_trips = gdf['Trip'].drop_duplicates()
    return np.array(unique_trips, dtype=object)

Get points for RDP algorithm

In [None]:
def get_points(gdf):
    """Collect the point coordinates of ``gdf`` for the RDP algorithm.

    Args:
        gdf: GeoDataFrame whose ``geometry`` column exposes ``.x`` and ``.y``.

    Returns:
        A numpy array of shape ``(n, 2)``; column 0 holds the x coordinates
        and column 1 the y coordinates, one row per row of ``gdf``.
    """
    geoms = gdf['geometry']
    xs = geoms.x.to_numpy()
    ys = geoms.y.to_numpy()
    # Place the two 1-D vectors side by side as the columns of one array.
    return np.column_stack((xs, ys))

Apply the RDP algorithm to every single trajectory

In [None]:
def run_rdp(gdf, eps, min_points=10):
    """Simplify every trip of ``gdf`` with the Ramer-Douglas-Peucker algorithm.

    Args:
        gdf: GeoDataFrame with a ``Trip`` column and point geometries.
        eps: RDP tolerance, passed to ``rdp`` as ``epsilon``; it is compared
            against the raw x/y coordinates of the geometries.
        min_points: trips with fewer points than this are left out of the
            result entirely (default 10).

    Returns:
        The rows of ``gdf`` kept by RDP, concatenated trip by trip in order
        of each trip's first appearance. When no trip has at least
        ``min_points`` points, an empty slice of ``gdf`` is returned, so
        callers always receive a frame with the input's columns.
    """
    kept_parts = []
    for trip_id in get_trajecs(gdf):
        subgdf = gdf[gdf['Trip'] == trip_id]
        points = get_points(subgdf)
        # Skip trajectories that are too short.
        if len(points) < min_points:
            continue
        # rdp() takes an (n, d) array; algo="iter" selects the iterative
        # variant and return_mask=True yields a boolean keep-mask instead of
        # the simplified array (RDP docs: https://rdp.readthedocs.io/en/latest/).
        mask = rdp(points, epsilon=eps, algo="iter", return_mask=True)
        kept_parts.append(subgdf[mask])

    if not kept_parts:
        return gdf.iloc[0:0]
    # Concatenate once at the end instead of re-copying the growing result
    # on every iteration.
    return pd.concat(kept_parts)
        

In [None]:
%%time
# Simplify every trip; eps is compared against the raw geometry coordinates
# (the frame was created with crs="EPSG:4326").
gdf_rdp = run_rdp(gdf, eps=0.0001)

In [None]:
# Number of rows in the original (unsimplified) data.
len(gdf)

## Visualizing the data on a folium map

In [None]:
import folium 

### helper functions: visualize into folium map

In [None]:
# Palette used to colour trips; show_n_route indexes it with idx % len(colors).
# The values are given to folium.CircleMarker / folium.PolyLine as `color`,
# so each entry should be a colour string a browser understands. 'lightred'
# is a folium.Icon colour name but not a CSS colour name, so 'lightcoral'
# takes its place.
colors = [
    'red',
    'yellow',
    'blue',
    'lightcoral',
    'orange',
    'green',
    'lightgreen',
    'purple',
    'pink',
]

def add_point(mapobj, gdf, colors):
    """Draw every geometry of ``gdf`` on ``mapobj`` as a small circle marker.

    Args:
        mapobj: folium map (or layer) the markers are added to.
        gdf: GeoDataFrame whose ``geometry`` column exposes ``.x`` and ``.y``.
        colors: colour given to each marker as ``color``; show_n_route
            passes a single colour string here.
    """
    # NOTE(review): each location is passed as (x, y); folium reads a
    # location as (latitude, longitude) - confirm the column order of the data.
    geoms = gdf["geometry"]
    marker_style = dict(radius=1.5,
                        fill=True,
                        fill_opacity=0.75,
                        color=colors,
                        weight=0.01)
    for coord in zip(geoms.x, geoms.y):
        marker = folium.CircleMarker(location=coord, **marker_style)
        marker.add_to(mapobj)
           
def add_lines(mapobj, gdf, color):
    """Draw the geometries of ``gdf`` on ``mapobj`` as one connected polyline.

    Args:
        mapobj: folium map (or layer) the line is added to.
        gdf: GeoDataFrame whose ``geometry`` column exposes ``.x`` and ``.y``;
            the vertices are joined in row order.
        color: colour of the line.
    """
    geoms = gdf["geometry"]
    vertices = list(zip(geoms.x, geoms.y))
    line = folium.PolyLine(vertices, color=color, weight=1, opacity=1)
    line.add_to(mapobj)
       
    
def show_n_route(mapobj, gdf, mask_type):
    """Draw every trip of ``gdf`` on ``mapobj``, one palette colour per trip.

    Args:
        mapobj: base folium map; it is attached to a new 600-pixel-high
            folium.Figure and then drawn on.
        gdf: GeoDataFrame with a ``Trip`` column and point geometries.
        mask_type: 0 draws each trip as points (add_point); any other
            value, e.g. 1, draws it as a line (add_lines).

    Returns:
        The same ``mapobj``, with the trips added.
    """
    # Attach the map to a figure of fixed height.
    figure = folium.Figure(height = 600)
    mapobj.add_to(figure)

    # Choose the drawing helper once instead of branching for every trip.
    draw = add_point if mask_type == 0 else add_lines

    for position, trip_id in enumerate(get_trajecs(gdf)):
        trip_rows = gdf[gdf['Trip'] == trip_id]
        # Wrap around the palette when there are more trips than colours.
        draw(mapobj, trip_rows, colors[position % len(colors)])

    return mapobj

### BEFORE APPLYING RDP ALGORITHM

In [None]:
# Base map for the original (unsimplified) data.
mapobj1 = folium.Map([10.783284, 106.682347], zoom_start = 15, tiles='Cartodb dark_matter')

# Draw every trip as points (mask_type 0); pass 1 instead to draw lines.
show_n_route(mapobj1, gdf, 0)


### AFTER

In [None]:
# Base map for the simplified data.
mapobj2 = folium.Map([10.783284, 106.682347], zoom_start = 15, tiles='Cartodb dark_matter')

# Draw the lines first (mask_type 1) ...
show_n_route(mapobj2, gdf_rdp, 1)
# ... then the points (mask_type 0).
show_n_route(mapobj2, gdf_rdp, 0)


Saving output

In [None]:
# Save the map of the original data as a standalone HTML file.
fname1 = "output/before.html"
mapobj1.save(fname1)

In [None]:
# Save the map of the simplified data as a standalone HTML file.
fname2 = "output/after.html"
mapobj2.save(fname2)

In [None]:
# Write the simplified rows to CSV. No index argument is passed, so pandas'
# default applies and the row index is written as well.
gdf_rdp.to_csv("output/trip_rdp.csv")

In [None]:
# Non-null value count per column before simplification.
gdf.count()

In [None]:
# Non-null value count per column after simplification.
gdf_rdp.count()