In [1]:
import pandas as pd
import osmnx as ox
import numpy as np

In [90]:
# Load the raw waypoint CSV from the input folder.
fname = 'waypoint_20180427_202105170936.csv'
path='input/'
df = pd.read_csv(f'{path}{fname}')
# Drop rows that repeat the same vehicle / position / datetime, keeping the first.
df = df.drop_duplicates(subset=['vehicle', 'y', 'x', 'datetime'], keep='first')
df['datetime'] = pd.to_datetime(df['datetime'])
# Epoch seconds. Going through datetime64[s] instead of dividing the raw int64
# by 10**9 keeps the result correct whatever resolution (ns, us, ms, s) the
# parsed datetime column ends up with.
df['ts'] = df['datetime'].values.astype('datetime64[s]').astype(np.int64)
df = df.loc[:, ['vehicle', 'heading', 'y', 'x', 'ts']]
# Order each vehicle's waypoints chronologically and renumber the rows so that
# consecutive rows can later be compared with a plain shift.
df = df.sort_values(['vehicle', 'ts'], ascending=[True, True]).reset_index(drop=True)
df.head(10)

Unnamed: 0,vehicle,heading,y,x,ts
0,14N4768,156.0,10.77235,106.66513,1524788010
1,14N4768,157.0,10.77058,106.66583,1524788041
2,14N4768,161.0,10.76758,106.66703,1524788101
3,14N4768,95.0,10.76768,106.67252,1524788220
4,14N4768,101.0,10.76752,106.67428,1524788250
5,14N4768,108.0,10.7667,106.67783,1524788311
6,14N4768,325.0,10.76547,106.69688,1524809359
7,14N4768,329.0,10.76788,106.69542,1524809479
8,14N4768,330.0,10.76955,106.69448,1524809510
9,14N4768,324.0,10.7707,106.69388,1524809569


In [91]:
def data_filter(df, reduce_t=4):
    """Keep only the rows in the low-x / low-y corner of the data's bounding box.

    The bounding box is taken from the min/max of the ``x`` and ``y`` columns.
    A row is kept when its ``x`` is strictly below ``min_x + (max_x - min_x) / reduce_t``
    and its ``y`` is strictly below ``min_y + (max_y - min_y) / reduce_t``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``x`` and ``y`` columns.
    reduce_t : number, default 4
        Divisor applied to each axis' extent; larger values keep a smaller corner.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows (original index labels kept),
        so callers can add columns to it without pandas warning that they are
        writing into a slice of the input frame.
    """
    min_x, max_x = df['x'].min(), df['x'].max()
    min_y, max_y = df['y'].min(), df['y'].max()

    # Upper (exclusive) limits of the kept corner along each axis.
    range_x = min_x + (max_x - min_x) / reduce_t
    range_y = min_y + (max_y - min_y) / reduce_t

    in_corner = (df.x < range_x) & (df.y < range_y)
    return df[in_corner].copy()

In [92]:
# Restrict the waypoints to the corner selected by data_filter, using its
# default divisor spelled out explicitly.
df = data_filter(df, reduce_t=4)
df

Unnamed: 0,vehicle,heading,y,x,ts
1,14N4768,157.0,10.770580,106.665830,1524788041
2,14N4768,161.0,10.767580,106.667030,1524788101
3,14N4768,95.0,10.767680,106.672520,1524788220
13,14N4768,344.0,10.767700,106.667200,1524827103
14,14N4768,331.0,10.768700,106.666680,1524827133
...,...,...,...,...,...
2054659,98C10740,64.0,10.766316,106.664480,1524849538
2054660,98C10740,66.0,10.767382,106.666670,1524849566
2054661,98C10740,52.0,10.769275,106.670074,1524849628
2054662,98C10740,62.0,10.770380,106.672020,1524849668


In [93]:
# Pair every waypoint with the row directly above it (previous position,
# timestamp and vehicle). The first row has no predecessor and is filled with 0,
# which the following cells treat as the "no previous waypoint" marker.
# DataFrame.assign returns a new frame, so the new columns are never written
# into a frame that may itself be a slice of another one (the situation that
# makes `df[col] = ...` emit SettingWithCopyWarning).
df = df.assign(
    y_shifted=df.y.shift(1, fill_value=0),
    x_shifted=df.x.shift(1, fill_value=0),
    ts_shifted=df.ts.shift(1, fill_value=0),
    vehicle_shifted=df.vehicle.shift(1, fill_value=0),
)
df.head()

Unnamed: 0,vehicle,heading,y,x,ts,y_shifted,x_shifted,ts_shifted,vehicle_shifted
1,14N4768,157.0,10.77058,106.66583,1524788041,0.0,0.0,0,0
2,14N4768,161.0,10.76758,106.66703,1524788101,10.77058,106.66583,1524788041,14N4768
3,14N4768,95.0,10.76768,106.67252,1524788220,10.76758,106.66703,1524788101,14N4768
13,14N4768,344.0,10.7677,106.6672,1524827103,10.76768,106.67252,1524788220,14N4768
14,14N4768,331.0,10.7687,106.66668,1524827133,10.7677,106.6672,1524827103,14N4768


In [94]:
from haversine import haversine, Unit # (lat, lon) # print(tuple(Unit))
from shapely.geometry import Point, LineString

# For every waypoint, measure the gap to the row above it: great-circle
# distance in metres and absolute timestamp difference. A row whose ts_shifted
# is the 0 fill value has no predecessor and gets 0 for both.
gap_metres = []
gap_seconds = []
for _, waypoint in df.iterrows():
    has_previous = waypoint.ts_shifted != 0
    if not has_previous:
        print('begin')
        gap_metres.append(0)
        gap_seconds.append(0)
        continue
    # haversine expects (lat, lon) pairs, i.e. (y, x).
    here = (waypoint.y, waypoint.x)
    previous = (waypoint.y_shifted, waypoint.x_shifted)
    gap_metres.append(haversine(here, previous, unit=Unit.METERS))
    gap_seconds.append(abs(waypoint.ts_shifted - waypoint.ts))
df['distance'] = gap_metres
df['ts_intvl'] = gap_seconds
df.head(10)

begin


Unnamed: 0,vehicle,heading,y,x,ts,y_shifted,x_shifted,ts_shifted,vehicle_shifted,distance,ts_intvl
1,14N4768,157.0,10.77058,106.66583,1524788041,0.0,0.0,0,0,0.0,0
2,14N4768,161.0,10.76758,106.66703,1524788101,10.77058,106.66583,1524788041,14N4768,358.416169,60
3,14N4768,95.0,10.76768,106.67252,1524788220,10.76758,106.66703,1524788101,14N4768,599.815655,119
13,14N4768,344.0,10.7677,106.6672,1524827103,10.76768,106.67252,1524788220,14N4768,581.146382,38883
14,14N4768,331.0,10.7687,106.66668,1524827133,10.7677,106.6672,1524827103,14N4768,124.863755,30
15,14N4768,338.0,10.76972,106.66618,1524827163,10.7687,106.66668,1524827133,14N4768,125.884984,30
31,15A24926,0.0,10.765512,106.66778,1524819560,10.76972,106.66618,1524827163,14N4768,499.486392,7603
32,15A24926,0.0,10.766058,106.667656,1524819570,10.765512,106.66778,1524819560,15A24926,62.205225,10
33,15A24926,0.0,10.766694,106.667496,1524819580,10.766058,106.667656,1524819570,15A24926,72.847856,10
34,15A24926,0.0,10.767138,106.6674,1524819590,10.766694,106.667496,1524819580,15A24926,50.472078,10


In [95]:
# Limits for chaining two consecutive rows into the same trip: the gap must be
# shorter than ts_thresh (compared against ts_intvl) and dis_thresh (compared
# against distance).
ts_thresh = 80
dis_thresh = 150

# A row continues the trip of the row above it only when both gaps are under
# their thresholds and both rows belong to the same vehicle.
continues_trip = (
    (df['ts_intvl'] < ts_thresh)
    & (df['distance'] < dis_thresh)
    & (df['vehicle'] == df['vehicle_shifted'])
)
# Every row that does not continue a trip opens a new one, so a running count
# of those rows is the trip id. This yields the same ids as incrementing a
# counter row by row (the first trip is 1) without a Python-level loop.
df['trip'] = (~continues_trip).cumsum()
df.head(10)

Unnamed: 0,vehicle,heading,y,x,ts,y_shifted,x_shifted,ts_shifted,vehicle_shifted,distance,ts_intvl,trip
1,14N4768,157.0,10.77058,106.66583,1524788041,0.0,0.0,0,0,0.0,0,1
2,14N4768,161.0,10.76758,106.66703,1524788101,10.77058,106.66583,1524788041,14N4768,358.416169,60,2
3,14N4768,95.0,10.76768,106.67252,1524788220,10.76758,106.66703,1524788101,14N4768,599.815655,119,3
13,14N4768,344.0,10.7677,106.6672,1524827103,10.76768,106.67252,1524788220,14N4768,581.146382,38883,4
14,14N4768,331.0,10.7687,106.66668,1524827133,10.7677,106.6672,1524827103,14N4768,124.863755,30,4
15,14N4768,338.0,10.76972,106.66618,1524827163,10.7687,106.66668,1524827133,14N4768,125.884984,30,4
31,15A24926,0.0,10.765512,106.66778,1524819560,10.76972,106.66618,1524827163,14N4768,499.486392,7603,5
32,15A24926,0.0,10.766058,106.667656,1524819570,10.765512,106.66778,1524819560,15A24926,62.205225,10,5
33,15A24926,0.0,10.766694,106.667496,1524819580,10.766058,106.667656,1524819570,15A24926,72.847856,10,5
34,15A24926,0.0,10.767138,106.6674,1524819590,10.766694,106.667496,1524819580,15A24926,50.472078,10,5


In [96]:
# is_dup: True for every row whose trip id already appeared on an earlier row.
# dup_shifted: the same flag pulled up by one row, so it is True on a row that
# is immediately followed by a repeated trip id (False on the last row).
repeated_trip = df['trip'].duplicated(keep='first')
df['is_dup'] = repeated_trip
df['dup_shifted'] = repeated_trip.shift(periods=-1, fill_value=False)


In [97]:
# Keep a row when it repeats a trip id or is directly followed by a row that
# does; rows with neither flag set are dropped.
df['is_keep'] = df['is_dup'] | df['dup_shifted']
df = df[df['is_keep']]

In [98]:
# Reduce the frame to the columns that are exported: position, timestamp, trip id.
output_columns = ['y', 'x', 'ts', 'trip']
df = df[output_columns]
df

Unnamed: 0,y,x,ts,trip
13,10.767700,106.667200,1524827103,4
14,10.768700,106.666680,1524827133,4
15,10.769720,106.666180,1524827163,4
31,10.765512,106.667780,1524819560,5
32,10.766058,106.667656,1524819570,5
...,...,...,...,...
2054599,10.767103,106.666300,1524856830,50992
2054601,10.769398,106.666320,1524857000,50994
2054602,10.770076,106.666070,1524857010,50994
2054662,10.770380,106.672020,1524849668,50998


In [100]:
df.to_csv('my_preprocess025.csv',index=False)
# map_obj is only created by the cells of the "visual" section further down.
# On a fresh top-to-bottom run it does not exist yet, and calling .save on it
# would abort the notebook with a NameError, so the HTML export is guarded.
if 'map_obj' in globals():
    map_obj.save('my_preprocess025.html')
else:
    print('map_obj is not defined yet: run the visual cells below, then re-run this cell to save the HTML map')

# visual


In [46]:
import folium
# Palette used to tell trips apart on the map; indexed modulo its length.
colors = ['green', 'red', 'orange', 'yellow', 'pink']

def create_map_obj():
    """Create a folium map attached to a 600-high figure and return the map.

    The map is centred on a fixed (lat, lon) pair, starts at zoom level 15 and
    uses the 'Cartodbpositron' tiles.
    """
    centre = [10.778015126603638, 106.68162304214593]
    figure = folium.Figure(height=600)
    base_map = folium.Map(
        centre,
        zoom_start=15,
        tiles='Cartodbpositron',
    )
    # Attach the map to the figure so the figure's height applies to it.
    base_map.add_to(figure)
    return base_map

def add_point(map_obj, series, color):
    """Draw one opaque circle marker at (series.y, series.x) and return the map.

    ``series`` only needs ``y`` and ``x`` attributes; ``color`` is passed to
    the marker unchanged.
    """
    marker = folium.CircleMarker(
        location=(series.y, series.x),
        radius=2,
        fill=True,
        fill_opacity=1,
        color=color,
        weight=1,
    )
    # Show the marker on map_obj.
    marker.add_to(map_obj)
    return map_obj

def add_points(map_obj, df, color):
    """Draw a faint circle marker for every (y, x) pair of ``df`` and return the map.

    ``df.y`` and ``df.x`` are walked in parallel; each pair becomes one marker
    with fill opacity 0.25 in the given ``color``.
    """
    for lat, lon in zip(df.y, df.x):
        marker = folium.CircleMarker(
            location=(lat, lon),
            radius=2,
            fill=True,
            fill_opacity=0.25,
            color=color,
            weight=1,
        )
        # Show the marker on map_obj.
        marker.add_to(map_obj)
    return map_obj

def add_lines(map_obj, df, color):
    """Join the (y, x) pairs of ``df`` with one solid polyline and return the map."""
    path_points = [(lat, lon) for lat, lon in zip(df.y, df.x)]
    polyline = folium.PolyLine(path_points, color=color, weight=3, opacity=1)
    polyline.add_to(map_obj)
    return map_obj

def show_trip(mapobj, trip_df, connect=0, color_key=1):
    """
    Draw a single trip on ``mapobj`` and return the map.

    The trip's waypoints are always drawn as point markers; when ``connect``
    equals 1 they are additionally joined by a line. ``color_key`` picks the
    colour from a fixed five-entry palette and wraps around via modulo.
    """
    palette = ['green','red','orange','yellow','pink']
    trip_color = palette[color_key % len(palette)]

    add_points(mapobj, trip_df, trip_color)
    if connect != 1:
        return mapobj

    add_lines(mapobj, trip_df, trip_color)
    return mapobj

In [None]:
# Draw one trip (id 14) as connected points on a fresh map.
single_trip = df[df['trip'] == 14]
map_obj = show_trip(
    create_map_obj(),
    single_trip,
    connect=1,
    color_key=10 % len(colors),
)
map_obj

In [None]:
# Draw every trip on one map, cycling through the palette by trip id.
map_obj = create_map_obj()
# groupby hands over each trip's rows in a single pass instead of re-scanning
# the whole frame once per trip id; sort=False keeps the trips in order of
# first appearance, matching df['trip'].unique(). The loop variable is named
# trip_id so the builtin `id` is not shadowed for the rest of the notebook.
for trip_id, trip_rows in df.groupby('trip', sort=False):
    map_obj = show_trip(map_obj, trip_rows, connect=1, color_key=trip_id % len(colors))
map_obj
