# Qualitative analysis of Compression

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from pymeos import *
import numpy as np

pymeos_initialize()
from dtaidistance import dtw_ndim

from geopandas import GeoDataFrame, read_file, GeoSeries
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from geopy.distance import geodesic 
from pyproj import Geod
from shapely.ops import nearest_points
import json

### Trajectory loading

In [37]:
PATH = 'datasets/cars_8.csv'

# Read the raw fixes, discard incomplete rows, parse the timestamps, and keep a
# single row per timestamp in chronological order.
df = (
    pd.read_csv(PATH)
    .dropna()
    .assign(Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"], format='mixed'))
    .drop_duplicates(subset=["Timestamp"])
    .sort_values(by="Timestamp")
)
print(len(df))

# Turn every row into a temporal point instant.
# NOTE(review): the tuple is built as (Latitude, Longitude), so the resulting
# points carry latitude in x and longitude in y; later cells read p.x / p.y in
# that order -- confirm this is the order TGeogPointInst expects.
df['point'] = df.apply(
    lambda row: TGeogPointInst(
        point=(row['Latitude'], row['Longitude']),
        timestamp=row['Timestamp'],
    ),
    axis=1,
)
df = df.drop(columns=['Latitude', 'Longitude'])

# One temporal sequence per vehicle id, stored in a 'trajectory' column.
trajectories = (
    df.groupby('id')
    .aggregate({'point': TGeogPointSeq.from_instants})
    .rename(columns={'point': 'trajectory'})
)
trajectories

4942


Unnamed: 0_level_0,trajectory
id,Unnamed: 1_level_1
8,[0101000020E6100000687C04A3376E494037B3EBDE8AD...


In [38]:
# Expand the first trajectory into a dataframe of its individual samples.
first_trajectory = trajectories["trajectory"].iloc[0]
trajectory_df = first_trajectory.to_dataframe()
trajectory_df

Unnamed: 0_level_0,geometry
time,Unnamed: 1_level_1
2020-06-01 10:56:08.148000+02:00,POINT (50.86107 4.46537)
2020-06-01 10:56:10.797745+02:00,POINT (50.86115 4.46541)
2020-06-01 10:56:26.284084+02:00,POINT (50.86184 4.46572)
2020-06-01 10:56:27.699117+02:00,POINT (50.86188 4.46575)
2020-06-01 10:56:32.628663+02:00,POINT (50.86199 4.46516)
...,...
2020-06-04 23:56:52.386056+02:00,POINT (50.86198 4.46521)
2020-06-04 23:56:56.863518+02:00,POINT (50.86188 4.46575)
2020-06-04 23:56:57.463518+02:00,POINT (50.86186 4.46573)
2020-06-04 23:56:59.514958+02:00,POINT (50.86184 4.46572)


### Compression Douglas-Peucker

In [39]:
# NOTE(review): presumably about 10 m expressed in degrees (roughly 111 139 m
# per degree) -- confirm the unit simplify() expects for these points.
tolerance = 10 / 111139

# Simplify every trajectory with the second simplify() argument set to False,
# the variant this section labels Douglas-Peucker.
compressed = trajectories.assign(
    trajectory=lambda frame: frame["trajectory"].apply(
        lambda traj: traj.simplify(tolerance, False)
    )
)
compressed

Unnamed: 0_level_0,trajectory
id,Unnamed: 1_level_1
8,[0101000020E6100000687C04A3376E494037B3EBDE8AD...


In [40]:
# Expand the first simplified trajectory into a dataframe of the samples kept.
first_compressed = compressed["trajectory"].iloc[0]
compressed_df = first_compressed.to_dataframe()
compressed_df

Unnamed: 0_level_0,geometry
time,Unnamed: 1_level_1
2020-06-01 10:56:08.148000+02:00,POINT (50.86107 4.46537)
2020-06-01 10:56:27.699117+02:00,POINT (50.86188 4.46575)
2020-06-01 10:57:04.440925+02:00,POINT (50.86219 4.46423)
2020-06-01 10:58:07.201801+02:00,POINT (50.86335 4.46291)
2020-06-01 10:58:48.919558+02:00,POINT (50.86442 4.45975)
...,...
2020-06-04 22:51:06.759962+02:00,POINT (50.86440 4.45963)
2020-06-04 23:55:39.418280+02:00,POINT (50.86335 4.46291)
2020-06-04 23:56:29.012912+02:00,POINT (50.86218 4.46426)
2020-06-04 23:56:56.863518+02:00,POINT (50.86188 4.46575)


### Compression TD-TR

In [21]:
# NOTE(review): presumably about 10 m expressed in degrees (roughly 111 139 m
# per degree) -- confirm the unit simplify() expects for these points.
tolerance = 10 / 111139

# Simplify every trajectory with the second simplify() argument set to True,
# the variant this section labels TD-TR.
compressed = trajectories.assign(
    trajectory=lambda frame: frame["trajectory"].apply(
        lambda traj: traj.simplify(tolerance, True)
    )
)
compressed

Unnamed: 0_level_0,trajectory
id,Unnamed: 1_level_1
8,[0101000020E6100000687C04A3376E494037B3EBDE8AD...


In [22]:
# Expand the first simplified trajectory into a dataframe of the samples kept.
first_compressed = compressed["trajectory"].iloc[0]
compressed_df = first_compressed.to_dataframe()
compressed_df

Unnamed: 0_level_0,geometry
time,Unnamed: 1_level_1
2020-06-01 10:56:08.148000+02:00,POINT (50.86107 4.46537)
2020-06-01 10:56:27.699117+02:00,POINT (50.86188 4.46575)
2020-06-01 10:56:32.628663+02:00,POINT (50.86199 4.46516)
2020-06-01 10:56:34.662347+02:00,POINT (50.86200 4.46511)
2020-06-01 10:56:38.639256+02:00,POINT (50.86208 4.46465)
...,...
2020-06-04 23:56:30.600257+02:00,POINT (50.86216 4.46430)
2020-06-04 23:56:42.319279+02:00,POINT (50.86212 4.46445)
2020-06-04 23:56:45.848648+02:00,POINT (50.86209 4.46462)
2020-06-04 23:56:56.863518+02:00,POINT (50.86188 4.46575)


In [23]:
%%time
series_coords = trajectory_df.reset_index()[["geometry"]].values
series_time   = trajectory_df.reset_index()[["time"]].values

series1 = []
for i in range(len(series_coords)):
    p = series_coords[i][0]
    time = series_time[i][0]
    series1.append([time, p.x, p.y])

series_coords = compressed_df.reset_index()[["geometry"]].values
series_time   = compressed_df.reset_index()[["time"]].values
series2 = []
for i in range(len(series_coords)):
    p = series_coords[i][0]
    time = series_time[i][0]
    series2.append([time, p.x, p.y])

CPU times: user 86.3 ms, sys: 12.4 ms, total: 98.8 ms
Wall time: 85.6 ms


In [25]:
def dist(coord1, coord2):
    """Return the geodesic distance between two coordinates, in metres.

    Both arguments are forwarded unchanged to ``geopy.distance.geodesic``,
    which reads coordinate pairs as (latitude, longitude).
    """
    separation = geodesic(coord1, coord2)
    return separation.meters

In [26]:
def compute_ED(initial, compressed):
    """Distance from every original sample to the compressed path.

    Parameters
    ----------
    initial : sequence of ``[time, x, y]``
        Samples of the original trajectory.
    compressed : sequence of ``[time, x, y]``
        Samples kept by the compression; they are joined, in order, into a
        single ``LineString``.

    Returns
    -------
    list of float
        One value per entry of ``initial``: the ``dist`` (metres) between the
        sample and its closest point on the compressed path. Empty when
        ``initial`` is empty.
    """
    compressed_path = LineString([Point(p[1], p[2]) for p in compressed])
    ED = []
    for sample in initial:
        original_xy = (sample[1], sample[2])
        # Closest location on the path, found by linear referencing on the raw
        # coordinates: project onto the line, then interpolate back to a point.
        foot = compressed_path.interpolate(
            compressed_path.project(Point(sample[1], sample[2]))
        )
        ED.append(dist((foot.x, foot.y), original_xy))
    return ED


data = compute_ED(series1, series2)
# Mean deviation over all original samples (NaN if there are none).
sum(data) / len(data) if data else float("nan")

0.062052046672622846

In [41]:
# Expose the point coordinates as plain numeric columns. Each frame must read
# its own geometry: the compressed frame previously took its values from
# trajectory_df, which only matched through index alignment on shared times.
trajectory_df["x"] = trajectory_df["geometry"].x
trajectory_df["y"] = trajectory_df["geometry"].y
compressed_df["x"] = compressed_df["geometry"].x
compressed_df["y"] = compressed_df["geometry"].y

In [42]:
# Write both frames to CSV with the time index turned back into a column.
for frame, csv_path in (
    (trajectory_df, "pymove_traj_df.csv"),
    (compressed_df, "pymove_comp_df.csv"),
):
    frame.reset_index().to_csv(csv_path, index=False)

In [43]:
# Reload both tracks from CSV and parse their time column.
# NOTE(review): compute_speed_score reads a "speed" column, but the export cell
# in this notebook only writes time, geometry, x and y -- presumably the CSV
# files are enriched by an external step before this cell runs; confirm.
trajectory_df = pd.read_csv("pymove_traj_df.csv").assign(
    time=lambda frame: pd.to_datetime(frame["time"])
)
compressed_df = pd.read_csv("pymove_comp_df.csv").assign(
    time=lambda frame: pd.to_datetime(frame["time"])
)
def compute_speed_score(initial, compressed, verbose=True):
    """Compare original and compressed speeds segment by segment.

    A segment is a pair of consecutive rows of ``compressed``. For each one,
    the mean ``speed`` of the ``initial`` rows whose ``time`` falls inside the
    segment is compared with the average of the two endpoint speeds taken from
    ``compressed``.

    Parameters
    ----------
    initial : pandas.DataFrame
        Original samples; needs ``time`` and ``speed`` columns.
    compressed : pandas.DataFrame
        Compressed samples in chronological order; needs ``time`` and
        ``speed`` columns.
    verbose : bool, default True
        When true, print a blank line followed by the sample count, the mean
        original speed and the compressed average for every segment.

    Returns
    -------
    hist_data : list
        One signed error (original mean minus compressed average) per segment.
    error_col : list
        The same errors, each repeated once per original sample in its
        segment. Both segment bounds are inclusive, so a sample lying exactly
        on a shared breakpoint is counted in both neighbouring segments.
    """
    hist_data = []
    error_col = []
    prec = None
    for _, row in compressed.iterrows():
        if prec is not None:
            start = pd.to_datetime(prec["time"])
            end = pd.to_datetime(row["time"])
            window = initial[(initial["time"] >= start) & (initial["time"] <= end)]
            initial_mean = np.mean(window["speed"])
            speed_compressed_avg = (prec["speed"] + row["speed"]) / 2
            speed_error = initial_mean - speed_compressed_avg
            hist_data.append(speed_error)
            error_col.extend([speed_error] * len(window))
            if verbose:
                print()
                print(len(window), initial_mean, speed_compressed_avg)
        prec = row
    return hist_data, error_col
# Score the compressed track against the original one: `data` holds one speed
# error per compressed segment, `error_col` the same errors repeated once per
# original sample falling inside each segment.
data, error_col = compute_speed_score(trajectory_df, compressed_df)


4 3.966900607088051 5.06173696951078

17 5.458850735892528 4.8591478132148564

25 4.742052169641794 3.877524151621553

19 5.476048319519628 5.972982416441868

30 6.552040171276535 7.682936436322145

15 6.979337398954136 6.929095716782257

25 8.706717614777068 7.739602475707488

6 6.0225367942806 5.1450629304070015

5 5.86381834844717 4.066850478304899

15 7.29984088143999 6.358060837209537

57 9.739775844179764 10.325246036015104

23 7.698795660740371 9.90730740841924

33 8.70540858539707 7.5476072964259036

22 10.24486156863107 9.520221581221918

23 9.701560132694885 4.928507759972375

39 9.880277435219897 4.439869209247243

22 7.771036398724925 7.475084465129731

61 9.405309581486028 9.892333495159242

16 6.3030243971538304 8.666532236217986

9 5.8021161492730045 4.636277233730977

13 9.531802265334793 5.458835068080558

17 7.515057055783495 6.7575036596589

16 6.5787551401031665 6.747336032142542

36 7.294884814642442 5.949819609899492

27 6.982066224617476 7.522818315315664

14 4.

In [31]:
# Show the per-segment speed errors followed by their per-sample expansion.
data, error_col

([-1.094836362422729,
  -0.6907220416808464,
  -2.3866417887135647,
  1.5363428838769098,
  -2.080826280100844,
  1.6398767633474352,
  0.641071356236782,
  1.224417814527567,
  2.0302513154985666,
  0.13062656122236316,
  1.7044374432466451,
  2.5333857948798277,
  2.567281544258519,
  0.7775391463293921,
  0.5022876314744096,
  -2.6978480063448185,
  0.1583124438817749,
  2.076090986603213,
  -0.9797893364262737,
  -0.2693029063356329,
  -1.3918249490642989,
  -1.7926293640819395,
  2.3968326521214003,
  2.4912707741711415,
  -1.590396227390932,
  3.1475577354617403,
  -1.4478560992167022,
  1.049962870191779,
  -2.381803644507414,
  4.803251645276511,
  -2.0394542584518964,
  4.035092308622695,
  2.3345883120401876,
  2.898099846655154,
  1.3740727499185903,
  2.511318292793156,
  1.31271000206256,
  0.18201402258722243,
  3.323898475349763,
  -0.2013694364902454,
  1.461352594513368,
  -1.6631116666069587,
  3.786949108056632,
  -3.6596133204467245,
  3.0381571508920757,
  -2.81510

In [32]:
# There is one error per pair of consecutive rows, i.e. one fewer than there
# are rows, so the first row reuses the first segment's error to line up.
leading_error = data[0]
compressed_df["speed-error"] = [leading_error, *data]
compressed_df.to_csv("csv_results/pymeos-DP-results.csv", index=False)


In [35]:
# Mean absolute speed error over all compressed segments.
np.mean(np.abs(data))

2.065618533663404

In [36]:
# Final view of the compressed samples, including the speed-error column.
compressed_df

Unnamed: 0,time,geometry,traj_id,speed,speed-error
0,2020-06-01 10:56:08.148000,POINT (50.861073 4.465373),1,5.061737,-1.094836
1,2020-06-01 10:56:27.699117,POINT (50.861884 4.465745),1,5.061737,-1.094836
2,2020-06-01 10:56:32.628663,POINT (50.861987 4.465161),1,13.304115,-0.690722
3,2020-06-01 10:56:34.662347,POINT (50.861996 4.46511),1,2.816270,-2.386642
4,2020-06-01 10:56:38.639256,POINT (50.862079 4.464652),1,12.943974,1.536343
...,...,...,...,...,...
769,2020-06-04 23:56:30.600257,POINT (50.86216 4.464302),1,5.629155,2.767177
770,2020-06-04 23:56:42.319279,POINT (50.862117 4.464447),1,1.427540,-0.529099
771,2020-06-04 23:56:45.848648,POINT (50.862086 4.464615),1,5.353225,0.548591
772,2020-06-04 23:56:56.863518,POINT (50.861884 4.465745),1,11.525484,0.293235
