# Qualitative analysis of Compression

In [7]:
import skmob
import pandas as pd
import json
import numpy as np
import folium
from shapely.geometry import Point, LineString, Polygon
from skmob.preprocessing import compression, filtering, detection
from shapely.ops import nearest_points
from dtaidistance import dtw_ndim

from geopandas import GeoDataFrame, read_file, GeoSeries
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from geopy.distance import geodesic 
from pyproj import Geod
from shapely.ops import nearest_points
import json

### Trajectory loading

In [3]:
df= pd.read_csv("datasets/berlinMOD_sample.csv")
traj = skmob.TrajDataFrame(data=df, latitude='Latitude', longitude='Longitude', trajectory_id='id', datetime='Timestamp')
traj
#traj.plot_trajectory()

Unnamed: 0,tid,datetime,lat,lng
0,8,2020-06-01 08:56:08.148000+00:00,50.861073,4.465373
1,8,2020-06-01 08:56:09.648000+00:00,50.861100,4.465386
2,8,2020-06-01 08:56:10.398000+00:00,50.861127,4.465398
3,8,2020-06-01 08:56:10.797745+00:00,50.861146,4.465406
4,8,2020-06-01 08:56:24.229685+00:00,50.861756,4.465684
...,...,...,...,...
4937,8,2020-06-04 21:56:59.514958+00:00,50.861842,4.465724
4938,8,2020-06-04 21:56:59.943340+00:00,50.861822,4.465716
4939,8,2020-06-04 21:57:01.415195+00:00,50.861756,4.465684
4940,8,2020-06-04 21:57:14.847135+00:00,50.861146,4.465406


### Compression

In [4]:
compressed = compression.compress(traj,0.01)
compressed
#compressed.plot_trajectory()

Unnamed: 0,tid,datetime,lat,lng
0,8,2020-06-01 08:56:08.148000+00:00,50.861114,4.465392
1,8,2020-06-01 08:56:24.229685+00:00,50.861832,4.465720
2,8,2020-06-01 08:56:26.842959+00:00,50.861881,4.465736
3,8,2020-06-01 08:56:29.028663+00:00,50.861911,4.465593
4,8,2020-06-01 08:56:32.628663+00:00,50.861995,4.465116
...,...,...,...,...
1456,8,2020-06-04 21:56:52.761056+00:00,50.861971,4.465252
1457,8,2020-06-04 21:56:55.602543+00:00,50.861899,4.465658
1458,8,2020-06-04 21:56:56.863518+00:00,50.861854,4.465730
1459,8,2020-06-04 21:57:01.415195+00:00,50.861756,4.465684


In [6]:
%%time
series1 = traj[["datetime","lat","lng"]].values
series2 = compressed[["datetime","lat","lng"]].values


CPU times: user 67.9 ms, sys: 0 ns, total: 67.9 ms
Wall time: 66.9 ms


array([[Timestamp('2020-06-01 08:56:08.148000+0000', tz='UTC'),
        50.86111380026074, 4.4653919529320465],
       [Timestamp('2020-06-01 08:56:24.229685+0000', tz='UTC'),
        50.861831949999996, 4.465719999999999],
       [Timestamp('2020-06-01 08:56:26.842959+0000', tz='UTC'),
        50.86188130220086, 4.465735784924857],
       ...,
       [Timestamp('2020-06-04 21:56:56.863518+0000', tz='UTC'),
        50.8618541978005, 4.465730365075151],
       [Timestamp('2020-06-04 21:57:01.415195+0000', tz='UTC'),
        50.8617555, 4.465684499999998],
       [Timestamp('2020-06-04 21:57:14.847135+0000', tz='UTC'),
        50.86110925, 4.465389899999995]], dtype=object)

In [8]:
def dist(coord1, coord2):
    return geodesic(coord1, coord2).m

In [9]:
def compute_ED(initial, compressed):
    ED = []
    A = initial[0]
    compressed_path = LineString([Point(p[1], p[2]) for p in compressed]) 
    for i in range(len(initial)):
        B =  initial[i]
        point_B = Point(B[1], B[2])
        _point_B = compressed_path.interpolate(compressed_path.project(point_B))
        
        ED.append(dist((_point_B.x,_point_B.y), ((B[1], B[2]))))
    return ED
data = compute_ED(series1, series2)
sum(data)/len(data)

0.04650930360169534

Here, we export the file to compute the speed with movingpandas

In [10]:
traj.to_csv("pymove_traj_df.csv", index=False)
compressed.to_csv("pymove_comp_df.csv", index=False)

In [12]:
trajectory_df = pd.read_csv("pymove_traj_df.csv")
trajectory_df["datetime"] = pd.to_datetime(trajectory_df["datetime"])
compressed_df = pd.read_csv("pymove_comp_df.csv")
compressed_df["datetime"] = pd.to_datetime(compressed_df["datetime"])
def compute_speed_score(initial,compressed):
    prec = None
    go = False
    hist_data = []
    error_col = []
    for index, row in compressed.iterrows():
        if go:
            time1 = pd.to_datetime(prec["datetime"])
            time2 = pd.to_datetime(row["datetime"])
            initial_ = initial[(initial["datetime"] >= time1) & (initial["datetime"] <= time2)]
            speed1 = prec["speed"]
            speed2 = row["speed"]
            speed_compressed_avg = (speed1+speed2)/2
            speed_error = (np.mean(initial_["speed"]) - speed_compressed_avg)
            hist_data.append(speed_error)
            for _ in range(len(initial_)):
                error_col.append(speed_error)
            print()
            print(len(initial_),np.mean(initial_["speed"]), speed_compressed_avg)
        prec = row
        go = True
    return hist_data, error_col 
data, error_col = compute_speed_score(trajectory_df, compressed_df)


5 3.7854630139058125 5.171265825436818

5 3.575140925842117 3.6573861415068647

5 5.545345989065238 3.485481516484877

2 8.782528561969782 7.256029508450891

7 5.5204656212924865 8.064289776685303

7 3.0613021833219567 4.33609296930275

7 3.653325982818059 1.6534844325091873

5 2.008032963358954 1.0069220241576908

5 5.8261094579671 1.296733116971867

5 4.5679388558371805 3.5253658393847043

6 4.967672017933628 4.094058722562345

2 8.590950295776004 7.010971458077127

7 3.3562204911107485 6.588616901169255

5 5.961143299713086 4.245499349704703

7 5.51074746538412 5.855994712583202

6 5.197280530937757 4.443185569794551

5 4.879895729289122 2.15893481513266

7 3.6936703394171744 1.2033774129840977

3 7.837666434390882 6.013737548411073

5 4.479361514885984 5.637335503328134

6 4.6052943377414595 2.768201478195663

7 3.661685840735353 3.7030310059596485

6 4.213782708684497 2.973775162205002

4 6.935245126851417 5.465296594643368

3 8.778769945935624 8.249396132079617

2 8.778764324456

We then import it to compute the mean speed error

In [13]:
compressed["speed-error"] = [data[0]]+data
compressed.to_csv("csv_results/skmod-comp-results.csv", index=False)


In [14]:
np.mean(np.abs(data))

1.7211104299851978