# Qualitative analysis of Compression

In [31]:
from tracktable.domain.terrestrial import TrajectoryPointReader
from tracktable.render.render_trajectories import render_trajectories, render_trajectories_separate
from tracktable.applications.assemble_trajectories import AssembleTrajectoryFromPoints
from tracktable.core.geomath import length, recompute_speed, simplify
import pandas as pd
import numpy as np
from dtaidistance import dtw_ndim

from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from geopy.distance import geodesic 
from pyproj import Geod
from shapely.ops import nearest_points
import json

### Trajectory loading

In [20]:
PATH = "datasets/cars_8.csv"
def load_cars_trajectories(PATH):
    """Read a CSV of car positions and assemble its points into trajectories.

    Parameters
    ----------
    PATH : str
        Path of the CSV file. Column 0 is read as the object id, column 1 as
        the timestamp, column 2 as latitude and column 3 as longitude. Lines
        starting with '#' are treated as comments and 'NaN' as the null value.

    Returns
    -------
    list
        Every trajectory produced by ``AssembleTrajectoryFromPoints``.
    """
    # The context manager guarantees the file handle is released; the original
    # code opened the file and never closed it.
    with open(PATH, 'r') as infile:
        ## Set up the point reader
        reader = TrajectoryPointReader()
        reader.input = infile
        reader.comment_character = '#'
        reader.field_delimiter = ','
        reader.null_value = 'NaN'
        reader.object_id_column = 0
        reader.timestamp_column = 1
        reader.coordinates[0] = 3     # longitude
        reader.coordinates[1] = 2     # latitude

        ## Put the point reader into the trajectoriesBuilder
        builder = AssembleTrajectoryFromPoints()
        builder.input = reader
        # NOTE(review): None presumably disables splitting on time gaps
        # (the builder logs "a time lapse of at least None") -- confirm.
        builder.separation_time = None

        # Consume the builder while the file is still open: the points are
        # only pulled from the reader when the builder is iterated.
        return list(builder)

trajectories = load_cars_trajectories(PATH)

# Per-point columns, accumulated over every loaded trajectory.
object_id = []
timestamp = []
longitude = []
latitude = []
speed = []
for traj in trajectories:
    # Run before reading the points so the "speed" property fetched below
    # reflects the trajectory's current points and timestamps.
    recompute_speed(traj)
    for trajectory_point in traj:
        speed.append(trajectory_point.property("speed"))
        object_id.append(trajectory_point.object_id)
        timestamp.append(trajectory_point.timestamp)
        longitude.append(trajectory_point[0])   # coordinate 0 is longitude
        latitude.append(trajectory_point[1])    # coordinate 1 is latitude

# Build the frame once, after all points are collected. The original rebuilt
# it inside the inner loop (once per point), which is quadratic in the number
# of points and left `trajectory_df` undefined when there were no points.
trajectory_df = pd.DataFrame({"id":object_id,"time":timestamp,"longitude":longitude,"latitude":latitude, "speed":speed})
trajectory_df

INFO:tracktable.applications.assemble_trajectoriesAssembleTrajectoryFromPoints:New trajectories will be declared after a separation of None distance units between two points or a time lapse of at least None (hours, minutes, seconds).
INFO:tracktable.applications.assemble_trajectoriesAssembleTrajectoryFromPoints:Trajectories with fewer than 2 points will be discarded.
INFO:tracktable.applications.assemble_trajectoriesAssembleTrajectoryFromPoints:Done assembling trajectories. 1 trajectories produced and 0 discarded for having fewer than 2 points.


[2024-01-06 14:07:31.173517] [0x00007ff41a275740] [info]    Done reading points. Generated 4942 points correctly and discarded 1 due to parse errors.



Unnamed: 0,id,time,longitude,latitude,speed
0,8,2020-06-01 08:56:08+00:00,4.465373,50.861073,11.348936
1,8,2020-06-01 08:56:09+00:00,4.465386,50.861100,11.348936
2,8,2020-06-01 08:56:10+00:00,4.465398,50.861127,11.348929
3,8,2020-06-01 08:56:10+00:00,4.465406,50.861146,0.000000
4,8,2020-06-01 08:56:24+00:00,4.465684,50.861756,18.147267
...,...,...,...,...,...
4937,8,2020-06-04 21:56:59+00:00,4.465724,50.861842,0.000000
4938,8,2020-06-04 21:56:59+00:00,4.465716,50.861822,0.000000
4939,8,2020-06-04 21:57:01+00:00,4.465684,50.861756,13.919789
4940,8,2020-06-04 21:57:14+00:00,4.465406,50.861146,19.543211


### Compression

In [18]:
# Simplification tolerance. The divisor 111139 is roughly the number of meters
# in one degree of latitude, so under that reading 10/111139 degrees is about
# 10 meters (the previous comment said "1 meter", which does not match the value).
# NOTE(review): confirm both the intended distance and the unit `simplify` expects.
tolerance = 10/111139
def compress_all(trajectories,tolerance):
    """Simplify every trajectory and report the total number of points kept.

    Parameters
    ----------
    trajectories : iterable
        Trajectories to compress; each is passed to ``simplify`` unchanged.
    tolerance : float
        Error tolerance forwarded to ``simplify``.

    Returns
    -------
    list
        The simplified trajectories, in the same order as the input.

    Side effects
    ------------
    Prints "size after compress" followed by the summed length of all
    simplified trajectories.
    """
    compressed_trajectories = [
        simplify(trajectory=trajectory, tolerance=tolerance)
        for trajectory in trajectories
    ]
    n = sum(len(compressed_traj) for compressed_traj in compressed_trajectories)
    print("size after compress", n)
    return compressed_trajectories
compressed_trajectories = compress_all(trajectories,tolerance)

# Per-point columns of the compressed trajectories (the lists are reset here
# so they do not carry over points collected from the original trajectories).
object_id = []
timestamp = []
longitude = []
latitude = []
speed = []
for traj in compressed_trajectories:
    # Recompute speeds on the simplified trajectory so the "speed" property
    # read below is based on the points that survived compression.
    recompute_speed(traj)
    for trajectory_point in traj:
        speed.append(trajectory_point.property("speed"))
        object_id.append(trajectory_point.object_id)
        timestamp.append(trajectory_point.timestamp)
        longitude.append(trajectory_point[0])   # coordinate 0 is longitude
        latitude.append(trajectory_point[1])    # coordinate 1 is latitude

# Build the frame once, after all points are collected. The original rebuilt
# it on every inner-loop iteration and left `compressed_df` undefined when the
# compressed trajectories contained no points.
compressed_df = pd.DataFrame({"id":object_id,"time":timestamp,"longitude":longitude,"latitude":latitude, "speed":speed})
compressed_df

size after compress 23


Unnamed: 0,id,time,longitude,latitude,speed
0,8,2020-06-01 08:56:08+00:00,4.465373,50.861073,13.221354
1,8,2020-06-01 09:01:30+00:00,4.453159,50.868399,13.221354
2,8,2020-06-01 09:06:08+00:00,4.480321,50.87241,25.348898
3,8,2020-06-02 08:42:16+00:00,4.465373,50.861073,0.069485
4,8,2020-06-02 08:47:38+00:00,4.453159,50.868399,13.221354
5,8,2020-06-02 08:52:13+00:00,4.480321,50.87241,25.625432
6,8,2020-06-03 09:44:32+00:00,4.465373,50.861073,0.065938
7,8,2020-06-03 09:49:52+00:00,4.453159,50.868399,13.303987
8,8,2020-06-03 09:54:16+00:00,4.480321,50.87241,26.693158
9,8,2020-06-04 08:03:17+00:00,4.465373,50.861073,0.074039


In [6]:
# Draw the compressed trajectories for a visual check of the simplification.
render_trajectories(compressed_trajectories)

In [21]:
# Object arrays whose rows are (time, latitude, longitude): series1 holds the
# original points, series2 the compressed ones.
series1 = trajectory_df.loc[:, ["time", "latitude", "longitude"]].to_numpy()
series2 = compressed_df.loc[:, ["time", "latitude", "longitude"]].to_numpy()
series1

array([[Timestamp('2020-06-01 08:56:08+0000', tz='UTC'),
        50.86107290000001, 4.46537349999999],
       [Timestamp('2020-06-01 08:56:09+0000', tz='UTC'),
        50.861100166845816, 4.465385801954695],
       [Timestamp('2020-06-01 08:56:10+0000', tz='UTC'),
        50.86112743367567, 4.465398103909399],
       ...,
       [Timestamp('2020-06-04 21:57:01+0000', tz='UTC'), 50.8617555,
        4.465684499999998],
       [Timestamp('2020-06-04 21:57:14+0000', tz='UTC'),
        50.86114559999999, 4.465406299999998],
       [Timestamp('2020-06-04 21:57:16+0000', tz='UTC'),
        50.86107290000001, 4.46537349999999]], dtype=object)

In [27]:
def dist(coord1, coord2):
    """Return the geodesic distance between two coordinates, in meters.

    Both arguments are handed to geopy's ``geodesic`` unchanged; callers in
    this notebook pass (latitude, longitude) tuples.
    """
    separation = geodesic(coord1, coord2)
    return separation.meters

In [28]:
def compute_ED(initial, compressed):
    """Distance from every original point to the compressed path, in meters.

    Parameters
    ----------
    initial : sequence of rows
        Original points; element 1 of each row is used as latitude and
        element 2 as longitude (element 0 is not read).
    compressed : sequence of rows
        Compressed points with the same row layout; they are joined, in
        order, into a single polyline.

    Returns
    -------
    list of float
        One distance per row of ``initial``, as computed by ``dist`` between
        the point and its closest location on the compressed polyline.

    Notes
    -----
    The closest location is found with shapely, i.e. in planar
    (latitude, longitude) degree space, so it is only an approximation of the
    geodesically nearest location; the reported distance itself comes from
    ``dist``.
    """
    # Points are built as Point(latitude, longitude): x is latitude, y is
    # longitude, which matches the (lat, lon) order passed to `dist` below.
    compressed_path = LineString([Point(p[1], p[2]) for p in compressed])

    distances = []
    for row in initial:
        original_point = Point(row[1], row[2])
        # Project onto the polyline, then turn the projected arc length back
        # into the coordinates of the closest location on the path.
        closest = compressed_path.interpolate(compressed_path.project(original_point))
        distances.append(dist((closest.x, closest.y), (row[1], row[2])))
    return distances
data = compute_ED(series1, series2)
sum(data)/len(data)

74.77103472055629

In [32]:
def compute_speed_score(initial, compressed, verbose=True):
    """Compare original and compressed speeds segment by segment.

    A segment is a pair of consecutive rows of ``compressed``. For each
    segment the error is::

        mean(original speeds inside the segment) - (speed_start + speed_end) / 2

    Parameters
    ----------
    initial : pandas.DataFrame
        Original points, with a "time" and a "speed" column.
    compressed : pandas.DataFrame
        Compressed points, with a "time" and a "speed" column.
    verbose : bool, default True
        When True, print a blank line followed by the number of original
        points, their mean speed and the compressed average speed for every
        segment (the behavior before this parameter existed).

    Returns
    -------
    hist_data : list
        One error per segment, i.e. ``len(compressed) - 1`` entries (empty
        when ``compressed`` has fewer than two rows).
    error_col : list
        The segment error repeated once per original point in that segment.

    Notes
    -----
    * Segment bounds are inclusive on both ends, so an original point whose
      time equals a shared boundary is counted in both neighbouring segments
      and appears twice in ``error_col``.
    * A segment containing no original point yields a nan error; it is kept
      in ``hist_data`` so the list stays aligned with the compressed rows.
    """
    hist_data = []
    error_col = []
    rows = [row for _, row in compressed.iterrows()]
    # Walk consecutive (previous, current) pairs of compressed points.
    for prec, row in zip(rows, rows[1:]):
        time1 = pd.to_datetime(prec["time"])
        time2 = pd.to_datetime(row["time"])
        in_segment = (initial["time"] >= time1) & (initial["time"] <= time2)
        segment_speeds = initial.loc[in_segment, "speed"]
        n_points = len(segment_speeds)

        initial_avg = np.mean(segment_speeds)   # nan when the segment is empty
        speed_compressed_avg = (prec["speed"] + row["speed"]) / 2
        speed_error = initial_avg - speed_compressed_avg

        hist_data.append(speed_error)
        error_col.extend([speed_error] * n_points)
        if verbose:
            print()
            print(n_points, initial_avg, speed_compressed_avg)
    return hist_data, error_col
# `data` holds one speed error per pair of consecutive compressed points;
# `error_col` repeats that error for each original point inside the pair's time span.
data, error_col = compute_speed_score(trajectory_df, compressed_df)


306 8.359271496141245 13.22135356212721

268 13.222606862910268 19.285125929966725

617 10.033890657042363 12.70919151979682

316 7.825001643297938 6.645419151957303

273 11.321649780247007 19.423392647954763

588 10.401047135299086 12.84568462653886

305 8.801478910991422 6.684962270592955

258 12.407034658977016 19.99857253895687

597 10.481188008041979 13.383598751492588

305 8.598103849681307 6.816121390686319

275 13.180962948469618 19.09902076998531

0 nan 12.304126205757116

332 8.228025454266572 5.8396361778095836

589 9.42868098211282 5.889020219234971

283 12.009033389022202 12.090881123141688

302 9.075557807390547 18.0599225777992

564 10.44653528252144 6.0391102424638525

262 13.621530128358062 12.47187079220793

333 8.176761813435066 18.4887522205323

580 10.72765655646145 6.087765749934817

251 13.088665688927733 12.601159613793921

603 7.888729729361618 12.680610224474819


In [33]:
# `data` has one error per pair of consecutive compressed points, i.e. one
# entry fewer than `compressed_df` has rows; the first error is duplicated so
# the first row gets a value too.
compressed_df["speed-error"] = [data[0], *data]
compressed_df.to_csv("csv_results/tracktable-comp-results.csv", index=False)


In [34]:
# Mean absolute speed error over all segments. A segment without any original
# point has a nan error, and a single nan makes this mean nan as well.
np.abs(data).mean()

nan

In [38]:
# Convert the list of per-segment errors to an array (note: `data` is rebound
# from a list to an ndarray here) and display it to inspect the values.
data = np.array(data)
data

array([ -4.86208207,  -6.06251907,  -2.67530086,   1.17958249,
        -8.10174287,  -2.44463749,   2.11651664,  -7.59153788,
        -2.90241074,   1.78198246,  -5.91805782,          nan,
         2.38838928,   3.53966076,  -0.08184773,  -8.98436477,
         4.40742504,   1.14965934, -10.31199041,   4.63989081,
         0.48750608,  -4.7918805 ])

In [41]:
# Keep only the segment errors that are not nan.
data = data[np.logical_not(np.isnan(data))]

In [42]:
# Mean absolute speed error, computed on whatever `data` currently holds
# (the nan entries have been removed by the preceding cell).
np.abs(data).mean()

4.115189766398392