In [1]:
import pandas as pd
import numpy as np
import yaml
import os
import pickle
import random
import sys
from tqdm import tqdm
import glob
from shapely.geometry import LineString


sys.path.append('../src')

import threads as threads
from utils import create_attacked_sets, pkl2h5, read_train_file

# Expose only CUDA device 0 to libraries that honour this variable.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Dataset locations. NOTE(review): these are machine-specific absolute paths;
# adjust them when running elsewhere.
data_orig_path = "/home/schestakov/data/re-identification/sf/orig/"
data_path = "/home/schestakov/data/re-identification/sf/orig/cabdata/"
data_save_path = "/home/schestakov/data/re-identification/sf/no_interp"

In [2]:
# Load every trace file under data_path and stack them into a single frame.

all_files = glob.glob(os.path.join(data_path, "*.txt"))


def _read_cab_file(path):
    """Read one space-delimited, header-less trace file and tag its rows.

    The taxi id is taken from the file name: the part before the first "."
    is split on "_" and the second token is used.
    """
    frame = pd.read_csv(path, index_col=None, header=None, delimiter=" ")
    frame["tax_id"] = path.split("/")[-1].split(".")[0].split("_")[1]
    return frame


data = [_read_cab_file(path) for path in all_files]

# Positional columns 0-3 of the raw files get descriptive names.
column_names = {0: "lat", 1: "long", 2: "occupied", 3: "timestamp"}
df = pd.concat(data, axis=0, ignore_index=True).rename(columns=column_names)

In [3]:
# Preview the concatenated frame (the rendered output elides the middle rows).
df

Unnamed: 0,lat,long,occupied,timestamp,tax_id
0,37.78682,-122.40309,1,1213039519,exskafvo
1,37.78225,-122.39781,1,1213039404,exskafvo
2,37.77799,-122.39211,1,1213039340,exskafvo
3,37.77831,-122.39214,0,1213039332,exskafvo
4,37.77587,-122.39437,0,1213039268,exskafvo
...,...,...,...,...,...
11219950,37.75824,-122.39216,0,1211034008,ubnankke
11219951,37.75038,-122.39045,0,1211033948,ubnankke
11219952,37.74989,-122.39314,0,1211033888,ubnankke
11219953,37.75120,-122.39524,0,1211033828,ubnankke


In [4]:
import time 
# Build one trajectory per run of consecutive fixes that share an occupancy
# state, taxi by taxi.
MIN_POINTS = 5  # runs with fewer fixes than this are discarded


def _runs_to_trajectories(taxi_df, occupied_flag, min_points=MIN_POINTS):
    """Split one taxi's rows into runs of constant occupancy and convert them.

    Parameters
    ----------
    taxi_df : pandas.DataFrame
        Rows of a single taxi with "lat", "long", "occupied" and "timestamp"
        columns, in the row order of the combined frame.
    occupied_flag : int
        Value of "occupied" to select (1 or 0).
    min_points : int
        Runs with fewer rows than this are skipped.

    Returns
    -------
    list
        One entry per kept run; each entry is a list of
        [long, lat, elapsed] points, where elapsed is the timestamp minus the
        timestamp of the first point after reversing the run (timestamp units
        are presumably seconds; confirm against the dataset description).
    """
    in_state = taxi_df["occupied"] == occupied_flag
    # Every row outside the wanted state bumps the counter, so consecutive
    # rows inside the state end up sharing one run id.
    run_id = (~in_state).cumsum()

    trajectories = []
    for _, run in taxi_df[in_state].groupby(run_id):
        if run.shape[0] < min_points:
            continue
        # The frame preview shows timestamps decreasing down the rows, so the
        # run is flipped to put the earliest fix first (assumes every input
        # file is ordered that way).
        run = run.iloc[::-1]
        elapsed = run["timestamp"] - run["timestamp"].iloc[0]
        # .tolist() yields plain Python numbers, which keeps the pickled
        # structure free of numpy scalar types.
        trajectories.append(
            [
                list(point)
                for point in zip(
                    run["long"].tolist(), run["lat"].tolist(), elapsed.tolist()
                )
            ]
        )
    return trajectories


traj_list = []
for _, g in tqdm(df.groupby("tax_id")):
    # Same order as before: all occupied runs of a taxi, then its free runs.
    traj_list.extend(_runs_to_trajectories(g, 1))
    traj_list.extend(_runs_to_trajectories(g, 0))


100%|██████████| 536/536 [01:07<00:00,  8.00it/s]


In [5]:
# Save the non-interpolated trajectories.
raw_pickle_path = os.path.join(data_save_path, "traj_list.pkl")
with open(raw_pickle_path, "wb") as sink:
    pickle.dump(traj_list, sink, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
from scipy import interpolate
# Interpolate trajectories so that samples are (roughly) 15 seconds apart.
SAMPLE_INTERVAL = 15  # target spacing, in the units of the elapsed-time column


def _resample_trajectory(traj, interval=SAMPLE_INTERVAL):
    """Resample one trajectory along a spline fitted through its points.

    Parameters
    ----------
    traj : list of [long, lat, elapsed]
        Points as stored in traj_list.
    interval : int
        Target spacing between consecutive samples.

    Returns
    -------
    list or None
        Resampled points as [lat, long, elapsed] (note the swapped coordinate
        order relative to the input), or None when the trajectory lasts less
        than one interval.
    """
    points = np.array(traj)
    duration = points[-1, 2]
    if duration < interval:
        return None

    lon, lat, elapsed = points[:, 0], points[:, 1], points[:, 2]

    # Number of whole intervals that fit into the trajectory.
    n_intervals = int(duration / interval)

    # s=0.0 makes the spline pass through every input point.
    tck, _ = interpolate.splprep([lon, lat, elapsed], s=0.0)

    # n intervals need n + 1 sample points. Passing n_intervals as the point
    # count (as this cell did before) yields n - 1 intervals, i.e. spacing
    # above the target and a single point for short trajectories.
    # NOTE(review): samples are equally spaced in the spline parameter, which
    # is only approximately equal spacing in time; confirm this is acceptable.
    params = np.linspace(0, 1, n_intervals + 1)
    lon_i, lat_i, elapsed_i = interpolate.splev(params, tck)

    # Elapsed time is truncated to whole units before stacking.
    return np.vstack((lat_i, lon_i, elapsed_i.astype(int))).T.tolist()


traj_list_i = []
for traj in tqdm(traj_list):
    resampled = _resample_trajectory(traj)
    if resampled is not None:
        traj_list_i.append(resampled)

100%|██████████| 695677/695677 [02:45<00:00, 4204.60it/s] 


In [19]:
# Pick one interpolated trajectory and its raw counterpart for a visual check.
idx = 100

# Interpolated points are stored as [lat, long, t]; keep the coordinates only.
t_i = [[lat, lon] for lat, lon, _ in traj_list_i[idx]]

# traj_list_i leaves out trajectories whose last elapsed time is below 15, so
# position idx there is not necessarily position idx in traj_list. Rebuild the
# list of raw indices that survived that filter and look the source up there.
# Keep the threshold in sync with the interpolation cell.
kept_source_idx = [i for i, raw in enumerate(traj_list) if raw[-1][2] >= 15]

# Raw points are stored as [long, lat, t]; flip them to [lat, long].
t = [[lat, lon] for lon, lat, _ in traj_list[kept_source_idx[idx]]]

In [20]:
# Show the raw trajectory as [lat, long] pairs.
t

[[37.80093, -122.43455],
 [37.80173, -122.42886],
 [37.80085, -122.42458],
 [37.79659, -122.4236],
 [37.7947, -122.42334],
 [37.79092, -122.42253],
 [37.7895, -122.42226],
 [37.78452, -122.42122],
 [37.78353, -122.42095],
 [37.78096, -122.42048],
 [37.77839, -122.42003],
 [37.77559, -122.41952],
 [37.77063, -122.41807],
 [37.76698, -122.41776],
 [37.76191, -122.4172],
 [37.75434, -122.41641],
 [37.75264, -122.41093],
 [37.75276, -122.40878]]

In [21]:
import trace_plotting
# Compare the interpolated trace (t_i) with the raw one (t); both are lists of
# [lat, long] pairs. trace_plotting is a project-local module not shown in
# this notebook, so the exact rendering cannot be described from here.
trace_plotting.plot_two_traces(t_i, t)

In [None]:
# Persist the interpolated trajectories.
# This cell used to write to "traj_list.pkl" inside data_save_path, which is
# the same file the non-interpolated list is saved to earlier in the notebook,
# so running both cells silently replaced the raw data. A distinct file name
# keeps both outputs.
# NOTE(review): if downstream code expects the interpolated data under the old
# name in a different directory, change data_save_path / the name accordingly.
with open(os.path.join(data_save_path, "traj_list_interp.pkl"), 'wb') as handle:
    pickle.dump(traj_list_i, handle, protocol=pickle.HIGHEST_PROTOCOL)