In [None]:
import pandas as pd
import numpy as np
import yaml
import os
import pickle
import random
import sys
from tqdm import tqdm

sys.path.append('../src')

import threads as threads
from utils import create_modified_set, pkl2h5, read_train_file, pkl2h5_wo_time

# Expose only GPU 0 to CUDA-aware libraries used from this kernel.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Directory the trajectory pickle is read from.
data_path = '/home/schestakov/data/re-identification/porto/'

# Directory the generated splits and evaluation sets are written to.
# Previously used alternative: '/home/schestakov/data/re-identification/porto/db_exp2'
data_save_path = '/home/schestakov/projects/re-identification/data/porto/db'

In [None]:
# Refer to porto_data_analysis.ipynb to see how to get from original data to list of trajectories

In [None]:
# List of Trajectories
# Trajectory: [[x1,y1,t1], [x2,y2,t2], ... , [xn,yn,tn]]
# NOTE(review): the file is named "traj_list_notime.pkl", so the time component
# may be absent (points would then be [x, y]) — confirm against the data.
#
# The file is opened in a context manager so the handle is closed right after
# loading (pickle.load(open(...)) left it open until garbage collection).
# Security: pickle.load can execute arbitrary code — only load files you produced yourself.
with open(os.path.join(data_path, "traj_list_notime.pkl"), "rb") as f:
    traj_list = pickle.load(f)

# Number of trajectories as loaded, i.e. before any filtering.
n_samples = len(traj_list)
n_samples

In [None]:
# Remove all trajectories with fewer than `min_points` points.
# The earlier condition (`l < 10`) was inverted: it kept only the short
# trajectories and dropped every trajectory that should have been retained.
min_points = 10
new_traj_list = [t for t in traj_list if len(t) >= min_points]
print(len(new_traj_list))

In [None]:
# Continue with the filtered list from here on. This rebinds `traj_list`, so the
# list as originally loaded is no longer reachable under that name.
traj_list = new_traj_list

In [None]:
# Shuffle in place before splitting. The global RNG is seeded first so the
# resulting order (and therefore the train/val/test split) is the same on every
# Restart & Run All. Seeding the global RNG also makes later uses of the
# `random` module in this kernel deterministic.
random.seed(42)
random.shuffle(traj_list)

In [None]:
# Split by 80%, 10%, 10%.
# The size is recomputed here: `n_samples` was measured right after loading,
# before the length filter shortened `traj_list`. Using that stale value pushes
# the cut points towards (or past) the end of the list and leaves `val`/`test`
# too small or even empty.
n_samples = len(traj_list)
train_end = int(0.8 * n_samples)
val_end = int(0.9 * n_samples)

train = traj_list[:train_end]
val = traj_list[train_end:val_end]
test = traj_list[val_end:]

In [None]:
# Report the sizes of the training and the test split, one number per line.
print(len(train), len(test), sep="\n")

In [None]:
# Persist the three splits as pickle files inside data_save_path.
for pkl_name, split_data in (("train.pkl", train), ("val.pkl", val), ("test.pkl", test)):
    with open(os.path.join(data_save_path, pkl_name), "wb") as f:
        pickle.dump(split_data, f)

In [None]:
# Load the splits back from disk.
# Each file is opened in a context manager so its handle is closed as soon as
# the pickle has been read (pickle.load(open(...)) left the handles open).
# Security: only unpickle files that were written by this notebook.
with open(os.path.join(data_save_path, "train.pkl"), "rb") as f:
    train = pickle.load(f)
with open(os.path.join(data_save_path, "val.pkl"), "rb") as f:
    val = pickle.load(f)
with open(os.path.join(data_save_path, "test.pkl"), "rb") as f:
    test = pickle.load(f)

### Apply Threads

In [None]:
###### Add Threads to test #######

# The test split is divided into an "own" set and the remainder.
# For each thread (attack):
#     - the attack is applied to the own set
#     - the attack is applied to another set as well
#       (presumably so the evaluation cannot rely on attack artefacts alone — confirm)
#     - labels are added: true if the attack was applied to the own set, else false

# Earlier, share-based variant kept for reference:
#   own_share = 0.1
#   own_set = test[:int(own_share*len(test))]
#   other_set = test[int(own_share*len(test)):]

# Fixed-size variant: the first `own_size` test trajectories form the own set.
own_size = 1000
own_set, rest = test[:own_size], test[own_size:]

# Sizes of both parts, one number per line.
print(len(own_set), len(rest), sep="\n")

In [None]:
# Registry of the attacks ("threads") applied to the evaluation sets.
# Each key has the form "<ABBREVIATION>:<descriptive_name>". Each value is a
# function from the `threads` module or, for the 'Multi:...' entry, a list of
# such functions.
# NOTE(review): the list is presumably applied in sequence by
# create_modified_set — confirm there.
# The dict is iterated in insertion order when the attacked sets are built.
attacks_dict = {
    'Multi:RAP+DS+GWN': [threads.resample_along_path, threads.downsample, threads.add_signal_noise],
    'GWN:white_noise': threads.add_white_noise,
    'SNR:signal_to_noise': threads.add_signal_noise,
    'OSNR:outliers_with_snr': threads.add_outliers_with_signal_to_noise_ratio,
    'RIP:rounding': threads.remove_non_significant_bits, 
    'DS:downsample': threads.downsample,
    'RRPP:replace_random_points_with_path': threads.replace_random_points_with_path,
    'RNSPP:replace_non_skeleton_points_with_path': threads.replace_non_skeleton_points_with_path,
    'RAP:resample_along_path': threads.resample_along_path,
    'C:cropping': threads.cropping,
    }



In [None]:

# The "other" set: the first `own_size` trajectories of the remainder.
other_set = rest[:own_size]

# Accumulators for the attacked trajectories, their labels and their descriptions.
total_set, total_set_labels, total_set_description = [], [], []

for attack_name, attack_function in attacks_dict.items():
    # Apply the same attack to the own set and to the other set.
    mod_own, own_labels, own_attack_descr = create_modified_set(
        own_set, attack_name, attack_function, is_own_set=True
    )
    mod_other, other_labels, other_attack_descr = create_modified_set(
        other_set, attack_name, attack_function, is_own_set=False
    )

    # Append both results (own first, then other) to the running totals.
    total_set += mod_own + mod_other
    total_set_labels += own_labels + other_labels
    total_set_description += own_attack_descr + other_attack_descr


In [None]:
# Shuffle trajectories, labels and descriptions together so that the three
# sequences stay aligned element by element.
zipped_set = [
    (sample, label, descr)
    for sample, label, descr in zip(total_set, total_set_labels, total_set_description)
]
random.shuffle(zipped_set)

# Unzip again; note that the three names are bound to tuples afterwards.
total_set, total_set_labels, total_set_description = (
    tuple(column) for column in zip(*zipped_set)
)

In [None]:
# Persist the own set and the (shuffled) total set with its labels and
# descriptions as pickle files inside data_save_path.
for pkl_name, payload in (
    ("own_set.pkl", own_set),
    ("total_set.pkl", total_set),
    ("total_set_labels.pkl", total_set_labels),
    ("total_set_description.pkl", total_set_description),
):
    with open(os.path.join(data_save_path, pkl_name), "wb") as f:
        pickle.dump(payload, f)

### Preprocess further for DL models

In [None]:
# Absolute path of the Julia executable used to run the .jl preprocessing scripts.
julia_path = "/home/schestakov/downloads/julia/julia-1.8.5/bin/julia"
# Hyper-parameter JSON handed to the Julia scripts via --parampath
# (relative path, so it is resolved against the current working directory).
hyper_param_path = "./hyper-parameters.json"

In [None]:
# Redirect data_save_path to its "dl_models" sub-directory for everything that
# follows. The guard keeps the cell idempotent: without it, every re-run would
# append another "dl_models" component to the path.
if os.path.basename(os.path.normpath(data_save_path)) != "dl_models":
    data_save_path = os.path.join(data_save_path, "dl_models")

In [None]:
##### Training ####

# IMPORTANT:
# 1. Trajectories must be stored with the coordinate order of this example
#    point: [-8.619489, 41.175018].
#    If traj_list holds points the other way round, e.g. [41.148009, -8.619777],
#    the order has to be swapped (swap_lon_lat = True);
#    otherwise it can stay as it is (swap_lon_lat = False).
#    NOTE(review): the original note called the required order "lat,lon", but
#    for Porto -8.6 is the longitude, i.e. the example reads [lon, lat] — confirm.
# 2. Check whether the trajectories carry time -> (remove_time = False or True)

cityname = "porto"

# Files used for training
for split_data, h5_name in ((train, "train.h5"), (val, "val.h5")):
    pkl2h5(split_data, data_save_path, h5_name, swap_lon_lat=False, remove_time=False)

# To build the spatial region a city.h5 file with all trajectories is needed.
# The file from the last iteration is reused, as it will be the same:
#pkl2h5(traj_list, data_save_path, f"{cityname}.h5", swap_lon_lat = False, remove_time = False)

In [None]:
from subprocess import check_call
# Runs preprocess.jl, which preprocesses train.h5 and val.h5 for training.
# Output is train.src, train.trg, val.src, val.trg.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# Julia run stops the notebook here instead of letting the following cells read
# missing or stale files. On success it returns 0, as call() did.
check_call([julia_path, "preprocess.jl", "--datapath", data_save_path, "--parampath", hyper_param_path])

In [None]:
# For our model the Julia output is read back so it can be converted to numpy
# and saved as .npz. Every file yields a pair (sequences, lengths).
max_len = 100
(
    (train_src, train_src_len),
    (train_trg, train_trg_len),
    (val_src, val_src_len),
    (val_trg, val_trg_len),
) = (
    read_train_file(os.path.join(data_save_path, file_name), max_len)
    for file_name in ("train.src", "train.trg", "val.src", "val.trg")
)

In [None]:
# Convert the sequences returned by read_train_file to numpy arrays ...
train_src, train_trg, val_src, val_trg = (
    np.array(seqs) for seqs in (train_src, train_trg, val_src, val_trg)
)

# ... and likewise their length vectors.
train_src_len, train_trg_len, val_src_len, val_trg_len = (
    np.array(lengths)
    for lengths in (train_src_len, train_trg_len, val_src_len, val_trg_len)
)

# Shapes of the four sequence arrays, one per line.
for arr in (train_src, train_trg, val_src, val_trg):
    print(arr.shape)

In [None]:
# The validation set is not used separately, so it is appended to the training
# data along the first axis.
# Caution: the train_* names are rebound to the merged arrays, so running this
# cell a second time appends the validation data once more.
train_src, train_trg, train_src_len, train_trg_len = (
    np.concatenate([train_part, val_part])
    for train_part, val_part in (
        (train_src, val_src),
        (train_trg, val_trg),
        (train_src_len, val_src_len),
        (train_trg_len, val_trg_len),
    )
)



In [None]:
# Write the four arrays into one compressed .npz archive under the keys
# src / trg / src_len / trg_len.
np.savez_compressed(
    os.path.join(data_save_path, "train.npz"),
    **{
        "src": train_src,
        "trg": train_trg,
        "src_len": train_src_len,
        "trg_len": train_trg_len,
    },
)

In [None]:
# Loading example:
# np.load on an .npz archive returns an NpzFile that keeps the file open and
# reads each array on access. The with-block closes the archive once the four
# arrays have been read (it was left open before).
with np.load(os.path.join(data_save_path, "train.npz")) as loaded:
    src = loaded['src']
    trg = loaded['trg']
    src_len = loaded['src_len']
    trg_len = loaded['trg_len']

In [None]:
##### Evaluation ####


# Format of .pkl files
# List of Trajectories: [T1, T2, ... , Tn]
# Trajectory: T = [[x1,y1,t1], [x2,y2,t2], ... , [xn,yn,tn]]

# Set to True to reload own_set / total_set from disk instead of using the
# objects that are still in memory.
load = False
if load:
    # own_set.pkl and total_set.pkl are written to the base save directory,
    # before data_save_path is redirected to its "dl_models" sub-directory.
    # Strip that trailing component so the pickles are looked up where they
    # were saved (previously they were searched inside "dl_models").
    pkl_dir = os.path.normpath(data_save_path)
    while os.path.basename(pkl_dir) == "dl_models":
        pkl_dir = os.path.dirname(pkl_dir)
    # Context managers close the file handles right after loading.
    with open(os.path.join(pkl_dir, "own_set.pkl"), "rb") as f:
        own_set = pickle.load(f)
    with open(os.path.join(pkl_dir, "total_set.pkl"), "rb") as f:
        total_set = pickle.load(f)

# We need to convert train, val, own_set, total_set
print(f"own: {len(own_set)} \ntotal: {len(total_set)}")

# For evaluation
pkl2h5(own_set, data_save_path, "own.h5", swap_lon_lat = False, remove_time = False)
pkl2h5(total_set, data_save_path, "total.h5", swap_lon_lat = False, remove_time = False)

In [None]:
from subprocess import check_call
# Map the trajectories of each .h5 file to the grid and save them as a .t file.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# Julia run is reported immediately instead of being silently ignored.
filenames = ["own", "total"]
for name in filenames:
    check_call([julia_path, "traj2gridseq.jl", "--datapath", data_save_path, "--filename", name,  "--parampath", hyper_param_path])