In [None]:
import pandas as pd
import numpy as np
import os

import pickle
import h5py
from subprocess import call

import sys
sys.path.append('../src')
from utils import train_predict_svm, train_predict_mlp, eval_per_attack, execute_test_loop_k, execute_test_loop_retrace


# --- Configuration -----------------------------------------------------------
# City key; selects the data sub-directory and is embedded in result file names.
city = "sf"
# Root folder that holds one sub-directory per city.
data_root = '/home/schestakov/projects/re-identification/data'
data_path = f'{data_root}/{city}'
# Embedding pickles of the deep-learning models are read from this folder.
data_path_dl = f'{data_path}/dl_models'


# Fraction of the samples used to fit the predictor; the remainder is used for evaluation.
train_split = 0.7

# Load Data
# NOTE: pickle can execute arbitrary code while loading; only open files from a trusted source.
# The context managers make sure the file handles are closed right after reading.
with open(os.path.join(data_path, "total_set_labels.pkl"), "rb") as labels_file:
    total_set_labels = pickle.load(labels_file)
with open(os.path.join(data_path, "total_set_description.pkl"), "rb") as description_file:
    total_set_description = pickle.load(description_file)

In [None]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier
import time

# Models to evaluate: display name -> file-name suffix of the embedding pickles.
# For every entry the files "own<suffix>" and "total<suffix>" are read from data_path_dl.
models = {
    "ReTrace1": "_s_retrace.pkl",
    #"CLT-sim": "_s_cltsim.pkl",
    #"t2vec": "_s_t2vec.pkl",
}

# Per-attack scores for each model (as returned by eval_per_attack).
full_eval_results = {}
# Overall scores for each model: [accuracy, f1, elapsed_time_in_seconds].
avg_eval_results = {}

for name, file_name in models.items():
    # Load Embeddings (context managers close the files right after reading).
    # NOTE: pickle can execute arbitrary code while loading; only open trusted files.
    with open(os.path.join(data_path_dl, "own" + file_name), "rb") as own_file:
        own_s = pickle.load(own_file)
    with open(os.path.join(data_path_dl, "total" + file_name), "rb") as total_file:
        total_s = pickle.load(total_file)

    # The timer covers the distance computation plus training/prediction,
    # but not the loading of the embeddings above.
    start = time.time()
    print(f"Evaluating {name}.")
    print("Obtaining distances")
    distances, train_time = execute_test_loop_k(own_s, total_s, k=3)

    print("Training predictor")
    # Train a Prediction model and predict.
    # The first `train_split` share of the samples is used for training, the rest for testing.
    num_train_samples = int(len(total_s) * train_split)
    predictions = train_predict_svm(
        distances[:num_train_samples],
        distances[num_train_samples:],
        total_set_labels[:num_train_samples],
    )
    end = time.time()
    elapsed_time = end - start

    # Evaluate overall
    test_labels = total_set_labels[num_train_samples:]
    accuracy = accuracy_score(test_labels, predictions)
    f1 = f1_score(test_labels, predictions)
    print(f"{name} Acc: {accuracy:.3f}   F1: {f1:.3f} time: {elapsed_time:.3f}")

    # Evaluate for each attack individually
    attacks_res = eval_per_attack(predictions, total_set_description[num_train_samples:], test_labels)

    # Keep the timing next to the scores so it is not lost when the next model
    # overwrites `elapsed_time`; positions 0 and 1 remain accuracy and F1.
    avg_eval_results[name] = [accuracy, f1, elapsed_time]
    full_eval_results[name] = attacks_res
    print("*************")

In [None]:
# Convert Results to DF
# One 'AVG' row per model with its overall scores, followed by one row per attack.
result_data = []
for model_name, attack_res in full_eval_results.items():
    model_scores = avg_eval_results[model_name]
    acc = model_scores[0]
    f1 = model_scores[1]
    # Use the model's own timing when avg_eval_results carries one as a third entry;
    # otherwise fall back to the notebook-global `elapsed_time`, which only holds the
    # timing of the most recently evaluated model.
    model_time = model_scores[2] if len(model_scores) > 2 else elapsed_time
    # The 'cores' column is fixed to 1 for every row.
    result_data.append([model_name, model_time, 1, 'AVG', acc, f1])
    for attack_name, value in attack_res.items():
        # value[0] goes into the 'Accuracy' column, value[1] into the 'F1' column.
        result_data.append([model_name, model_time, 1, attack_name, value[0], value[1]])

df = pd.DataFrame(result_data, columns=['Model', 'Time', 'cores', 'Attack', 'Accuracy', 'F1'])
df

In [None]:
# Save as CSV
from datetime import datetime
result_path = 'results'
# to_csv does not create missing parent directories, so make sure the folder exists first.
os.makedirs(result_path, exist_ok=True)
# Month.day._hour.minute stamp keeps successive runs from overwriting each other.
timestamp = datetime.now().strftime("%m.%d._%H.%M")
df.to_csv(os.path.join(result_path, f"retrace_{timestamp}_{city}.csv"))