## Load data

In [14]:
import pickle
# Load the pickled test dataset and split it into its 'data' and 'meta' parts.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only run this on files that come from a trusted source.
with open("../../data/1140_sample_migrated_GLD_dino+bro.pkl", 'rb') as f:
    data = pickle.load(f)
data_obs = data['data']
meta_obs = data['meta']

In [15]:
# Inputs for the split-dataset experiment.

# Identifiers, read from piezo_multiple_bro_id.pkl.
with open("../../data/piezo_multiple_bro_id.pkl", "rb") as f:
    piezo_multiple_bro_id = pickle.load(f)

# The split dataset, stored as a dict with a 'data' and a 'meta' entry.
with open("../../data/1233_sample_migrated_GLD_split.pkl", "rb") as f:
    splits_dataset = pickle.load(f)
splits_data, splits_meta = splits_dataset['data'], splits_dataset['meta']

## Helper functions

In [16]:
import random
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import wavelet_fingerprint

In [17]:
def get_fingerprints(data, meta):
    """Compute the DINO and BRO wavelet fingerprints for every entry of ``data``.

    Parameters
    ----------
    data : dict
        Maps an identifier to a mapping holding the two series under the keys
        ``'dino'`` and ``'bro'``.
    meta : dict
        Maps the same identifiers to a mapping holding the coordinates under
        the keys ``'x_dino'``, ``'y_dino'``, ``'x_bro'`` and ``'y_bro'``.

    Returns
    -------
    tuple of (dict, dict)
        ``(fingerprint_dino, fingerprint_bro)``. Each maps an identifier to the
        ``'vector'`` entry returned by ``wavelet_fingerprint.compute_fingerprint``.
        Both dicts always contain exactly the same identifiers in the same
        order, so row ``i`` of one corresponds to row ``i`` of the other.

    Notes
    -----
    If either fingerprint computation raises ``ValueError`` the identifier is
    reported with ``print`` and left out of *both* dicts.
    """
    fingerprint_dino, fingerprint_bro = {}, {}
    for gld, series in data.items():
        dino_ts = series['dino']
        coords_dino = [meta[gld]['x_dino'], meta[gld]['y_dino']]
        coords_bro = [meta[gld]['x_bro'], meta[gld]['y_bro']]
        bro_ts = series['bro']
        try:
            vector_dino = wavelet_fingerprint.compute_fingerprint(dino_ts, coords_dino)['vector']
            vector_bro = wavelet_fingerprint.compute_fingerprint(bro_ts, coords_bro)['vector']
        except ValueError as e:
            print(f"Skipping {gld}: {e}")
            continue
        # Store only once both computations succeeded. Writing the DINO vector
        # before the BRO call could leave an identifier in one dict only, which
        # would shift every later row when the dicts are turned into matrices.
        fingerprint_dino[gld] = vector_dino
        fingerprint_bro[gld] = vector_bro
    return fingerprint_dino, fingerprint_bro

In [18]:
def calc_precision_k(cmp, k):
    """Precision@k from a per-query hit matrix.

    Parameters
    ----------
    cmp : numpy.ndarray
        2-D array with one row per query; column ``j`` is truthy when the
        ``j``-th retrieved item counts as a hit.
    k : int
        Number of leading columns to evaluate. The hit count is divided by
        ``k`` itself, not by the number of columns actually available.

    Returns
    -------
    tuple
        ``(mean_precision, per_query_precision)``: the mean over all rows and
        the 1-D array of per-row values.
    """
    hits_in_top_k = cmp[:, :k]
    per_query = np.sum(hits_in_top_k, axis=1) / k
    return np.mean(per_query), per_query

## Fingerprint on test dataset

In [19]:
# Fingerprint both series of every entry in the test dataset.
fp_test_dino, fp_test_bro = get_fingerprints(data=data_obs, meta=meta_obs)

In [20]:
# Fit an L1 nearest-neighbour index on the DINO fingerprints; row order
# follows the insertion order of fp_test_dino.
dino_vectors_test = np.array(list(fp_test_dino.values()))
nn_test = NearestNeighbors(metric="l1")
nn_test.fit(dino_vectors_test)

In [21]:
# Evaluation: query the index (fitted on the DINO fingerprints) with the BRO
# fingerprints and check whether the retrieved DINO entries carry the same
# identifier as the query.
k = 3
dists, indices = nn_test.kneighbors(np.stack(list(fp_test_bro.values())), n_neighbors=k)
# One label per query row (BRO order).
label_test = np.array(list(fp_test_bro.keys()))
# `indices` are row positions in the matrix passed to nn_test.fit, i.e. the
# DINO fingerprints, so they must be translated through the DINO keys.
# Building the key array once also avoids re-listing the keys per element.
fitted_label = np.array(list(fp_test_dino.keys()))
retrieved_label = fitted_label[indices]
# cmp[i, j] is True when the j-th neighbour of query i has the query's label.
cmp = label_test[:, None] == retrieved_label

In [22]:
# Report precision over the first `neigh_level` retrieved neighbours.
neigh_level = 1
prec_k, prec_each_k = calc_precision_k(cmp, neigh_level)
print(f"PREC@{neigh_level}={prec_k:.3f}")

PREC@1=1.000


## Fingerprint on split dataset

In [23]:
# Fingerprint both series of every entry in the split dataset.
fp_split_dino, fp_split_bro = get_fingerprints(data=splits_data, meta=splits_meta)

In [24]:
# Fit an L1 nearest-neighbour index on the BRO fingerprints; row order
# follows the insertion order of fp_split_bro.
bro_vectors_split = np.array(list(fp_split_bro.values()))
nn_split = NearestNeighbors(metric="l1")
nn_split.fit(bro_vectors_split)

In [25]:
# Evaluation: query the index (fitted on the BRO fingerprints) with the DINO
# fingerprints. A retrieved entry counts as a hit when it has the same
# PIEZOMETER_DBK value as the query.
k = 3
dists, indices = nn_split.kneighbors(np.stack(list(fp_split_dino.values())), n_neighbors=k)

# One label per query row, in the same order as the query matrix above.
# NOTE(review): a missing 'PIEZOMETER_DBK' becomes the string 'None', so
# entries without that field would match each other - confirm it is always set.
label_test = [str(splits_meta[gld].get('PIEZOMETER_DBK')) for gld in fp_split_dino]

# `indices` are row positions in the matrix passed to nn_split.fit, i.e. the
# BRO fingerprints, so retrieved labels are looked up in BRO order.
fitted_label = [str(splits_meta[gld].get('PIEZOMETER_DBK')) for gld in fp_split_bro]
retrieved_label = [[fitted_label[i] for i in neigh] for neigh in indices]

# cmp[i, j] is True when the j-th neighbour of query i has the query's label.
cmp = np.array(label_test)[:, None] == np.array(retrieved_label)

In [26]:
# Report precision at each neighbourhood size. Levels must not exceed the
# `n_neighbors` used to build `cmp`, because calc_precision_k divides by the
# requested level regardless of how many columns exist.
for level in (1, 3):
    prec_k, prec_each_k = calc_precision_k(cmp, k=level)
    print(f"PREC@{level}={prec_k:.3f}")

PREC@1=0.838
PREC@3=0.702
