In [1]:
# Input locations, given as paths relative to the working directory.
DATA_PATH = "data/motionsense.csv"    # CSV read into the main DataFrame
QRELS_PATH = "evaluation/qrels.json"  # relevance judgments loaded with ranx

In [2]:
# Signal column analysed in this notebook; it also determines the file names below.
ANALYSIS_COL = "userAcceleration.z"
# Random seed (not referenced by the cells visible in this notebook).
SEED = 2024

# Dots in the column name are replaced with dashes so it can be used in file names.
_col_slug = "-".join(ANALYSIS_COL.split("."))

# NOTE(review): distances are read from an `adtw` directory while the run file
# is prefixed `dtw_` — confirm that this naming difference is intended.
PRECOMPUTED_DISTANCES = f"evaluation/adtw/{_col_slug}_distances.npy"
RUN_FILE = f"evaluation/runs/dtw_{_col_slug}.json"

# Carregando os dados processados

In [3]:
import pandas as pd

In [4]:
# Load the CSV at DATA_PATH, using its first column as the DataFrame index.
df = pd.read_csv(DATA_PATH, index_col=0)

In [5]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,act,id,trial
0,1.528132,-0.733896,0.696372,0.741895,0.669768,-0.031672,0.316738,0.77818,1.082764,0.294894,-0.184493,0.377542,0.0,0.0,1.0
1,1.527992,-0.716987,0.677762,0.753099,0.657116,-0.032255,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866,0.0,0.0,1.0
2,1.527765,-0.706999,0.670951,0.759611,0.649555,-0.032707,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808,0.0,0.0,1.0
3,1.516768,-0.704678,0.675735,0.760709,0.647788,-0.04114,-0.025005,-1.048717,0.03586,-0.008389,0.136788,0.094958,0.0,0.0,1.0
4,1.493941,-0.703918,0.672994,0.760062,0.64721,-0.05853,0.114253,-0.91289,0.047341,0.199441,0.353996,-0.044299,0.0,0.0,1.0


In [6]:
# Summarise column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1412865 entries, 0 to 1412864
Data columns (total 15 columns):
 #   Column              Non-Null Count    Dtype  
---  ------              --------------    -----  
 0   attitude.roll       1412865 non-null  float64
 1   attitude.pitch      1412865 non-null  float64
 2   attitude.yaw        1412865 non-null  float64
 3   gravity.x           1412865 non-null  float64
 4   gravity.y           1412865 non-null  float64
 5   gravity.z           1412865 non-null  float64
 6   rotationRate.x      1412865 non-null  float64
 7   rotationRate.y      1412865 non-null  float64
 8   rotationRate.z      1412865 non-null  float64
 9   userAcceleration.x  1412865 non-null  float64
 10  userAcceleration.y  1412865 non-null  float64
 11  userAcceleration.z  1412865 non-null  float64
 12  act                 1412865 non-null  float64
 13  id                  1412865 non-null  float64
 14  trial               1412865 non-null  float64
dtypes: float64(15)
memor

In [7]:
# Number of distinct activity labels in the data.
df['act'].nunique() # TODO: remember to map each label id to its class-name string

6

# Separando pares X e y

In [8]:
import numpy as np

In [9]:
# List the distinct activity label values.
df['act'].unique()

array([0., 1., 2., 3., 4., 5.])

In [10]:
# Build one series per (activity, subject) pair from ANALYSIS_COL:
# X[i] is the 1-D array of samples and y[i] is the activity label of that series.
# The nested iteration order (activities outer, subjects inner, both in order of
# first appearance in df) fixes the position of each series in X.
# NOTE(review): presumably that position is the id used by the precomputed
# distances and by the qrels — confirm before changing the iteration order.
X = []
y = []

subject_ids = df['id'].unique()  # hoisted: identical for every activity

for label in df['act'].unique():
  act_mask = df['act'] == label  # computed once per activity, not once per pair
  for subj_id in subject_ids:
    pair_mask = act_mask & (df['id'] == subj_id)

    # A pair with no rows still contributes an empty series, keeping positions stable.
    X.append(df.loc[pair_mask, ANALYSIS_COL].to_numpy())
    y.append(label)

In [11]:
# Count how many series were collected for each activity label.
np.unique(y, return_counts=True)

(array([0., 1., 2., 3., 4., 5.]), array([24, 24, 24, 24, 24, 24]))

# Preparando avaliação

In [12]:
from ranx import Qrels

In [13]:
# Load the relevance judgments from QRELS_PATH.
qrels = Qrels.from_file(QRELS_PATH)

# Executando a busca por similaridade

In [14]:
from ranx import Run
from aeon.distances import dtw_pairwise_distance

In [15]:
def compute_neighbors_by_similarity(
    distances: np.ndarray,
    topk: int = 100) -> dict:
  """Rank, for every item, its nearest neighbours in a pairwise distance matrix.

  Args:
    distances: square 2-D array where ``distances[i, j]`` is the distance
      between item ``i`` and item ``j`` (smaller means more similar).
    topk: maximum number of neighbours kept per query.

  Returns:
    A run dictionary ``{query_id: {neighbour_id: score}}`` whose ids are the
    row/column positions converted to strings. Scores are descending rank
    values (the nearest neighbour gets the highest score), so only their
    order is meaningful.

  Raises:
    ValueError: if ``topk`` is negative.
  """
  if topk < 0:
    raise ValueError(f"topk must be non-negative, got {topk}")

  run_dict = {}

  for qid in range(distances.shape[0]):
    # A stable sort keeps tied distances in index order, so the run is deterministic.
    ranking = np.argsort(distances[qid], kind="stable")

    # Drop the query itself by id: with ties (or a non-minimal self-distance) it
    # is not guaranteed to be the first element of the ranking.
    neighbors = ranking[ranking != qid][:topk]

    # The nearest neighbour receives the largest score; the last one kept gets 0.
    scores = range(len(neighbors) - 1, -1, -1)

    run_dict[str(qid)] = {str(n): float(score) for n, score in zip(neighbors, scores)}

  return run_dict

In [16]:
# Show the resolved path of the precomputed distance file.
PRECOMPUTED_DISTANCES

'evaluation/adtw/userAcceleration-z_distances.npy'

In [17]:
# Load the precomputed distance array from disk.
# NOTE(review): presumably a square pairwise matrix over the series in X — confirm.
distances = np.load(PRECOMPUTED_DISTANCES)

In [18]:
# Turn the distance matrix into a run dictionary of ranked neighbours per query.
run_dict = compute_neighbors_by_similarity(distances)

# Avaliando métricas de RI

In [19]:
from ranx import evaluate

In [20]:
# Retrieval metrics to compute, each with a cutoff of 10.
METRICS = [
    "recall@10",
    "map@10",
    "hit_rate@10",
    "mrr@10",
]

In [21]:
# Wrap the run dictionary in a ranx Run object.
run = Run(run_dict)

In [22]:
# Score the run against the qrels for every metric in METRICS, then display the result.
metrics = evaluate(qrels, run, METRICS)
metrics

{'recall@10': 0.21846064814814814,
 'map@10': 0.1649327371766608,
 'hit_rate@10': 0.9861111111111112,
 'mrr@10': 0.7505649250440916}

# Serializando Run

In [23]:
# Show the path the run will be written to.
RUN_FILE

'evaluation/runs/dtw_userAcceleration-z.json'

In [24]:
# Persist the run to RUN_FILE.
run.save(RUN_FILE)