# Prediction Results with Audio Annotations on Summary Vectronics Data

**Last Modified**: Jul 29, 2025

In this notebook, we summarize and visualize the results for behavior classification with audio annotations on summary statistics of Vectronics acceleration data. The training and testing data splits are created randomly for this experiment. We conduct hyperparameter tuning and look at the prediction results of the best performing model.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator, AutoDateFormatter
import seaborn as sns
import os
import sys
from scipy.stats import wasserstein_distance
from datetime import datetime
from matplotlib.dates import DateFormatter
from tqdm import tqdm
import json
sys.path.append('.')
sys.path.append('../')

from src.utils.data_prep import (combined_annotations,
                                create_matched_data,
                                create_max_windows,
                                create_summary_data,
                                create_data_splits,
                                setup_dataloaders,
                                give_balanced_weights)

from src.utils.io import (get_matched_data_path,
                          get_matched_metadata_path,
                          get_matched_summary_path,
                          get_metadata_path,
                          get_video_labels_path,
                          get_audio_labels_path,
                          get_figures_dir,
                          get_project_root,
                          get_results_dir,
                          )

from config.settings import (RAW_COLLAPSE_BEHAVIORS_MAPPING_W_TROTTING,
                             RAW_COLLAPSE_BEHAVIORS_MAPPING_WO_TROTTING,
                             RAW_BEHAVIORS_W_TROTTING,
                             RAW_BEHAVIORS_WO_TROTTING,
                             SAMPLING_RATE,
                             id_mapping
                             )

from src.utils.functionalities import (sliced_wasserstein_distance,
                                      ot_align,
                                      minmax_scale)
from config.paths import (HISTORIC_ACC)

In [6]:
# Graphing parameters: global matplotlib defaults for every figure drawn
# after this cell runs.
import matplotlib as mpl

mpl.rcParams.update({
    # line / marker geometry
    'lines.markersize': 12,
    'lines.linewidth': 1,
    # font sizes for ticks, axis labels, legend and titles
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    "axes.labelsize": 22,
    'legend.fontsize': 18,
    'axes.titlesize': 25,
    # NOTE(review): usetex routes text through LaTeX; presumably needs a
    # working LaTeX installation on the machine running the notebook.
    'text.usetex': True,
})

In [9]:
# Load the combined video + audio annotations, then in one pass:
#   1. parse the start/end timestamps as datetimes,
#   2. add a `duration` column (end - start, in seconds),
#   3. remap `Behavior` through RAW_COLLAPSE_BEHAVIORS_MAPPING_WO_TROTTING,
#   4. keep only rows whose behavior is listed in RAW_BEHAVIORS_WO_TROTTING.
all_annotations = (
    combined_annotations(
        video_path=get_video_labels_path(),
        audio_path=get_audio_labels_path(),
        id_mapping=id_mapping,
    )
    .assign(
        Timestamp_start=lambda frame: pd.to_datetime(frame["Timestamp_start"]),
        Timestamp_end=lambda frame: pd.to_datetime(frame["Timestamp_end"]),
        # keyword order matters: duration must see the parsed timestamps above
        duration=lambda frame: (
            frame["Timestamp_end"] - frame["Timestamp_start"]
        ).dt.total_seconds(),
        Behavior=lambda frame: frame["Behavior"].replace(
            RAW_COLLAPSE_BEHAVIORS_MAPPING_WO_TROTTING
        ),
    )
    .loc[lambda frame: frame["Behavior"].isin(RAW_BEHAVIORS_WO_TROTTING)]
)

In [10]:
# Descriptive statistics of the annotation durations.
print(all_annotations["duration"].describe())

# Empirical share of annotations whose duration is at most `value`
# (the mean of a boolean mask is the fraction of True entries).
value = 30
quantile = all_annotations["duration"].le(value).mean()
print(f"Value {value}s is approximately at the {quantile*100:.2f}th percentile.")

count    10170.000000
mean        32.534612
std         96.290787
min          0.000000
25%          3.000000
50%          8.000000
75%         23.000000
max       2466.000000
Name: duration, dtype: float64
Value 30s is approximately at the 80.03th percentile.


In [11]:
# Read the metadata CSV located at the path returned by get_metadata_path();
# this frame is later passed to create_matched_data.
metadata = pd.read_csv(get_metadata_path())

In [14]:
# Build -- or reload from disk -- the matched acceleration/annotation tables
# for a given minimum duration. Results are cached as three CSV files under
# <project_root>/data/duration<min_duration>/.
min_duration = 20  # passed to create_matched_data; presumably seconds -- TODO confirm

# NOTE(review): `dir` shadows the Python builtin of the same name. The name is
# kept because other cells may reference it; consider renaming notebook-wide.
dir = os.path.join(get_project_root(), 'data', f"duration{min_duration}")
os.makedirs(dir, exist_ok=True)

summary_path = os.path.join(dir, "matched_acc_summary.csv")
data_path = os.path.join(dir, "matched_acc_data.csv")
metadata_path = os.path.join(dir, "matched_acc_metadata.csv")

if all(os.path.exists(cached) for cached in (summary_path, data_path, metadata_path)):
    # Cache hit: all three files exist, so read them back.
    acc_summary = pd.read_csv(summary_path)
    acc_data = pd.read_csv(data_path)
    acc_data_metadata = pd.read_csv(metadata_path)

    # The per-axis columns come back from CSV as strings; decode each cell
    # with json.loads.
    for axis_column in ('acc_x', 'acc_y', 'acc_z'):
        acc_data[axis_column] = acc_data[axis_column].apply(json.loads)
else:
    # Cache miss (at least one file absent): recompute everything and persist it.
    acc_summary, acc_data, acc_data_metadata = create_matched_data(
        metadata, all_annotations, min_duration=min_duration, verbose=True
    )
    acc_summary.to_csv(summary_path, index=False)
    acc_data.to_csv(data_path, index=False)
    acc_data_metadata.to_csv(metadata_path, index=False)
