# Classification Tasks with Kinematic Time Series from Head Pose Estimation from HMD

---
# Results compilation


In [None]:
# Add files to sys.path
from pathlib import Path
import sys,os
# Locate the folder of this script so the local "kinemats" package can be imported.
this_path = None
try:
    # `__file__` is defined when the code runs as a regular script/module.
    this_path = str(os.path.dirname(os.path.abspath(__file__)))
except NameError:
    # `__file__` is not defined (e.g. in an interactive session): use the
    # current working directory instead. Only NameError is caught so that any
    # other failure is not silently hidden.
    this_path = str(Path().absolute())+"/"
print("File Path:", this_path)
sys.path.append(os.path.join(this_path, "kinemats"))


# Import classes
import utils  # Utils for generation of files and paths

from plotter.ts_visualization import *
import ts_processing
import ts_classification

# Import data science libs
import numpy as np
import pandas as pd

import matplotlib
matplotlib.rcParams['text.usetex'] = False
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['font.sans-serif'] = 'Arial'

#%matplotlib inline
import matplotlib.pyplot as plt

import seaborn as sns
from statannot import add_stat_annotation

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from libs.criticaldifference import *

---
# SETUP

In [None]:
# CONSTANTS
import experiment_config
from experiment_config import Datasets, DataRepresentation, Classifiers
from ts_classification import EnumDistMetrics

### SPECIFIC CONSTANTS

# if(experiment_config.DATASET_MAIN is not Datasets.Tsinghua):
#     raise ValueError("Set experiment_config.DATASET_MAIN to Tsinghua, because dataset 3D plots are based on it.")

# Base names (without extension) of the per-representation dataset files.
# Each name is the configured prefix followed by the string form of the
# representation, e.g. "dataset_quaternion".
# NOTE(review): assumes experiment_config.PREFIX_DATASET is a plain str — confirm.
FILENAME_DATASET_QUATERNION = experiment_config.PREFIX_DATASET + str(DataRepresentation.Quaternion)
FILENAME_DATASET_EULER = experiment_config.PREFIX_DATASET + str(DataRepresentation.Euler)
FILENAME_DATASET_SPHERICAL = experiment_config.PREFIX_DATASET + str(DataRepresentation.Spherical)
FILENAME_DATASET_YAW = experiment_config.PREFIX_DATASET + str(DataRepresentation.Yaw)

# Subfolder that groups every file written by this notebook
NOTEBOOK_SUBFOLDER_NAME = '99_Results/'

---
# UTILITY FUNCTIONS

Generate paths to write output files

In [None]:
# Folder name (with trailing slash) derived from the configured main dataset;
# used as the first subfolder of every temporary-file path.
STR_DATASET = f"{experiment_config.DATASET_MAIN!s}/"
print(STR_DATASET)
def gen_path_plot(filename, extension=None):
    """Generate the full path of a PLOT file from its bare name.

    :param filename: Name of the plot file, without folders.
    :param extension: File extension to use. When ``None`` the default image
        format ``experiment_config.IMG_FORMAT`` is applied.
    :return: Whatever ``utils.generate_complete_path`` returns for a file in
        ``experiment_config.PLOT_FOLDER`` / ``NOTEBOOK_SUBFOLDER_NAME``.
    """
    # The original `IMG_FORMAT if None else extension` always evaluated to
    # `extension`, so the default image format was never used.
    file_extension = experiment_config.IMG_FORMAT if extension is None else extension
    return utils.generate_complete_path(
        filename,
        main_folder=experiment_config.PLOT_FOLDER,
        subfolders=NOTEBOOK_SUBFOLDER_NAME,
        file_extension=file_extension,
        save_files=experiment_config.EXPORT_PLOTS,
    )

def gen_path_temp(filename, subfolders="", extension=experiment_config.TEMP_FORMAT):
    """Generate the full path of a TEMP file from its bare name.

    The file is located in ``experiment_config.TEMP_FOLDER``, inside the
    ``STR_DATASET`` folder followed by the optional ``subfolders``.
    """
    dataset_subfolders = STR_DATASET + subfolders
    return utils.generate_complete_path(
        filename,
        main_folder=experiment_config.TEMP_FOLDER,
        subfolders=dataset_subfolders,
        file_extension=extension,
    )

def gen_path_results(filename, subfolders="", extension=""):
    """Generate the full path of a RESULTS file (e.g. an exported dataframe).

    The file is located in ``experiment_config.RESULTS_FOLDER``, inside
    ``NOTEBOOK_SUBFOLDER_NAME`` followed by the optional ``subfolders``.
    """
    notebook_subfolders = NOTEBOOK_SUBFOLDER_NAME + subfolders
    return utils.generate_complete_path(
        filename,
        main_folder=experiment_config.RESULTS_FOLDER,
        subfolders=notebook_subfolders,
        file_extension=extension,
    )

---
# RESULTS

In [None]:
### LOAD AND COMPILE THE RESULTS OF BOTH CLASSIFICATION TASKS (VIDEOS AND USERS)

# Display names for the classifier identifiers stored in the results files.
# "Classifiers.KNN" is present in both families but is labelled differently in each.
_FBC_CLASSIFIER_NAMES = {"Classifiers.KNN":"KNN", "Classifiers.DT":"DT", "Classifiers.RF":"RF", "Classifiers.GBM":"GBM"}
_TSC_CLASSIFIER_NAMES = {"Classifiers.KNN":"1NN-DTW", "Classifiers.ROCKET":"ROCKET", "Classifiers.MiniRocket":"MiniRocket"}
# Display names for the data representations
_DATATYPE_NAMES = {"quaternion":"Quaternion", "euler":"Euler", "yaw":"Yaw", "all":"All"}


def _load_method_results(results_folder, dataset_name, method_subfolder, classifier_names, methodtype):
    """Read one results CSV and normalise its labels.

    :param results_folder: Root folder of one classification task (ends with "/").
    :param dataset_name: Dataset folder name, e.g. "IMT" or "Tsinghua".
    :param method_subfolder: Folder of the classifier family inside the dataset folder.
    :param classifier_names: Mapping from stored classifier identifiers to display names.
    :param methodtype: Short tag written to the new "methodtype" column.
    :return: DataFrame whose "dataset" column is renamed to "datatype", with
        mapped "classifier"/"datatype" values and the added "methodtype" column.
    """
    csv_path = results_folder + dataset_name + "/" + method_subfolder + "/" + experiment_config.RESULTS_FILENAME + ".csv"
    df = pd.read_csv(csv_path)
    # In the stored files the "dataset" column holds the data representation
    df.rename(columns = {"dataset":"datatype"}, inplace=True)
    # Note: `map` turns any value missing from the mapping into NaN
    df["classifier"] = df["classifier"].map(classifier_names)
    df["datatype"] = df["datatype"].map(_DATATYPE_NAMES)
    df["methodtype"] = methodtype
    return df


def _load_dataset_results(results_folder, dataset_name):
    """Load feature-based and time-series classifier results of one dataset.

    :return: Both result tables concatenated, with a "dataset" column set to
        ``dataset_name``.
    """
    fbc = _load_method_results(results_folder, dataset_name, "2_FeatureBasedClassifiers", _FBC_CLASSIFIER_NAMES, "fbc")
    tsc = _load_method_results(results_folder, dataset_name, "3_TimeSeriesClassifiers", _TSC_CLASSIFIER_NAMES, "tsc")
    combined = pd.concat([fbc, tsc], ignore_index=True)
    combined["dataset"] = dataset_name
    return combined


### RESULTS FROM VIDEO CLASSIFICATION
TEMP_RESULTS_FOLDER = experiment_config.ROOT+"results_classesAsVideos/"
IMT_results1 = _load_dataset_results(TEMP_RESULTS_FOLDER, "IMT")
TSINGHUA_results1 = _load_dataset_results(TEMP_RESULTS_FOLDER, "Tsinghua")

results1 = pd.concat([IMT_results1, TSINGHUA_results1], ignore_index=True)
results1["classlabels"] = "videos"

### RESULTS FROM USER IDENTIFICATION
TEMP_RESULTS_FOLDER = experiment_config.ROOT+"results_classesAsUsers/"
IMT_results2 = _load_dataset_results(TEMP_RESULTS_FOLDER, "IMT")
TSINGHUA_results2 = _load_dataset_results(TEMP_RESULTS_FOLDER, "Tsinghua")

results2 = pd.concat([IMT_results2, TSINGHUA_results2], ignore_index=True)
results2["classlabels"] = "users"

### FINAL COMPILATION OF RESULTS
df_results = pd.concat([results1, results2], ignore_index=True)

df_results.to_csv(gen_path_results("compilation_results_all", extension=".csv"), index=False)

In [None]:
# Quick check of the compiled table: its dimensions and first rows
print(df_results.shape)
print(df_results.head())

## Plot time series

In [None]:
### LOAD the labels, timestamps and stored head-pose arrays of the main dataset

# Class label of every time series, taken from the configured label column
labels_filename = experiment_config.DATASET_LABELS
labels = pd.read_csv(labels_filename)
classes = labels[experiment_config.CLASS_COLUMN_NAME].to_numpy(dtype=np.int32)

# Timestamps
timestamps_filename = experiment_config.DATASET_TIMESTAMPS
timestamps = np.loadtxt(timestamps_filename)

# Datasets: one stored array per data representation
dataset_quaternion, dataset_euler, dataset_yaw = (
    utils.load_binaryfile_npy(gen_path_temp(dataset_filename))
    for dataset_filename in (FILENAME_DATASET_QUATERNION, FILENAME_DATASET_EULER, FILENAME_DATASET_YAW)
)

In [None]:
def plot_summary_ts(data:np.ndarray, data_labels:np.ndarray, rolling_window_size = 5, axes_labels = ("X","Y","Z"), colors = ('r','g','b'), figsize=(10,20), save_path = None):
    """
    Plot the mean +- std of the time series, organized in rows per class
    label and columns per axis of movement.

    Every time series is first smoothed with a moving average; mean and std
    are then computed across all the series of one class at each time step.

    :param data: Numpy array with first dimension equal to time series index, second dimension values, and third dimension axes. A 2D array is treated as having a single axis.
    :param data_labels: 1D numpy array with labels of each time series.
    :param rolling_window_size: Number of samples of the moving-average window.
    :param axes_labels: One title per axis (third dimension of `data`).
    :param colors: One matplotlib color per axis.
    :param figsize: (width, height) of ONE subplot; the figure is scaled by the number of columns and rows.
    :param save_path: If not None, the figure is saved to this path.
    :return: The figure, or the integer 1 (after printing a message) when the
        lengths of `axes_labels`/`colors` do not match the number of axes.
    :rtype: matplotlib fig
    """

    # Force 3D array even if it is 2D array. For compatibility with functions
    if data.ndim == 2:
        data = np.expand_dims(data, axis=2)

    dim_ts = data.shape[2]

    classes = np.unique(data_labels)
    num_classes = len(classes)
    num_axes = len(axes_labels)

    if dim_ts != num_axes or dim_ts != len(colors):
        print("Please set `axes_labels` and `colors` with a list of same length than dimensions of one time series")
        return 1

    cols_plot = num_axes
    rows_plot = num_classes
    # squeeze=False always returns a 2D grid of axes, so `axes[i, j]` is valid
    # even with a single class and/or a single axis.
    fig, axes = plt.subplots(rows_plot, cols_plot, sharex=True, sharey=True,
                             figsize=(figsize[0]*cols_plot, figsize[1]*rows_plot),
                             gridspec_kw={"wspace":0.0,"hspace":0.0},
                             squeeze=False)

    for i, t_class in enumerate(classes):
        # Rows of `data` belonging to this class (same for every axis)
        class_rows = np.flatnonzero(data_labels == t_class)

        for j, t_axis in enumerate(axes_labels):
            # One row per time series, one column per time step
            data_temp = pd.DataFrame(data[class_rows, :, j])

            # Moving window with average to reduce noise. The frame is
            # transposed (rows = time) because the `axis` argument of
            # `rolling` is deprecated in recent pandas versions.
            smoothed = data_temp.T.rolling(rolling_window_size).mean()

            # Mean and std among all time series of the class, per time step
            mean_ts = smoothed.mean(axis=1)
            sd_ts   = smoothed.std(axis=1)

            # Limits of the filled area in the plot
            low_line  = (mean_ts - sd_ts)
            high_line = (mean_ts + sd_ts)

            # Plots
            ax = axes[i, j]
            ax.plot(mean_ts, colors[j], linewidth=2)
            ax.fill_between(mean_ts.index, low_line, high_line, color=colors[j], alpha=0.2)
            ax.set_xticklabels([])
            ax.grid(True)

            # Column titles on the first row, class labels on the first column
            if i == 0:
                ax.set_title(str('Dimension: ' + t_axis), fontsize=12)
            if j == 0:
                ax.set_ylabel(str('Class:' + str(t_class)))

    if save_path is not None:
        # A bare filename has no directory part; makedirs("") would fail
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        fig.savefig(save_path, dpi=100, bbox_inches='tight')

    return fig

In [None]:
# dataset = dataset_quaternion
# num_ts, length_ts, num_dims = dataset.shape
# ROLLING_WINDOW_PLOT = int(length_ts*0.02)

# if(experiment_config.SHOW_PLOTS): plot_summary_ts(dataset, classes, \
#                     rolling_window_size=ROLLING_WINDOW_PLOT, \
#                     axes_labels=["$q_w$","$q_i$","$q_j$","$q_k$"], \
#                     colors=['k','r','g','b'], \
#                     figsize=(2.5,1.8), \
#                     save_path=gen_path_plot("results_quaternion_traject", extension=".pdf"))

In [None]:
# Join the quaternion channels and the Euler angles (divided by pi) along the
# last axis, so both representations are drawn in the same figure.
dataset = np.concatenate([dataset_quaternion, dataset_euler/np.pi], axis=-1)
num_ts, length_ts, num_dims = dataset.shape
# Smoothing window of 2% of the series length; at least one sample so that
# short series do not produce an empty window.
ROLLING_WINDOW_PLOT = max(1, int(length_ts*0.02))

# Raw strings keep the LaTeX backslashes literal; "\p" in a normal string is
# an invalid escape sequence.
if experiment_config.SHOW_PLOTS:
    plot_summary_ts(dataset, classes,
                    rolling_window_size=ROLLING_WINDOW_PLOT,
                    axes_labels=["$q_w$","$q_i$","$q_j$","$q_k$", r"Yaw $\psi/\pi$", r"Pitch $\theta/\pi$", r"Roll $\phi/\pi$"],
                    colors=['k','r','g','b','m','y','c'],
                    figsize=(2.2,1.2),
                    save_path=gen_path_plot("results_all_traject", extension=".pdf"))

# Results

---
## Critical Diagram

In [None]:
# Preview the first rows of the compiled results
df_results.head()

In [None]:
### BEST CLASSIFIER AMONG ALL FOLDS OF TWO DATASETS, FOUR DATA REPRESENTATIONS

# Keep the video-classification rows and discard columns not used in the diagram
data = df_results[df_results["classlabels"] == "videos"].drop(
    columns=["fit_time", "score_time", "classlabels", "methodtype"]
)

# One identifier per (data representation, dataset, fold) combination; it is
# passed below as the "dataset" column of the critical-difference diagram
data = data.assign(
    datasetfold=lambda df: df.agg(lambda row: f"{row['datatype']}{row['dataset']}{row['fold']}", axis=1)
)
data = data.drop(columns=["datatype", "dataset", "fold"])
print(data.shape)
print(data.head())

filename = gen_path_plot("critdiff_f1score", extension=".pdf")
criticaldiff_diagram = draw_cd_diagram(
    df_perf=data,
    title=None,
    labels=True,
    img_filepath=filename,
    classifier_colname="classifier",
    dataset_colname="datasetfold",
    performance_colname="test_f1_macro",
    plot_width=7,
    plot_textspace=1.5,
)

In [None]:
### BEST CLASSIFIER PER DATASET AMONG ALL DATA REPRESENTATIONS AND FOLDS

for d_dtset in ["IMT","Tsinghua"]:
    # Video-classification rows of the current dataset only
    selected_rows = (df_results["dataset"] == d_dtset) & (df_results["classlabels"] == "videos")
    data = df_results[selected_rows].drop(
        columns=["fit_time", "score_time", "classlabels", "methodtype", "dataset"]
    )

    # One identifier per (data representation, fold) combination
    data = data.assign(
        datasetfold=lambda df: df.agg(lambda row: f"{row['datatype']}{row['fold']}", axis=1)
    )
    data = data.drop(columns=["datatype", "fold"])
    print(data.shape)
    print(data.head())

    filename = gen_path_plot("critdiff_f1score_"+d_dtset, extension=".pdf")
    criticaldiff_diagram = draw_cd_diagram(
        df_perf=data,
        title=None,
        labels=True,
        img_filepath=filename,
        classifier_colname="classifier",
        dataset_colname="datasetfold",
        performance_colname="test_f1_macro",
        plot_width=8,
        plot_textspace=1.5,
    )

---
## Table summarizing performance

In [None]:
# Video-classification rows, keeping only the grouping columns and the f1-score
data = df_results.loc[df_results["classlabels"] == "videos"].drop(
    columns=[
        "fit_time",
        "score_time",
        "test_accuracy",
        "test_precision_macro",
        "test_recall_macro",
        "classlabels",
        "fold",
    ]
)
data.head()


In [None]:
# Expand the method-family tags into the names shown in the table header.
# `replace` leaves values that are not in the mapping unchanged, so running
# this cell a second time does not turn the column into NaN (as `map` would).
data["methodtype"] = data["methodtype"].replace({"fbc":"Feature-based classifiers", "tsc":"Time-series classifiers"})

# Average over the folds, then spread method family and classifier into columns
d = data.groupby( ["datatype","dataset","methodtype","classifier"] ).mean()
d = d.unstack(["methodtype","classifier"])
d

---
## Line plot summarizing times to fit-predict

In [None]:
# Restrict the analysis of times to the video-classification task
data = df_results.loc[df_results["classlabels"] == "videos"]
data

In [None]:
# Total time of one fold: time to fit plus time to score.
# `assign` returns a new DataFrame instead of writing into the filtered slice
# of `df_results`, which avoids pandas' SettingWithCopyWarning.
data = data.assign(time=data["fit_time"] + data["score_time"])

# Drop unneeded columns
data = data.drop(columns=["test_accuracy","test_f1_macro","test_precision_macro","test_recall_macro","methodtype","classlabels", "fit_time","score_time","fold"])
data

In [None]:
# Average time per classifier, datatype and dataset; classifiers are spread
# into columns and the grouping keys are restored as regular columns
data = (
    data.groupby(["datatype", "dataset", "classifier"])
    .mean()
    .unstack("classifier")
    .reset_index()
)
data

In [None]:
# Dimensions of the datasets — presumably M = number of time series,
# N = samples per series and P = channels per representation (TODO confirm)
M_imt, M_tsg = 290, 432
N_imt, N_tsg = 901, 3601
P_yaw, P_eul, P_quat = 1, 3, 4
P_all = P_quat + P_eul

# Data size N * P for every (representation, dataset) pair; the M_* values
# are not part of this product
dict_mapping = {
    (representation, dataset_name): series_length * channels
    for dataset_name, series_length in (("IMT", N_imt), ("Tsinghua", N_tsg))
    for representation, channels in (("All", P_all), ("Quaternion", P_quat), ("Euler", P_eul), ("Yaw", P_yaw))
}
dict_mapping

In [None]:
# Look up the data size of every row from its (datatype, dataset) pair
datasize = [
    dict_mapping[(datatype, dataset_name)]
    for datatype, dataset_name in zip(data["datatype"], data["dataset"])
]

In [None]:
# Final arrays for plotting

# Order in which the classifiers are plotted. The columns are SELECTED by name:
# after `unstack` they are sorted alphabetically, so assigning this list to
# `data.columns` would attach the wrong classifier name to each column.
classifier_order = ["KNN","DT","RF","GBM","1NN-DTW","MiniRocket","ROCKET"]
data = data["time"][classifier_order].copy()  # Keep only times

# Add the size of dataset as index
datasize = np.array(datasize)
data.index = datasize
data = data.sort_index()
data

In [None]:
# One line per classifier: time of one fold against the data size (index)
fig, axes = plt.subplots(1, 1, figsize=(8, 5))
markers = ["*","x","+","v",">","D","s"]

for i,c in enumerate(data.columns):
    line_format = markers[i]+"-"
    axes.plot(data.index, data[c], line_format, label=c)

# Axis-wide settings are applied once instead of on every iteration
# axes.set_xscale("log")
axes.set_yscale("log")
axes.set_title("Time to train-fit one fold")

axes.legend()

## Scaled training time vs accuracy

In [None]:
# Start again from the compiled results of the video-classification task
data = df_results.loc[df_results["classlabels"] == "videos"]
data

In [None]:
# Shorter column names for the plot, plus the total time of one fold
# (time to fit plus time to score)
data = data.rename(columns={"datatype": "repr.", "test_f1_macro": "f1_score"})
data = data.assign(time=data["fit_time"] + data["score_time"])

# Drop unneeded columns
unneeded_columns = [
    "test_accuracy",
    "test_precision_macro",
    "test_recall_macro",
    "methodtype",
    "classlabels",
    "fit_time",
    "score_time",
    "fold",
]
data = data.drop(columns=unneeded_columns)
data

In [None]:
# Average f1-score and time per representation, dataset and classifier;
# classifiers are spread into a second column level
data = data.groupby(["repr.", "dataset", "classifier"]).mean().unstack("classifier")
data

In [None]:
# Scale values with respect to the performance and time of ROCKET.
# Work on a copy so the unscaled averages in `data` are left untouched.
data_scaled = data.copy()

for group in ["f1_score", "time"]:
    # Take the ROCKET reference from the unscaled data once per group, so the
    # result does not depend on the position of the ROCKET column.
    rocket_reference = data[(group, "ROCKET")].to_numpy()
    for c in data[group].columns:
        # Assign through the full (group, classifier) key: chained assignment
        # `df[group][c] = ...` may write to a temporary copy instead.
        data_scaled[(group, c)] = data[(group, c)].to_numpy() / rocket_reference

data_scaled

In [None]:
# Return to long format: move the "classifier" column level back into the
# rows, then turn the index levels into regular columns
data_scaled = data_scaled.stack("classifier").reset_index()
data_scaled.head()

In [None]:
# Style and figure size are set in ONE call: `sns.set(rc=...)` on its own
# resets the style to seaborn's default and would discard a previous
# `sns.set_style('whitegrid')`.
sns.set(style="whitegrid", rc={"figure.figsize":(7,4)})

# Scatter plot: one point per (classifier, representation, dataset), with time
# and f1-score expressed relative to ROCKET
hue_order = ["KNN","DT","RF","GBM","1NN-DTW","MiniRocket","ROCKET"]
ax = sns.scatterplot(data=data_scaled, x="time", y="f1_score", hue="classifier", style="dataset", hue_order=hue_order, s=80)
# Reference lines through (1, 1), the position of ROCKET after scaling
ax.axvline(x=1, c="gray", linestyle="--", linewidth=2)
ax.axhline(y=1, c="gray", linestyle="--", linewidth=2)

# Texts describing the four quadrants around the reference lines
ax.text(x=0.02, y=1.21, c="dimgray", s="Faster and more accurate")
ax.text(x=4.5, y=1.21, c="dimgray", s="Slower but more accurate")
ax.text(x=0.02, y=0.35, c="dimgray", s="Faster but less accurate")
ax.text(x=4.5, y=0.35, c="dimgray", s="Slower and less accurate")
ax.annotate("Proposed method\n with ROCKET", xy=(1,1), xytext=(0.07,1.07), c="black", arrowprops=dict(arrowstyle="fancy", fc="k", ec="k"))

# Details
ax.set_ylim([0.3,1.3])
ax.set_xscale("log")
ax.set_xlabel("Scaled training time")
ax.set_ylabel("Scaled performance (f1-score)")
# Legend placed below the axes, in three columns
plt.legend(bbox_to_anchor=(0.12, -0.2), loc='upper left', borderaxespad=0, ncol=3)

filename = gen_path_plot("scatterplot_trainingtimes", extension=".pdf")
plt.savefig(filename, dpi=400, bbox_inches='tight')

---

All plots in a single image

In [None]:
# sns.set_style("ticks")

# mc_iter = experiment_config.MC_ITERATIONS

# rows_plot=3
# cols_plot=2
# y_lim = [0,1]
# figsize=(5,4)
# save_path = gen_path_plot(f"compiled_results_2",extension=".pdf") # None

# fig, axes = plt.subplots(rows_plot, cols_plot, figsize=(figsize[0]*cols_plot, figsize[1]*rows_plot), sharex=False, sharey=False)
# ax_idx = 0
# try:
#     axes = axes.T.flatten() #axes.flatten()
# except Exception as e: # Only one axis
#     axes = [axes]

# ############################### ###############################
# ############################### IMT ###############################
# ############################### ###############################
# dataset_text = "IMT" #"IMT" # "Tsinghua"
# dataframe = IMT_results #IMT_results #TSI_results


# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[ data["classLabel"]==1 ]
# x_colname = "classifier"
# y_colname = "accuracy"
# hue_colname = "dataRep"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5)
# # Statistical tests
# box_pairs = [
#     (("1-NN","Quaternion(4D)"),("1-NN","Euler(3D)")),
#     (("1-NN","Euler(3D)"),("1-NN","Spherical(2D)")),
#     (("1-NN","Spherical(2D)"),("1-NN","Yaw(1D)")),
#     (("7-NN","Quaternion(4D)"),("7-NN","Euler(3D)")),
#     (("7-NN","Euler(3D)"),("7-NN","Spherical(2D)")),
#     (("7-NN","Spherical(2D)"),("7-NN","Yaw(1D)")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname,#order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="greater"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Results per data representation", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)


# ###############################
# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[ data["classLabel"]==1 ]

# x_colname = "dataRep"
# y_colname = "accuracy"
# hue_colname = "classifier"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5, palette="Spectral")
# # Statistical tests
# box_pairs = [
#     (("Quaternion(4D)","1-NN"),("Quaternion(4D)","7-NN")),
#     (("Euler(3D)","1-NN"),("Euler(3D)","7-NN")),
#     (("Spherical(2D)","1-NN"),("Spherical(2D)","7-NN")),
#     (("Yaw(1D)","1-NN"),("Yaw(1D)","7-NN")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, #order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="less"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Results per classifier", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)

# ###############################
# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[data["classLabel"]==1]
# data = data[ data["distMetric"]!=0 ]
# data = data[ data["classifier"]=="7-NN" ]
# x_colname = "dataRep"
# y_colname = "accuracy"
# hue_colname = "distMetric"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5, palette="Set2")
# # Statistical tests
# box_pairs = [
#     (("Quaternion(4D)","Euclidean"),("Quaternion(4D)","Spec_Eucl")),
#     (("Quaternion(4D)","Spec_Eucl"),("Quaternion(4D)","DTW")),
#     (("Euler(3D)","Euclidean"),("Euler(3D)","Spec_Eucl")),
#     (("Euler(3D)","Spec_Eucl"),("Euler(3D)","DTW")),
#     (("Spherical(2D)","Euclidean"),("Spherical(2D)","Spec_Eucl")),
#     (("Spherical(2D)","Spec_Eucl"),("Spherical(2D)","DTW")),
#     (("Yaw(1D)","Euclidean"),("Yaw(1D)","Spec_Eucl")),
#     (("Yaw(1D)","Spec_Eucl"),("Yaw(1D)","DTW")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, #order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="less"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Results per distance metric in 7-NN", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)

# ############################### ###############################
# ############################### TSINGHUA ###############################
# ############################### ###############################
# dataset_text = "Tsinghua" #"IMT" # "Tsinghua"
# dataframe = TSI_results #IMT_results #TSI_results

# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[ data["classLabel"]==1 ]
# x_colname = "classifier"
# y_colname = "accuracy"
# hue_colname = "dataRep"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5)
# # Statistical tests
# box_pairs = [
#     (("1-NN","Quaternion(4D)"),("1-NN","Euler(3D)")),
#     (("1-NN","Euler(3D)"),("1-NN","Spherical(2D)")),
#     (("1-NN","Spherical(2D)"),("1-NN","Yaw(1D)")),
#     (("11-NN","Quaternion(4D)"),("11-NN","Euler(3D)")),
#     (("11-NN","Euler(3D)"),("11-NN","Spherical(2D)")),
#     (("11-NN","Spherical(2D)"),("11-NN","Yaw(1D)")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname,#order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="greater"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Results per data representation", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)


# ###############################
# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[ data["classLabel"]==1 ]
# x_colname = "dataRep"
# y_colname = "accuracy"
# hue_colname = "classifier"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5, palette="Spectral")
# # Statistical tests
# box_pairs = [
#     (("Quaternion(4D)","1-NN"),("Quaternion(4D)","11-NN")),
#     (("Euler(3D)","1-NN"),("Euler(3D)","11-NN")),
#     (("Spherical(2D)","1-NN"),("Spherical(2D)","11-NN")),
#     (("Yaw(1D)","1-NN"),("Yaw(1D)","11-NN")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, #order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="less"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Distance metric in classifier", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)

# ###############################
# # Plot per axis
# data = dataframe #### DATA TO PLOT
# data = data[data["classLabel"]==1]
# data = data[ data["distMetric"]!=0 ]
# data = data[ data["classifier"]=="11-NN" ]
# x_colname = "dataRep"
# y_colname = "accuracy"
# hue_colname = "distMetric"
# ax = axes[ax_idx]; ax_idx=ax_idx+1

# # Plot
# ax = sns.boxplot(ax=ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, linewidth=0.5, palette="Set2")
# # Statistical tests
# box_pairs = [
#     (("Quaternion(4D)","Euclidean"),("Quaternion(4D)","Spec_Eucl")),
#     (("Quaternion(4D)","Spec_Eucl"),("Quaternion(4D)","DTW")),
#     (("Euler(3D)","Euclidean"),("Euler(3D)","Spec_Eucl")),
#     (("Euler(3D)","Spec_Eucl"),("Euler(3D)","DTW")),
#     (("Spherical(2D)","Euclidean"),("Spherical(2D)","Spec_Eucl")),
#     (("Spherical(2D)","Spec_Eucl"),("Spherical(2D)","DTW")),
#     (("Yaw(1D)","Euclidean"),("Yaw(1D)","Spec_Eucl")),
#     (("Yaw(1D)","Spec_Eucl"),("Yaw(1D)","DTW")),
# ]
# ax, test_results = add_stat_annotation(ax, data=data, x=x_colname, y=y_colname, hue=hue_colname, #order=order,
#                                    box_pairs=box_pairs,
#                                    test='t-test_ind', text_format='star', loc='inside', verbose=2,
#                                    stats_params=dict(alternative="less"))

# if(y_lim is not None):
#     ax.set_ylim(tuple(y_lim))
# ax.set(title=f"Dataset {dataset_text} | Results per distance metric in 11-NN", xlabel=x_colname, ylabel=y_colname)
# ax.grid(True)

# ###############################
# # Figure setup
# # fig.suptitle(f'Classification results over {mc_iter} Monte-Carlo simulations')
# # fig.subplots_adjust(bottom=0.25, wspace=0.08)
# plt.tight_layout()

# if(save_path is not None):
#     if not os.path.isdir(os.path.dirname(save_path)):
#         os.makedirs(os.path.dirname(save_path))
#     plt.savefig(save_path, dpi=400, bbox_inches='tight')

## EOF

In [None]:
# Reached only when every previous cell ran without raising an exception
print(">> FINISHED WITHOUT ERRORS!!")