In [1]:
import numpy as np
import pandas as pd 
import pickle

In [2]:
# Benchmark datasets, grouped by the number of channels in each time series.
multivariate_datasets = [
    "Cricket",
    "ERing",
    "Handwriting",
    "Libras",
    "NATOPS",
    "RacketSports",
    "UWaveGestureLibrary",
    "ArticularyWordRecognition",
]

univariate_datasets = [
    "ArrowHead",
    "BME",
    "ECG200",
    "FacesUCR",
    "GunPoint",
    "PhalangesOutlinesCorrect",
    "Trace",
]

# Univariate datasets first, then multivariate ones.
valid_datasets = [*univariate_datasets, *multivariate_datasets]

# One [dataset, type] pair per dataset, in the same order as `valid_datasets`.
lst = [
    [name, kind]
    for kind, names in (
        ("univariate", univariate_datasets),
        ("multivariate", multivariate_datasets),
    )
    for name in names
]

# Lookup table used to attach the dataset type to result rows.
ds_df = pd.DataFrame(data=lst, columns=["dataset", "type"])

# Shape analysis classification

In [3]:
# Build the shape-analysis F-score table (rows: type/dataset, columns: model)
# from the result CSVs at missing rate 0.0 and print it as LaTeX.
fpca_df = pd.read_csv("results/shape-fpca_0.0_0.csv",index_col=0)
fpca_df["model"] = "Shape-FPCA (2024)"
tclr_df = pd.read_csv("results/tclr_0.0_0.csv",index_col=0)
tclr_df["model"] = "TCLR (2024)"
lddmm_df = pd.read_csv("results/lddmm_methods_0.0_0.csv",index_col=0)
# This file already has a "model" column: "ts-lddmm" is relabelled as ours,
# every other value is labelled as the LDDMM baseline.
lddmm_df["model"] = lddmm_df["model"].apply(lambda x : "TS-LDDMM (ours)" if x == "ts-lddmm" else "LDDMM (2008)")
df = pd.concat([fpca_df,tclr_df,lddmm_df]).reset_index(drop = True)
df = pd.merge(df,ds_df,on="dataset")
df = pd.pivot_table(df,"fscore",["type","dataset"],"model",aggfunc="first")
# Fix the row-group order and the column order of the printed table.
df = df.loc[["univariate","multivariate"]]
df = df[["Shape-FPCA (2024)", "TCLR (2024)", "LDDMM (2008)","TS-LDDMM (ours)"]]

# `na_rep` renders exactly the missing cells as "--". Formatting the cross
# product of "rows with a NaN" x "columns with a NaN" instead would also
# overwrite valid cells whenever the NaNs do not form a rectangular block.
df_s = df.style.format("{:.2f}", na_rep="--")

# Highlight the best score of each row in bold and the runner-up underlined.
for row in df.index:
    # Stable descending sort of the available scores: ties keep column order,
    # so the best and the runner-up are always two distinct columns.
    ranked = df.loc[row].dropna().sort_values(ascending=False, kind="stable")
    if ranked.empty:
        # No score at all for this dataset: nothing to highlight.
        continue
    # redo formatting for a specific cell
    df_s = df_s.format(lambda x: "\\textbf{" + f'{x:.2f}' + "}", subset=(row, ranked.index[0]))
    if len(ranked) > 1:
        # Only underline when a second score exists; otherwise the underline
        # would replace the bold formatting of the single available score.
        df_s = df_s.format(lambda x: "\\underline{" + f'{x:.2f}' + "}", subset=(row, ranked.index[1]))


print(df_s.to_latex(hrules = True, clines = "skip-last;data",multirow_align = "t"))


\begin{tabular}{llrrrr}
\toprule
 & model & Shape-FPCA (2024) & TCLR (2024) & LDDMM (2008) & TS-LDDMM (ours) \\
type & dataset &  &  &  &  \\
\midrule
\multirow[t]{7}{*}{univariate} & ArrowHead & 0.18 & \textbf{29.72} & 0.84 & \underline{0.91} \\
 & BME & 0.16 & \textbf{8.02} & 0.82 & \underline{1.00} \\
 & ECG200 & 0.40 & \textbf{5.89} & \underline{0.81} & 0.79 \\
 & FacesUCR & 0.08 & \textbf{77.90} & 0.69 & \underline{0.86} \\
 & GunPoint & 0.93 & \textbf{11.42} & 0.83 & \underline{1.00} \\
 & PhalangesOutlinesCorrect & 0.39 & \textbf{40.95} & \underline{0.53} & 0.52 \\
 & Trace & 0.55 & \textbf{33.96} & 0.46 & \underline{1.00} \\
\cline{1-6}
\multirow[t]{8}{*}{multivariate} & ArticularyWordRecognition & -- & -- & \underline{0.98} & \textbf{1.00} \\
 & Cricket & -- & -- & \underline{0.77} & \textbf{0.93} \\
 & ERing & -- & -- & \underline{0.95} & \textbf{0.98} \\
 & Handwriting & -- & -- & \underline{0.22} & \textbf{0.44} \\
 & Libras & -- & -- & \underline{0.56} & \textbf{0.60} \\
 

# Robustness

In [4]:
# Maps the short model identifiers found in the result files to the labels
# printed in the tables.
model_mapping = {
    "rnn": "RNN (1999)",
    "lstm": "LSTM (1997)",
    "gru": "GRU (2014)",
    "mtan": "MTAN (2021)",
    "miam": "MIAM (2022)",
    "neuralsde_1_18": "Neural SDE (2019)",
    "neuralsde_4_17": "Neural LNSDE (2024)",
    "ode-lstm": "ODE-LSTM (2020)",
    "lddmm": "LDDMM (2008)",
    "ts-lddmm": "TS-LDDMM (ours)",
}

In [5]:
# Identifiers of the baseline models whose scores are read from the
# per-dataset pickle result files.
model_name_list = [
    "rnn",
    "lstm",
    "gru",
    "mtan",
    "miam",
    "ode-lstm",
    "neuralsde_1_18",
    "neuralsde_4_17",
]

In [6]:
# Display labels in the order used to select and arrange the models in the
# robustness tables.
model_order = [
    "RNN (1999)",
    "LSTM (1997)",
    "GRU (2014)",
    "MTAN (2021)",
    "MIAM (2022)",
    "ODE-LSTM (2020)",
    "Neural SDE (2019)",
    "Neural LNSDE (2024)",
    "LDDMM (2008)",
    "TS-LDDMM (ours)",
]

In [7]:
# Average rank of each model per missing-data rate, stored in `rank_df`
# (indexed by model label and missing rate).
lst = []

# Baseline models: one pickle file per (dataset, missing rate, model); the
# "f1score" entry of the `[-1]` element of the unpickled object is kept.
for missing_rate in [0.0,0.3,0.5,0.7]:
    records = []
    for dataset in valid_datasets:
        for model in model_name_list:
            path = "results/{}/{}/{}_{}_{}_0".format(dataset,missing_rate,dataset,missing_rate,model)
            with open(path,"rb") as f:
                history = pickle.load(f)
            records.append([dataset, model, history[-1]["f1score"]])
    baseline_scores = pd.DataFrame(records, columns=["dataset","model","score"])
    lst.append(baseline_scores.assign(missing_rate=missing_rate))

# LDDMM-based methods: one CSV per missing rate, with the score stored in "fscore".
for missing_rate in [0.0,0.3,0.5,0.7]:
    lddmm_scores = pd.read_csv("results/lddmm_methods_{}_0.csv".format(missing_rate),index_col=0)
    lddmm_scores["missing_rate"] = missing_rate
    lddmm_scores["score"] = lddmm_scores["fscore"]
    lst.append(lddmm_scores[["dataset", "model", "score", "missing_rate"]])

all_scores = pd.concat(lst)
# Replace the short identifiers by display labels (KeyError on an unknown identifier).
all_scores["model"] = all_scores["model"].map(lambda key: model_mapping[key])
# Ranks are assigned in ascending order, so negate the scores to give the
# highest score the smallest rank.
all_scores["score"] = all_scores["score"].map(lambda value: -value)

df = (
    pd.pivot_table(all_scores, "score", ["missing_rate","dataset"], "model", aggfunc="first")
    .rank(axis=1, method="max")   # rank the models within each (missing_rate, dataset) row
    .reset_index()
    .drop(["dataset"], axis=1)
    .groupby("missing_rate")
    .mean()                       # mean rank over the datasets
    .loc[:, model_order]
    .reset_index()
)

# Long format: one row per (model, missing_rate), mean rank in the "value" column.
rank_df = df.melt(id_vars="missing_rate").set_index(["model","missing_rate"])

In [8]:
# Build the robustness table: for every model and missing-data rate, the
# mean and standard deviation of the score over datasets plus the mean rank
# taken from `rank_df`, printed as LaTeX.
lst = []
for missing_rate in [0.0,0.3,0.5,0.7]:
    tlst = []
    for dataset in valid_datasets: 
        for model in model_name_list: 
            path = "results/{}/{}/{}_{}_{}_0".format(dataset,missing_rate,dataset,missing_rate,model)
            # NOTE(review): pickle.load can execute arbitrary code; only run
            # this on result files produced by a trusted source.
            with open(path,"rb") as f:
                # Keep the "f1score" entry of the `[-1]` element of the unpickled object.
                score = pickle.load(f)[-1]["f1score"]
                tlst.append([dataset,model,score])
    df = pd.DataFrame(tlst, columns=["dataset","model","score"])
    df["missing_rate"] = missing_rate
    lst.append(df)

# LDDMM-based methods: one CSV per missing rate, with the score stored in "fscore".
for missing_rate in [0.0,0.3,0.5,0.7]: 
    df = pd.read_csv("results/lddmm_methods_{}_0.csv".format(missing_rate),index_col=0)
    df["missing_rate"] = missing_rate
    df["score"] = df["fscore"]
    df = df[["dataset", "model", "score", "missing_rate"]]
    lst.append(df)
        
df = pd.concat(lst)
# Replace the short identifiers by display labels (KeyError on an unknown identifier).
df['model'] = df['model'].apply(lambda x : model_mapping[x])
mean_df = df.groupby(["model","missing_rate"]).score.mean()
std_df = df.groupby(["model","missing_rate"]).score.std()
# Column positions after the concat: 0 = mean, 1 = std, 2 = mean rank.
df = pd.concat([mean_df,std_df,rank_df],axis=1)
# The mean and std columns share the label "score", so the cells are read by
# position. `.iloc` makes that explicit; `row[0]` on a label-indexed Series is
# deprecated and emits a FutureWarning. The raw string keeps "\pm" verbatim
# without relying on an invalid escape sequence.
df["mean_std"] = df.apply(
    lambda row : r"${} \pm {}$  & {}".format(
        np.round(row.iloc[0],2),
        np.round(row.iloc[1],2),
        np.round(row.iloc[2],2),
    ),
    axis=1,
)
df = df["mean_std"].reset_index()
df = pd.pivot_table(df,"mean_std","model","missing_rate",aggfunc="first")
df = df.loc[model_order]
print(df.style.to_latex(hrules = True, clines = "skip-last;data",multirow_align = "t"))

\begin{tabular}{lllll}
\toprule
missing_rate & 0.000000 & 0.300000 & 0.500000 & 0.700000 \\
model &  &  &  &  \\
\midrule
RNN (1999) & $0.64 \pm 0.21$  & 6.2 & $0.53 \pm 0.23$  & 6.6 & $0.48 \pm 0.21$  & 7.2 & $0.44 \pm 0.21$  & 6.07 \\
LSTM (1997) & $0.61 \pm 0.29$  & 6.0 & $0.57 \pm 0.29$  & 6.27 & $0.53 \pm 0.25$  & 6.07 & $0.51 \pm 0.29$  & 5.27 \\
GRU (2014) & $0.71 \pm 0.26$  & 4.2 & $0.68 \pm 0.28$  & 4.27 & $0.66 \pm 0.28$  & 3.73 & $0.59 \pm 0.28$  & 3.67 \\
MTAN (2021) & $0.59 \pm 0.28$  & 7.13 & $0.58 \pm 0.28$  & 5.8 & $0.54 \pm 0.29$  & 5.33 & $0.51 \pm 0.28$  & 5.0 \\
MIAM (2022) & $0.48 \pm 0.35$  & 6.93 & $0.42 \pm 0.33$  & 8.27 & $0.47 \pm 0.31$  & 6.93 & $0.35 \pm 0.31$  & 7.6 \\
ODE-LSTM (2020) & $0.63 \pm 0.24$  & 6.0 & $0.57 \pm 0.25$  & 6.53 & $0.51 \pm 0.24$  & 7.27 & $0.45 \pm 0.23$  & 6.73 \\
Neural SDE (2019) & $0.48 \pm 0.28$  & 7.67 & $0.47 \pm 0.26$  & 7.47 & $0.45 \pm 0.27$  & 7.13 & $0.45 \pm 0.25$  & 6.0 \\
Neural LNSDE (2024) & $0.7 \pm 0.27$  & 3.87 & 

  df["mean_std"] = df.apply(lambda row : "${} \pm {}$  & {}".format(np.round(row[0],2),np.round(row[1],2),np.round(row[2],2)),axis=1)
  df["mean_std"] = df.apply(lambda row : "${} \pm {}$  & {}".format(np.round(row[0],2),np.round(row[1],2),np.round(row[2],2)),axis=1)


: 