In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
# NOTE(review): with no category/module argument this filter ignores *every*
# warning raised after this cell runs, including deprecation warnings from
# pandas/NumPy. Consider narrowing it once the noisy source is identified.
warnings.filterwarnings("ignore")

In [3]:
import os
import re
import pandas as pd
import ace_tools_open as tools;

# Root folder that holds one sub-folder per experiment run
RESULTS_DIR = "TSLibrary/results"

# Captures the number written right after "accuracy:" in a result file
accuracy_pattern = re.compile(r"accuracy:([\d\.]+)")

# One (dataset, model, accuracy in percent) tuple per usable run folder
data = []

for folder in os.listdir(RESULTS_DIR):
    folder_path = os.path.join(RESULTS_DIR, folder)
    if not os.path.isdir(folder_path):
        continue

    # The 2nd and 3rd "_"-separated tokens of the folder name are used as
    # dataset and model; folders with fewer than three tokens are skipped.
    parts = folder.split("_")
    if len(parts) < 3:
        continue
    dataset_name, model_name = parts[1], parts[2]

    result_file = os.path.join(folder_path, "result_classification.txt")
    if not os.path.isfile(result_file):
        continue

    with open(result_file, "r") as f:
        match = accuracy_pattern.search(f.read())
    if match is None:
        continue

    # Only the first reported accuracy is used; convert the fraction to a
    # percentage rounded to two decimals.
    accuracy = round(100 * float(match.group(1)), 2)
    data.append((dataset_name, model_name, accuracy))


# Long format -> one row per dataset, one column per model
df_tmp = pd.DataFrame(data, columns=["Dataset", "Model", "Accuracy"])
df_attention = df_tmp.pivot(
    index="Dataset",
    columns="Model",
    values="Accuracy",
).reset_index()
df_attention.columns.name = None

# Results stored in the CSV are joined with the ones parsed above; the outer
# join keeps datasets that appear in only one of the two tables.
df_non_attention = pd.read_csv("out/classification_results.csv")
df = df_non_attention.merge(df_attention, on="Dataset", how="outer")


In [4]:
# Row order of the final table
datasets = [
    "PDSetting1",
    "PDSetting2",
    "PronunciationAudio",
    "ECGFiveDays",
    "FreezerSmallTrain",
    "HouseTwenty",
    "InsectEPGRegularTrain",
    "ItalyPowerDemand",
    "Lightning7",
    "MoteStrain",
    "PowerCons",
    "SonyAIBORobotSurface2",
    "UWaveGestureLibraryAll",
]

# Column order of the final table (year noted next to each model)
models = [
    "DTW",            # 2011
    "TSF",            # 2013
    "BOSS",           # 2015
    "BOSS-E",         # 2015
    "RISE",           # 2016
    "Shapelet",       # 2017
    "catch22",        # 2019
    "SVC",            # 2019
    "LSTM-FCN",       # 2019
    "Teaser",         # 2020
    "Rocket",         # 2020
    "Hive-Cote 2",    # 2021
    "Informer",       # 2021
    "Autoformer",     # 2021
    "FEDformer",      # 2022
    "ETSformer",      # 2022
    "LightTS",        # 2022
    "PatchTST",       # 2023
    "Crossformer",    # 2023
    "DLinear",        # 2023
    "TimesNet",       # 2023
    "iTransformer",   # 2024
    "Mamba",          # 2024
    "Motion Code",    # 2024
]

# Keep only the listed models and datasets, in the listed order. Both the
# column selection and .loc raise KeyError if a listed name is missing.
df = (
    df[["Dataset", *models]]
    .set_index("Dataset")
    .loc[datasets]
    .reset_index()
    .assign(
        Dataset=lambda frame: pd.Categorical(
            frame["Dataset"], categories=datasets, ordered=True
        )
    )
    .sort_values("Dataset")
)

In [5]:
import numpy as np
# Highlight dataframe with best and second-best accuracy
def highlight_best_html_rounded(data):
    """Return a copy of ``data`` with each row's two best scores highlighted.

    The first column is treated as the row label and is left untouched. Every
    other cell is expected to hold a number (or numeric string) or the literal
    string ``"Error"``. Numeric cells are rounded to two decimals and replaced
    by their string form. Cells equal to the row's highest rounded value are
    wrapped in a red ``<span>``; cells equal to the second-highest distinct
    value are wrapped in a blue ``<span>``. Tied cells are all highlighted.
    ``"Error"`` cells are kept verbatim and NaN cells are rendered as
    ``"nan"``; neither takes part in the ranking.

    Args:
        data: DataFrame whose first column is the label and whose remaining
            columns hold the scores. It is not modified.

    Returns:
        A new DataFrame of the same shape whose score columns have object
        dtype and hold plain or HTML-wrapped strings.

    Raises:
        ValueError: If a score cell is a string other than ``"Error"`` that
            cannot be parsed as a float.
    """
    score_columns = list(data.columns[1:])

    # The score columns are about to receive strings. Switch them to object
    # dtype up front: writing text into a float64 column relies on implicit
    # upcasting, which pandas has deprecated.
    styled_data = data.astype({col: object for col in score_columns})

    for index, row in data.iterrows():
        # Round every numeric cell exactly once with Python's round() and use
        # those same numbers for ranking and for display. Ranking with a
        # different rounding routine (e.g. NumPy's) can yield a "best" value
        # that no displayed cell equals, leaving the best cell unhighlighted.
        rounded = []
        for col in score_columns:
            val = row[col]
            if isinstance(val, str) and val == "Error":
                continue  # "Error" cells stay as they are and are not ranked
            rounded.append((col, round(float(val), 2)))

        # Distinct values in descending order, ignoring NaN
        ranked = sorted(
            {val for _, val in rounded if not np.isnan(val)},
            reverse=True,
        )
        max_val = ranked[0] if ranked else None
        second_best_val = ranked[1] if len(ranked) > 1 else None

        for col, val in rounded:
            if val == max_val:
                cell = f'<span style="background-color: red; color: white;">{val}</span>'
            elif val == second_best_val:
                cell = f'<span style="background-color: blue; color: white;">{val}</span>'
            else:
                cell = f"{val}"  # Keep values rounded
            styled_data.at[index, col] = cell

    return styled_data

# Build the display version of df: scores rounded to two decimals, with each
# row's best and second-best values wrapped in HTML highlight markup.
highlighted_df = highlight_best_html_rounded(df)


In [6]:
# Show the highlighted table via the ace_tools_open helper, labelled
# "Classification Results".
tools.display_dataframe_to_user(name="Classification Results", dataframe=highlighted_df)

Classification Results


Dataset,DTW,TSF,BOSS,BOSS-E,RISE,Shapelet,catch22,SVC,LSTM-FCN,Teaser,Rocket,Hive-Cote 2,Informer,Autoformer,FEDformer,ETSformer,LightTS,PatchTST,Crossformer,DLinear,TimesNet,iTransformer,Mamba,Motion Code
Loading ITables v2.2.5 from the internet... (need help?),,,,,,,,,,,,,,,,,,,,,,,,


In [7]:
# Destination file for the HTML export of the highlighted table
html_filename = "out/all_classification_benchmark_results.html"
# escape=False writes the <span> highlight markup stored in the cells as raw
# HTML instead of escaping it; index=False leaves out the row-index column.
highlighted_df.to_html(html_filename, escape=False, index=False)