### Experiments: Comparing RSAST with SAST Ridge, HIVE-COTE, ROCKET, TS-CHIEF

In [40]:
from sast.sast import *
from sktime.datasets import load_UCR_UEA_dataset, tsc_dataset_names
from sktime.classification.kernel_based import RocketClassifier
import time
import pandas as pd
from sktime.transformations.panel.shapelet_transform import ShapeletTransform
from cd_diagram.cd_function import *
import os

In [41]:
def _read_result_csvs(directory, prefix="", add_filename=False):
    """Read the CSV files located directly inside ``directory``.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    prefix : str, default ""
        Only files whose name starts with ``prefix`` are read. The empty
        default accepts every ``.csv`` file.
    add_filename : bool, default False
        When True, a ``filename`` column holding the source file name is
        added to each dataframe.

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per matching file, in the order ``os.listdir`` yields them.

    Raises
    ------
    ValueError
        If no file in ``directory`` matches. Raised here with the folder name
        instead of letting ``pd.concat`` fail on an empty list.
    """
    frames = []
    for filename in os.listdir(directory):
        if not (filename.startswith(prefix) and filename.endswith(".csv")):
            continue
        frame = pd.read_csv(os.path.join(directory, filename))
        if add_filename:
            frame['filename'] = filename
        frames.append(frame)
    if not frames:
        raise ValueError(
            f"No CSV file starting with {prefix!r} found in {directory!r}"
        )
    return frames


# Results of the reference classifiers: every .csv file in this folder is read.
# NOTE: absolute, machine-specific path -- adjust when running elsewhere.
directory = '/home/nirojasvar/random_sast/ResultsByClassifier'
dfs = _read_result_csvs(directory, add_filename=True)

# Keep the 'folds:' and '0' columns (renamed to dataset / score) and derive the
# method name from the part of the file name before the first underscore.
# .assign() returns a new frame, so no column is set on a slice of another frame.
df_other_methods = (
    pd.concat(dfs, ignore_index=True)
    .loc[:, ['filename', 'folds:', '0']]
    .assign(method=lambda frame: frame['filename'].str.split('_').str[0])
    .rename(columns={'folds:': 'dataset', '0': 'score'})
    .loc[:, ['dataset', 'score', 'method']]
)

# RSAST results: only files named "df_overall_tunning*.csv" are read.
# NOTE: absolute, machine-specific path -- adjust when running elsewhere.
directory = '/home/nirojasvar/random_sast/rsast_result'
dfs = _read_result_csvs(directory, prefix="df_overall_tunning")

# Concatenate all the RSAST dataframes into one and show its columns
df_result = pd.concat(dfs, ignore_index=True)
df_result.columns

Index(['Unnamed: 0', 'accuracy', 'time', 'cweights_time', 'fsubsequence_time',
       'tdataset_time', 'tclassifier_time', 'dataset_name', 'classifier_name',
       'rpoint', 'nb_per_class', 'method', 'len_method'],
      dtype='object')

In [42]:
# Keep a single RSAST configuration and only the three columns needed for the
# comparison with the reference classifiers.
df_result = df_result.loc[
    df_result["classifier_name"].eq("ACF&PACF: n_random_points=100 nb_inst_per_class=50"),
    ['accuracy', 'dataset_name', 'classifier_name'],
]

# Dataset name of every remaining run (one entry per row, repeats included)
df_result['dataset_name'].values


array(['SmoothSubspace', 'GunPoint', 'ItalyPowerDemand', 'ShapeletSim',
       'ItalyPowerDemand', 'Chinatown', 'BirdChicken', 'SyntheticControl',
       'Chinatown', 'GunPoint', 'ShapeletSim', 'SmoothSubspace',
       'ShapeletSim', 'Chinatown', 'SmoothSubspace', 'BirdChicken',
       'ItalyPowerDemand', 'GunPoint', 'BirdChicken'], dtype=object)

In [43]:

# Give the reference results the same column names as df_result, then keep only
# the datasets on which the selected RSAST configuration was evaluated.
df_other_methods = (
    df_other_methods
    .rename(columns={"dataset": "dataset_name", "score": "accuracy", "method": "classifier_name"})
    .loc[lambda frame: frame['dataset_name'].isin(df_result['dataset_name'].values)]
)
df_other_methods.columns

Index(['dataset_name', 'accuracy', 'classifier_name'], dtype='object')

In [44]:
# Stack the reference-classifier rows on top of the RSAST rows and renumber
# the rows from 0 in the same step.
concatenated_df = pd.concat([df_other_methods, df_result], ignore_index=True)

# Plain-text dump of the combined table
print(concatenated_df)

         dataset_name  accuracy  \
0         BirdChicken  0.900000   
1           Chinatown  0.982507   
2            GunPoint  1.000000   
3    ItalyPowerDemand  0.968902   
4         ShapeletSim  1.000000   
..                ...       ...   
112    SmoothSubspace  0.906667   
113       BirdChicken  0.850000   
114  ItalyPowerDemand  0.949466   
115          GunPoint  0.980000   
116       BirdChicken  0.850000   

                                       classifier_name  
0                                               ROCKET  
1                                               ROCKET  
2                                               ROCKET  
3                                               ROCKET  
4                                               ROCKET  
..                                                 ...  
112  ACF&PACF: n_random_points=100 nb_inst_per_clas...  
113  ACF&PACF: n_random_points=100 nb_inst_per_clas...  
114  ACF&PACF: n_random_points=100 nb_inst_per_clas...  
115  ACF&

In [45]:
# Distinct classifier names present in the combined table, in order of first appearance
concatenated_df.classifier_name.unique()

array(['ROCKET', 'HIVE-COTEv1', 'BOSS', 'RISE', 'TSF', 'InceptionTime',
       'TS-CHIEF', 'S-BOSS', 'STC', 'ResNet', 'ProximityForest', 'WEASEL',
       'Catch22', 'cBOSS',
       'ACF&PACF: n_random_points=100 nb_inst_per_class=50'], dtype=object)

In [46]:
# Mean accuracy of each classifier (rows) on each dataset (columns)
concatenated_df.pivot_table(
    values='accuracy',
    index=['classifier_name'],
    columns=['dataset_name'],
    aggfunc='mean',
)

dataset_name,BirdChicken,Chinatown,GunPoint,ItalyPowerDemand,ShapeletSim,SmoothSubspace,SyntheticControl
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ACF&PACF: n_random_points=100 nb_inst_per_class=50,0.85,0.985423,0.982222,0.952381,0.887037,0.906667,0.333333
BOSS,0.95,0.766764,1.0,0.908649,1.0,0.393333,0.966667
Catch22,0.9,0.930435,0.946667,0.896016,0.994444,0.866667,0.97
HIVE-COTEv1,1.0,0.979592,0.993333,0.962099,1.0,0.98,0.996667
InceptionTime,0.95,0.988338,1.0,0.965986,0.994444,0.986667,0.996667
ProximityForest,0.85,0.971014,1.0,0.965986,0.8,1.0,0.996667
RISE,0.95,0.959184,0.98,0.953353,0.783333,0.873333,0.666667
ROCKET,0.9,0.982507,1.0,0.968902,1.0,0.98,1.0
ResNet,0.9,0.976812,0.993333,0.965015,0.755556,1.0,0.996667
S-BOSS,0.95,0.784257,1.0,0.908649,1.0,0.393333,0.963333


In [47]:
# Variance of the accuracy of each classifier (rows) on each dataset (columns).
# The sample variance is undefined (NaN) for a classifier/dataset pair with a
# single score; such entries are presumably why few rows show up -- verify.
concatenated_df.pivot_table(
    values='accuracy',
    index=['classifier_name'],
    columns=['dataset_name'],
    aggfunc='var',
)


dataset_name,BirdChicken,Chinatown,GunPoint,ItalyPowerDemand,ShapeletSim,SmoothSubspace
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACF&PACF: n_random_points=100 nb_inst_per_class=50,0.0,0.0,1.5e-05,7e-06,1e-05,0.000178


In [48]:
# Mean accuracy per (classifier, dataset) pair, with both keys turned back
# into ordinary columns.
df_perf = concatenated_df.pivot_table(
    values='accuracy',
    index=['classifier_name', 'dataset_name'],
    aggfunc='mean',
).reset_index()

# Draw the critical-difference diagram (presumably written to name_file -- confirm
# against cd_diagram.cd_function).
draw_cd_diagram(
    df_perf,
    labels=True,
    title="Overall comparison",
    name_file='cd-diagram_overall_comparison.png',
)

# Only the second of the three values returned by wilcoxon_holm is kept.
_, average_ranks, _ = wilcoxon_holm(df_perf=df_perf)
min_rank = min(average_ranks)
average_ranks = pd.DataFrame(average_ranks)

findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial


['ACF&PACF: n_random_points=100 nb_inst_per_class=50' 'BOSS' 'Catch22'
 'HIVE-COTEv1' 'InceptionTime' 'ProximityForest' 'RISE' 'ROCKET' 'ResNet'
 'S-BOSS' 'STC' 'TS-CHIEF' 'TSF' 'WEASEL' 'cBOSS']
ACF&PACF: n_random_points=100 nb_inst_per_class=50    0.0
BOSS                                                  0.0
Catch22                                               0.0
HIVE-COTEv1                                           0.0
InceptionTime                                         1.0
ProximityForest                                       0.0
RISE                                                  0.0
ROCKET                                                2.0
ResNet                                                0.0
S-BOSS                                                0.0
STC                                                   0.0
TS-CHIEF                                              0.0
TSF                                                   0.0
WEASEL                                            

findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial
findfont: Generic family 'sans-serif' not found because

['ACF&PACF: n_random_points=100 nb_inst_per_class=50' 'BOSS' 'Catch22'
 'HIVE-COTEv1' 'InceptionTime' 'ProximityForest' 'RISE' 'ROCKET' 'ResNet'
 'S-BOSS' 'STC' 'TS-CHIEF' 'TSF' 'WEASEL' 'cBOSS']
ACF&PACF: n_random_points=100 nb_inst_per_class=50    0.0
BOSS                                                  0.0
Catch22                                               0.0
HIVE-COTEv1                                           0.0
InceptionTime                                         1.0
ProximityForest                                       0.0
RISE                                                  0.0
ROCKET                                                2.0
ResNet                                                0.0
S-BOSS                                                0.0
STC                                                   0.0
TS-CHIEF                                              0.0
TSF                                                   0.0
WEASEL                                            

In [49]:
# Display the average ranks returned by wilcoxon_holm, wrapped in a DataFrame by the previous cell
average_ranks

Unnamed: 0,0
Catch22,11.928571
RISE,10.785714
ACF&PACF: n_random_points=100 nb_inst_per_class=50,10.5
BOSS,9.714286
S-BOSS,9.714286
cBOSS,9.285714
TSF,9.0
WEASEL,8.357143
ResNet,7.5
STC,7.071429
