## Experiments: Comparing RSAST with other methods

In [75]:
#configure directory to import sast libraries
import sys 
import os 
#add sast library path
file_path = os.path.expanduser("~/random_sast/sast")
sys.path.append(file_path)
#add cd_diagram library path
file_path = os.path.expanduser("~/random_sast/cd_diagram")
sys.path.append(file_path)

#sys.path

In [76]:
from sast import *
from sktime.datasets import load_UCR_UEA_dataset, tsc_dataset_names
from sktime.classification.kernel_based import RocketClassifier
import time
import pandas as pd
from sktime.transformations.panel.shapelet_transform import ShapeletTransform
import matplotlib.pyplot as plt
import os

hyperparameter='ACF&PACF: n_random_points=100 nb_inst_per_class=50'

### Reading Datasets

In [77]:
# Set directory where the csv files are located
directory = os.getcwd()+'/ResultsByClassifier'

# Create an empty list to store the dataframes
dfs = []

# Loop through all files in the directory
for filename in os.listdir(directory):
    # Check if the file starts with "df_overall_tunning" and ends with ".csv"
    if filename.endswith(".csv"):
        # Read the csv file into a dataframe
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        # Append the dataframe to the list
        df['filename']=filename
        dfs.append(df)

# Concatenate all the dataframes in the list into one dataframe
df_other_methods = pd.concat(dfs, ignore_index=True)
df_other_methods=df_other_methods[['filename','folds:','0']]
df_other_methods['method']=df_other_methods['filename'].str.split('_').str[0]
df_other_methods
df_other_methods=df_other_methods.rename(columns={'folds:':'dataset','0':'score'})
df_other_methods=df_other_methods[['dataset','score','method']]



In [78]:
df_other_methods['method'].unique()

array(['ROCKET', 'HIVE-COTEv1', 'BOSS', 'RISE', 'TSF', 'InceptionTime',
       'TS-CHIEF', 'S-BOSS', 'STC', 'ResNet', 'ProximityForest', 'WEASEL',
       'Catch22', 'cBOSS'], dtype=object)

In [79]:
# Set directory where the csv files are located
directory = os.getcwd()+'/ResultsRsast'

# Create an empty list to store the dataframes
dfs = []

# Loop through all files in the directory
for filename in os.listdir(directory):
    # Check if the file starts with "df_overall_tunning" and ends with ".csv"
    if filename.startswith("df_overall_tunning") and filename.endswith(".csv"):
        # Read the csv file into a dataframe
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        # Append the dataframe to the list
        dfs.append(df)

# Concatenate all the dataframes in the list into one dataframe
df_result = pd.concat(dfs, ignore_index=True)
df_result.columns

Index(['Unnamed: 0', 'accuracy', 'time', 'cweights_time', 'fsubsequence_time',
       'tdataset_time', 'tclassifier_time', 'dataset_name', 'classifier_name',
       'rpoint', 'nb_per_class', 'method', 'len_method'],
      dtype='object')

In [80]:
df_result=df_result[['accuracy','dataset_name','classifier_name','time']]

df_result=df_result[df_result["classifier_name"]==hyperparameter]
df_result.dataset_name.values


array(['SmoothSubspace', 'SonyAIBORobotSurface2', 'GunPoint',
       'ItalyPowerDemand', 'DistalPhalanxOutlineCorrect',
       'DistalPhalanxOutlineAgeGroup', 'ShapeletSim', 'SyntheticControl',
       'SonyAIBORobotSurface1', 'DistalPhalanxOutlineAgeGroup',
       'ItalyPowerDemand', 'Chinatown', 'Coffee', 'BirdChicken',
       'SyntheticControl', 'Chinatown', 'SyntheticControl',
       'SonyAIBORobotSurface2', 'SonyAIBORobotSurface1', 'GunPoint',
       'DistalPhalanxOutlineAgeGroup', 'ShapeletSim',
       'SonyAIBORobotSurface1', 'SmoothSubspace', 'ShapeletSim',
       'Chinatown', 'SmoothSubspace', 'BirdChicken', 'ItalyPowerDemand',
       'SonyAIBORobotSurface2', 'GunPoint', 'BirdChicken'], dtype=object)

In [81]:

df_other_methods=df_other_methods.rename(columns={"dataset":"dataset_name","score":"accuracy","method":"classifier_name"})
df_other_methods = df_other_methods[df_other_methods['dataset_name'].isin(df_result.dataset_name.values)]
df_other_methods.columns

Index(['dataset_name', 'accuracy', 'classifier_name'], dtype='object')

In [82]:
# concatenate the two dataframes
concatenated_df = pd.concat([df_other_methods, df_result])

# reset the index of the concatenated dataframe
concatenated_df = concatenated_df.reset_index(drop=True)


In [83]:
concatenated_df['classifier_name'].unique()

array(['ROCKET', 'HIVE-COTEv1', 'BOSS', 'RISE', 'TSF', 'InceptionTime',
       'TS-CHIEF', 'S-BOSS', 'STC', 'ResNet', 'ProximityForest', 'WEASEL',
       'Catch22', 'cBOSS',
       'ACF&PACF: n_random_points=100 nb_inst_per_class=50'], dtype=object)

### Overall analysis

#### Accuracy: RSAST vs Other methods

In [92]:
# create a pivot table with the mean of score by hyperparameter
score_by_method=pd.pivot_table(concatenated_df, values='accuracy', index=['classifier_name'],columns=['dataset_name'], aggfunc='mean')



In [93]:
t_score_by_method=score_by_method.transpose()
t_score_by_method
#t_score_by_method[[hyperparameter,"STC","ROCKET"]]


classifier_name,ACF&PACF: n_random_points=100 nb_inst_per_class=50,BOSS,Catch22,HIVE-COTEv1,InceptionTime,ProximityForest,RISE,ROCKET,ResNet,S-BOSS,STC,TS-CHIEF,TSF,WEASEL,cBOSS
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
BirdChicken,0.85,0.95,0.9,1.0,0.95,0.85,0.95,0.9,0.9,0.95,0.95,0.95,0.8,0.9,1.0
Chinatown,0.985423,0.766764,0.930435,0.979592,0.988338,0.971014,0.959184,0.982507,0.976812,0.784257,0.96793,0.968116,0.982507,0.956268,0.950725
Coffee,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
DistalPhalanxOutlineAgeGroup,0.683453,0.733813,0.705036,0.769784,0.733813,0.733813,0.76259,0.748201,0.676259,0.733813,0.769784,0.748201,0.741007,0.769784,0.741007
DistalPhalanxOutlineCorrect,0.416667,0.73913,0.800725,0.778986,0.793478,0.804348,0.775362,0.771739,0.764493,0.73913,0.771739,0.757246,0.764493,0.775362,0.731884
GunPoint,0.982222,1.0,0.946667,0.993333,1.0,1.0,0.98,1.0,0.993333,1.0,0.993333,1.0,0.96,1.0,1.0
ItalyPowerDemand,0.952381,0.908649,0.896016,0.962099,0.965986,0.965986,0.953353,0.968902,0.965015,0.908649,0.955296,0.965015,0.966958,0.955296,0.923226
ShapeletSim,0.887037,1.0,0.994444,1.0,0.994444,0.8,0.783333,1.0,0.755556,1.0,1.0,1.0,0.461111,1.0,0.994444
SmoothSubspace,0.906667,0.393333,0.866667,0.98,0.986667,1.0,0.873333,0.98,1.0,0.393333,0.933333,1.0,0.986667,0.833333,0.52
SonyAIBORobotSurface1,0.809207,0.63228,0.851913,0.753744,0.878536,0.836938,0.821963,0.921797,0.956739,0.668885,0.75208,0.831947,0.808652,0.845258,0.537438


In [86]:
for method in t_score_by_method.columns.drop(hyperparameter):
    x=t_score_by_method[hyperparameter]
    y=t_score_by_method[method]

    # set the style of the plot
    plt.style.use('seaborn')

    # create the plot
    fig, ax = plt.subplots(figsize=(8, 6))

    # plot the line
    ax.plot([0,1], [0,1])

    # plot the points
    ax.scatter(x, y, color='blue', edgecolor='blue', s=30, zorder=10)

    # add labels and title
    ax.set_xlabel('RSAST')
    ax.set_ylabel(method)
    ax.set_title('Accuracy')


    # set the background color
    ax.set_facecolor('lightgrey')
    plt.savefig('images_one&one_comparison/RSASTvs'+method+'.png')


  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')


In [87]:
# create a pivot table with the mean of time by hyperparameter
time_per_method=pd.pivot_table(df_result, values='time', columns=['classifier_name'],index=['dataset_name'], aggfunc='mean')/60
time_per_method

classifier_name,ACF&PACF: n_random_points=100 nb_inst_per_class=50
dataset_name,Unnamed: 1_level_1
BirdChicken,13.528151
Chinatown,0.019082
Coffee,5.698254
DistalPhalanxOutlineAgeGroup,24.707465
DistalPhalanxOutlineCorrect,28.393505
GunPoint,5.642177
ItalyPowerDemand,0.136053
ShapeletSim,8.205093
SmoothSubspace,0.253635
SonyAIBORobotSurface1,0.116553


In [88]:
# create a pivot table with the variation of score by hyperparameter
pd.pivot_table(concatenated_df, values='accuracy', index=['classifier_name'],columns=['dataset_name'], aggfunc='var')


dataset_name,BirdChicken,Chinatown,DistalPhalanxOutlineAgeGroup,GunPoint,ItalyPowerDemand,ShapeletSim,SmoothSubspace,SonyAIBORobotSurface1,SonyAIBORobotSurface2,SyntheticControl
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ACF&PACF: n_random_points=100 nb_inst_per_class=50,0.0,0.0,0.0,1.5e-05,7e-06,1e-05,0.000178,3.4e-05,5e-06,0.001737


#### Generate CD Diagram comparison

In [89]:

from cd_function import *
df_perf=pd.pivot_table(concatenated_df, values='accuracy', index=['classifier_name','dataset_name'], aggfunc='mean')
df_perf=df_perf.reset_index()
draw_cd_diagram(df_perf, labels=True, title="Overall Comparison Other Methods", name_file='images_cd_diagram/cd-diagram_overall_comparison_other_method.png')




In [90]:
df_perf["classifier_name"].unique()

array(['ACF&PACF: n_random_points=100 nb_inst_per_class=50', 'BOSS',
       'Catch22', 'HIVE-COTEv1', 'InceptionTime', 'ProximityForest',
       'RISE', 'ROCKET', 'ResNet', 'S-BOSS', 'STC', 'TS-CHIEF', 'TSF',
       'WEASEL', 'cBOSS'], dtype=object)

In [91]:
_, average_ranks, _ = wilcoxon_holm(df_perf=df_perf)
min_rank= min(average_ranks)
average_ranks=pd.DataFrame(average_ranks)
average_ranks



Unnamed: 0,0
ACF&PACF: n_random_points=100 nb_inst_per_class=50,10.625
BOSS,10.583333
S-BOSS,10.416667
cBOSS,9.958333
Catch22,9.708333
TSF,9.416667
RISE,9.166667
ResNet,7.25
ProximityForest,6.958333
STC,6.916667
