## Experiments: Comparing RSAST with other methods

In [31]:
from sast.sast import *
from sktime.datasets import load_UCR_UEA_dataset, tsc_dataset_names
from sktime.classification.kernel_based import RocketClassifier
import time
import pandas as pd
from sktime.transformations.panel.shapelet_transform import ShapeletTransform
import matplotlib.pyplot as plt
import os

# Label of the RSAST configuration under study; later cells use it to filter
# result rows by `classifier_name` and to select the matching column when plotting.
hyperparameter="ACF&PACF: n_random_points=10 nb_inst_per_class=100"

### Reading Datasets

In [32]:
def _read_result_csvs(directory, prefix="", keep_filename=False):
    """Read the matching CSV files of ``directory`` into a single DataFrame.

    Parameters
    ----------
    directory : str
        Folder that is scanned (non-recursively) for result files.
    prefix : str, default ""
        Only files whose name starts with this prefix are read. The empty
        default accepts every ``.csv`` file.
    keep_filename : bool, default False
        When True, a ``filename`` column holding the source file name is
        added to the rows of each file before concatenation.

    Returns
    -------
    pandas.DataFrame
        All matching files stacked row-wise with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file in ``directory`` matches (same exception type that
        ``pd.concat`` raises on an empty list, with a clearer message).
    """
    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore every displayed output) reproducible across runs.
    for filename in sorted(os.listdir(directory)):
        if not (filename.startswith(prefix) and filename.endswith(".csv")):
            continue
        frame = pd.read_csv(os.path.join(directory, filename))
        if keep_filename:
            frame['filename'] = filename
        frames.append(frame)
    if not frames:
        raise ValueError(f"No '{prefix}*.csv' files found in {directory!r}")
    return pd.concat(frames, ignore_index=True)


# --- Results of the other classifiers: every .csv file in the folder ---
directory = '/home/nirojasvar/random_sast/ResultsByClassifier'
df_other_methods = _read_result_csvs(directory, keep_filename=True)
# .copy() gives an independent frame, so adding the 'method' column below
# does not trigger pandas' SettingWithCopyWarning.
df_other_methods = df_other_methods[['filename','folds:','0']].copy()
# The method name is the part of the file name before the first underscore.
df_other_methods['method'] = df_other_methods['filename'].str.split('_').str[0]
df_other_methods = df_other_methods.rename(columns={'folds:':'dataset','0':'score'})
df_other_methods = df_other_methods[['dataset','score','method']]

# --- RSAST results: only the files named "df_overall_tunning*.csv" ---
directory = '/home/nirojasvar/random_sast/ResultsRsast'
df_result = _read_result_csvs(directory, prefix="df_overall_tunning")
df_result.columns

Index(['Unnamed: 0', 'accuracy', 'time', 'cweights_time', 'fsubsequence_time',
       'tdataset_time', 'tclassifier_time', 'dataset_name', 'classifier_name',
       'rpoint', 'nb_per_class', 'method', 'len_method'],
      dtype='object')

In [33]:
# Keep only the rows of the RSAST configuration under study, restricted to
# the three columns needed for the comparison.
df_result = df_result.loc[
    df_result["classifier_name"] == hyperparameter,
    ['accuracy', 'dataset_name', 'classifier_name'],
]
# Dataset name of every remaining result row
df_result["dataset_name"].values


array(['SmoothSubspace', 'GunPoint', 'ItalyPowerDemand', 'ShapeletSim',
       'ItalyPowerDemand', 'Chinatown', 'Coffee', 'BirdChicken',
       'SyntheticControl', 'Chinatown', 'GunPoint', 'ShapeletSim',
       'SmoothSubspace', 'ShapeletSim', 'Chinatown', 'SmoothSubspace',
       'BirdChicken', 'ItalyPowerDemand', 'GunPoint', 'BirdChicken'],
      dtype=object)

In [34]:

# Align the column names with df_result, then keep only the datasets for
# which an RSAST result exists.
df_other_methods = (
    df_other_methods
    .rename(columns={"dataset":"dataset_name","score":"accuracy","method":"classifier_name"})
    .loc[lambda frame: frame['dataset_name'].isin(df_result["dataset_name"].values)]
)
df_other_methods.columns

Index(['dataset_name', 'accuracy', 'classifier_name'], dtype='object')

In [35]:
# Stack the other-methods rows on top of the RSAST rows and renumber the
# index from 0 in a single step.
concatenated_df = pd.concat([df_other_methods, df_result], ignore_index=True)


In [36]:
# Distinct classifier labels present in the combined table, in order of first appearance
concatenated_df.classifier_name.unique()

array(['ROCKET', 'HIVE-COTEv1', 'BOSS', 'RISE', 'TSF', 'InceptionTime',
       'TS-CHIEF', 'S-BOSS', 'STC', 'ResNet', 'ProximityForest', 'WEASEL',
       'Catch22', 'cBOSS',
       'ACF&PACF: n_random_points=10 nb_inst_per_class=100'], dtype=object)

### Overall analysis

#### Accuracy: RSAST vs Other methods

In [37]:
# Mean accuracy of each classifier (rows) on each dataset (columns)
score_by_method = pd.pivot_table(
    data=concatenated_df,
    values='accuracy',
    index=['classifier_name'],
    columns=['dataset_name'],
    aggfunc='mean',
)
score_by_method


dataset_name,BirdChicken,Chinatown,Coffee,GunPoint,ItalyPowerDemand,ShapeletSim,SmoothSubspace,SyntheticControl
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ACF&PACF: n_random_points=10 nb_inst_per_class=100,0.85,0.980564,1.0,0.975556,0.952705,0.887037,0.915556,0.276667
BOSS,0.95,0.766764,1.0,1.0,0.908649,1.0,0.393333,0.966667
Catch22,0.9,0.930435,1.0,0.946667,0.896016,0.994444,0.866667,0.97
HIVE-COTEv1,1.0,0.979592,1.0,0.993333,0.962099,1.0,0.98,0.996667
InceptionTime,0.95,0.988338,1.0,1.0,0.965986,0.994444,0.986667,0.996667
ProximityForest,0.85,0.971014,1.0,1.0,0.965986,0.8,1.0,0.996667
RISE,0.95,0.959184,1.0,0.98,0.953353,0.783333,0.873333,0.666667
ROCKET,0.9,0.982507,1.0,1.0,0.968902,1.0,0.98,1.0
ResNet,0.9,0.976812,1.0,0.993333,0.965015,0.755556,1.0,0.996667
S-BOSS,0.95,0.784257,1.0,1.0,0.908649,1.0,0.393333,0.963333


In [38]:
# Flip the table so that datasets become rows and classifiers become columns
t_score_by_method = score_by_method.T
# Side-by-side mean accuracy of the RSAST configuration and STC
t_score_by_method.loc[:, [hyperparameter, "STC"]]

classifier_name,ACF&PACF: n_random_points=10 nb_inst_per_class=100,STC
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1
BirdChicken,0.85,0.95
Chinatown,0.980564,0.96793
Coffee,1.0,1.0
GunPoint,0.975556,0.993333
ItalyPowerDemand,0.952705,0.955296
ShapeletSim,0.887037,1.0
SmoothSubspace,0.915556,0.933333
SyntheticControl,0.276667,0.996667


In [39]:
# Per-dataset mean accuracy of the two methods being compared; both columns
# come from the same table, so they share the same dataset order.
x=t_score_by_method[hyperparameter]
y=t_score_by_method["STC"]

# set the style of the plot
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 in favour of
# 'seaborn-v0_8'. Prefer the new name and fall back to the old one
# on matplotlib versions that do not know it yet.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# create the plot
fig, ax = plt.subplots(figsize=(8, 6))

# plot the diagonal (equal accuracy): points above it are datasets where STC
# scores higher, points below it are datasets where RSAST scores higher
ax.plot([0,1], [0,1])

# plot the points (zorder keeps them drawn on top of the diagonal)
ax.scatter(x, y, color='blue', edgecolor='blue', s=30, zorder=10)

# add labels and title
ax.set_xlabel('RSAST')
ax.set_ylabel('STC')
ax.set_title('Accuracy')

# set the background color
ax.set_facecolor('lightgrey')
# write the figure to the current working directory
plt.savefig('RSASTvsSTC.png')


  plt.style.use('seaborn')


#### Generate CD Diagram comparison

In [40]:
# Variance of the accuracy of each classifier (rows) on each dataset (columns)
pd.pivot_table(
    data=concatenated_df,
    values='accuracy',
    index=['classifier_name'],
    columns=['dataset_name'],
    aggfunc='var',
)


dataset_name,BirdChicken,Chinatown,GunPoint,ItalyPowerDemand,ShapeletSim,SmoothSubspace
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACF&PACF: n_random_points=10 nb_inst_per_class=100,0.0,3.7e-05,1.5e-05,2.5e-05,1e-05,0.000193


In [41]:
from cd_diagram.cd_function import *

# Long-format table: one row per (classifier, dataset) with its mean accuracy
df_perf = pd.pivot_table(
    data=concatenated_df,
    values='accuracy',
    index=['classifier_name', 'dataset_name'],
    aggfunc='mean',
).reset_index()

# Draw the critical-difference diagram and save it under the given file name
draw_cd_diagram(
    df_perf,
    labels=True,
    title="Overall Comparison Other Methods",
    name_file='cd-diagram_overall_comparison_other_method.png',
)

# Only the second of the three values returned by wilcoxon_holm is used here
_, average_ranks, _ = wilcoxon_holm(df_perf=df_perf)
min_rank = min(average_ranks)
# Wrap the ranks in a DataFrame for display in the next cell
average_ranks = pd.DataFrame(average_ranks)

['ACF&PACF: n_random_points=10 nb_inst_per_class=100' 'BOSS' 'Catch22'
 'HIVE-COTEv1' 'InceptionTime' 'ProximityForest' 'RISE' 'ROCKET' 'ResNet'
 'S-BOSS' 'STC' 'TS-CHIEF' 'TSF' 'WEASEL' 'cBOSS']




ACF&PACF: n_random_points=10 nb_inst_per_class=100    0.0
BOSS                                                  0.0
Catch22                                               0.0
HIVE-COTEv1                                           0.0
InceptionTime                                         1.0
ProximityForest                                       0.0
RISE                                                  0.0
ROCKET                                                2.0
ResNet                                                0.0
S-BOSS                                                0.0
STC                                                   0.0
TS-CHIEF                                              0.0
TSF                                                   0.0
WEASEL                                                0.0
cBOSS                                                 0.0
dtype: float64
Catch22                                               11.4375
ACF&PACF: n_random_points=10 nb_inst_per_class=100   



In [42]:
# Display the average rank of each classifier
average_ranks

Unnamed: 0,0
Catch22,11.4375
ACF&PACF: n_random_points=10 nb_inst_per_class=100,10.5625
RISE,10.3125
BOSS,9.5
S-BOSS,9.5
cBOSS,9.125
TSF,8.75
WEASEL,8.3125
ResNet,7.5625
STC,7.1875
