### Select Datasets for comparing RSAST with SAST Ridge, HIVE-COTE, ROCKET, TS-CHIEF

In [18]:
from sast.sast import *
from sktime.datasets import load_UCR_UEA_dataset, tsc_dataset_names
from sktime.classification.kernel_based import RocketClassifier
import time
import pandas as pd
from sktime.transformations.panel.shapelet_transform import ShapeletTransform

In [19]:
# Datasets from the UEA & UCR Time Series Classification Repository used in this run.
# For the full benchmark, replace this list with tsc_dataset_names.univariate_equal_length.
ds_sens = ['Chinatown', 'Coffee', 'BirdChicken']
max_ds = len(ds_sens)  # number of datasets being explored
print(ds_sens)


['Chinatown', 'Coffee']


In [20]:
# Benchmark RSAST (two hyperparameter settings), ROCKET and a
# ShapeletTransform + RandomForest pipeline ("STC") on every dataset in
# ds_sens, repeating each fit `runs` times, then export all rows to CSV.

# Number of repetitions per dataset; every model is refit on each repetition.
runs = 3

# (n_random_points, nb_inst_per_class) settings evaluated for RSAST, in order.
rsast_configs = ((100, 10), (10, 100))

# Lower bound on shapelet length handed to ShapeletTransform.
min_shp_length = 3

# Parallel result columns: one entry is appended to each per fitted model.
list_score = []
list_time = []
list_dataset = []
list_hyperparameter = []
list_method = []
list_rpoint = []
list_nb_per_class = []


def _record_result(score, elapsed, dataset, hyperparameter, rpoint, nb_per_class, method):
    """Append one benchmark row to the parallel result lists.

    Parameters
    ----------
    score : value returned by the fitted model's ``score`` on the test split.
    elapsed : wall-clock seconds measured around the timed section.
    dataset : name of the dataset the model was fitted on.
    hyperparameter : human-readable description of the configuration.
    rpoint, nb_per_class : RSAST settings (0 for the other methods); stored as strings.
    method : short method label ("Rsast", "Rocket" or "STC").
    """
    list_score.append(score)
    list_time.append(elapsed)
    list_dataset.append(dataset)
    list_hyperparameter.append(hyperparameter)
    list_rpoint.append(str(rpoint))
    list_nb_per_class.append(str(nb_per_class))
    list_method.append(method)


for ds in ds_sens:
    print("ds="+ds)
    X_train, y_train = load_UCR_UEA_dataset(name=ds, extract_path='data', split="train", return_type="numpy2d")
    X_test, y_test = load_UCR_UEA_dataset(name=ds, extract_path='data', split="test", return_type="numpy2d")

    # Second axis of the 2D training array; recorded as the STC upper bound label.
    max_shp_length = X_train.shape[1]

    for i in range(runs):

        # --- RSAST with a Ridge classifier; only fit() is timed ---
        for p, k in rsast_configs:
            hyperparameter = "n_random_points="+str(p)+" nb_inst_per_class="+str(k)
            print("RSAST: "+hyperparameter)
            start = time.time()
            rsast_ridge = RSAST(n_random_points=p, nb_inst_per_class=k, classifier=RidgeClassifierCV())
            rsast_ridge.fit(X_train, y_train)
            end = time.time()
            _record_result(rsast_ridge.score(X_test, y_test), end-start, ds, hyperparameter, p, k, "Rsast")

        # --- ROCKET; only fit() is timed ---
        print("ROCKET: kernels=10_000")
        start = time.time()
        rocket = RocketClassifier(num_kernels=10_000)
        rocket.fit(X_train, y_train)
        end = time.time()
        _record_result(rocket.score(X_test, y_test), end-start, ds, "num_kernels=10_000", 0, 0, "Rocket")

        # --- STC: shapelet transform followed by a random forest ---
        # The shapelet bounds are defined before this log line so the cell also
        # runs on a fresh kernel.
        # NOTE(review): the transformer is given np.inf as its upper bound while the
        # recorded label reports the series length; presumably an unbounded maximum
        # is capped at the series length -- confirm against the sktime documentation.
        print("STC: min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(np.inf))
        start = time.time()
        # Fit the shapelet transform on the training split (nested sktime format).
        st = ShapeletTransform(min_shapelet_length=min_shp_length, max_shapelet_length=np.inf)
        X_train_sktime = from_2d_array_to_nested(pd.DataFrame(X_train))
        X_test_sktime = from_2d_array_to_nested(pd.DataFrame(X_test))
        st.fit(X_train_sktime, y_train)
        # Transform both splits and train the classifier on the transformed data.
        # Unlike RSAST/ROCKET above, this timed section also includes both
        # transforms and the RandomForest fit.
        X_test_transformed = st.transform(X_test_sktime)
        X_train_transformed = st.transform(X_train_sktime)
        clf = RandomForestClassifier()
        clf.fit(X_train_transformed, y_train)
        end = time.time()
        _record_result(
            clf.score(X_test_transformed, y_test),
            end-start,
            ds,
            "min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(max_shp_length),
            0,
            0,
            "STC",
        )

# Assemble the overall comparison table (one row per fitted model) and export it.
df_result = pd.DataFrame({
    'accuracy': list_score,
    'time': list_time,
    'dataset_name': list_dataset,
    'classifier_name': list_hyperparameter,
    'rpoint': list_rpoint,
    'nb_per_class': list_nb_per_class,
    'method': list_method,
})
df_result.to_csv("df_overall_comparison_results.csv")

ds=Chinatown
RSAST: n_random_points=100 nb_inst_per_class=10
RSAST: n_random_points=10 nb_inst_per_class=100
ROCKET: kernels=10_000
STC: min_shapelet_length=3 max_shapelet_length=inf
RSAST: n_random_points=100 nb_inst_per_class=10
RSAST: n_random_points=10 nb_inst_per_class=100
ROCKET: kernels=10_000
STC: min_shapelet_length=3 max_shapelet_length=inf
RSAST: n_random_points=100 nb_inst_per_class=10
RSAST: n_random_points=10 nb_inst_per_class=100
ROCKET: kernels=10_000
STC: min_shapelet_length=3 max_shapelet_length=inf
RSAST: n_random_points=100 nb_inst_per_class=10
RSAST: n_random_points=10 nb_inst_per_class=100
ROCKET: kernels=10_000
STC: min_shapelet_length=3 max_shapelet_length=inf
ds=Coffee
RSAST: n_random_points=100 nb_inst_per_class=10
RSAST: n_random_points=10 nb_inst_per_class=100
ROCKET: kernels=10_000
STC: min_shapelet_length=3 max_shapelet_length=inf


KeyboardInterrupt: 

In [None]:
# Reload the exported comparison results. The export call wrote the row index as
# the first CSV column, so read it back as the index instead of letting pandas
# create a spurious "Unnamed: 0" data column.
df_result = pd.read_csv("df_overall_comparison_results.csv", index_col=0)

In [12]:

# Mean accuracy for every (configuration, dataset) pair, exported to CSV.
pivot_table = df_result.pivot_table(
    values='accuracy',
    index=['classifier_name', 'dataset_name'],
    aggfunc='mean',
)
pivot_table.to_csv("df_pivot_overall_comparison.csv")

In [13]:
# Mean accuracy per method and configuration (rows), one column per dataset.
df_result.pivot_table(
    values='accuracy',
    index=['method', 'classifier_name'],
    columns=['dataset_name'],
    aggfunc='mean',
)

Unnamed: 0_level_0,dataset_name,Chinatown
method,classifier_name,Unnamed: 2_level_1
Rocket,num_kernels=10_000,0.981535
Rsast,n_random_points=10 nb_inst_per_class=100,0.97862
Rsast,n_random_points=100 nb_inst_per_class=10,0.984451
STC,min_shapelet_length=3 max_shapelet_length=24,0.973761


In [14]:
# Variance of the accuracy per method and configuration (rows), one column per dataset.
df_result.pivot_table(
    values='accuracy',
    index=['method', 'classifier_name'],
    columns=['dataset_name'],
    aggfunc='var',
)

Unnamed: 0_level_0,dataset_name,Chinatown
method,classifier_name,Unnamed: 2_level_1
Rocket,num_kernels=10_000,3e-06
Rsast,n_random_points=10 nb_inst_per_class=100,2e-05
Rsast,n_random_points=100 nb_inst_per_class=10,3e-06
STC,min_shapelet_length=3 max_shapelet_length=24,0.0


In [15]:
# Mean of the recorded time per method and configuration (rows), one column
# per dataset; the mean is divided by 60 to express it in minutes.
mean_time = df_result.pivot_table(
    values='time',
    index=['method', 'classifier_name'],
    columns=['dataset_name'],
    aggfunc='mean',
)
mean_time.div(60)

Unnamed: 0_level_0,dataset_name,Chinatown
method,classifier_name,Unnamed: 2_level_1
Rocket,num_kernels=10_000,0.138847
Rsast,n_random_points=10 nb_inst_per_class=100,0.00214
Rsast,n_random_points=100 nb_inst_per_class=10,0.014794
STC,min_shapelet_length=3 max_shapelet_length=24,1.427007
