### Experiments: Comparing RSAST with SAST Ridge, HIVE-COTE, ROCKET, TS-CHIEF

In [None]:
#configure directory to import sast libraries
import sys 
import os 
#add sast library path
file_path = os.path.expanduser("~/random_sast/sast")
sys.path.append(file_path)
#add cd_diagram library path
file_path = os.path.expanduser("~/random_sast/cd_diagram")
sys.path.append(file_path)

sys.path

In [39]:
from sast import *
from sktime.datasets import load_UCR_UEA_dataset, tsc_dataset_names
from sktime.classification.kernel_based import RocketClassifier
import time
import pandas as pd
from sktime.transformations.panel.shapelet_transform import ShapeletTransform
from utils import plot_most_important_feature_on_ts


In [45]:
ds_sens = tsc_dataset_names.univariate_equal_length

ds = pd.read_excel("DataSetsUCLASummary.xlsx")
ds = ds[ds["USED SAST"]=="Y"]
#ds = ds[ds["USED RSAST ADA"].isna()]

#ds_sens = ['ACSF1', 'Adiac', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'BME', 'Car', 'CBF', 'Chinatown', 'ChlorineConcentration', 'CinCECGTorso', 'Coffee']

ds_sens = ds.Name.unique()
ds_sens =  ['SyntheticControl']
max_ds=len(ds_sens) #exploring dataset in UEA & UCR Time Series Classification Repository
print(max_ds)
print(ds_sens)


1
['SyntheticControl']


In [46]:
#define numbers of runs of the experiment
runs = 3

not_found_ds =[]
df_result = {}
list_score = []
list_time = []
list_dataset = []
list_hyperparameter = []
list_method = []
list_rpoint = []
list_nb_per_class = []


for ds in ds_sens:
    
    try:
        
        X_train, y_train = load_UCR_UEA_dataset(name=ds, extract_path='data', split="train", return_type="numpy2d")
        X_test, y_test = load_UCR_UEA_dataset(name=ds, extract_path='data', split="test", return_type="numpy2d")
        print("ds="+ds)
    except:
        print("not found ds="+ds)
        not_found_ds.append(ds)
        continue

    for i in range(runs):
        
        
        k=1
        
        print("ACF&PACF: n_random_points= (lenthg ts)//2"+" nb_inst_per_class="+str(k))
        start = time.time()
        random_state = None
        rsast_ridge = RSAST(half_len=True,nb_inst_per_class=k, len_method="both")
        rsast_ridge.fit(X_train, y_train)
        end = time.time()
        list_score.append(rsast_ridge.score(X_test,y_test))

        list_time.append(end-start)
        list_dataset.append(ds)
        list_hyperparameter.append("ACF&PACF: n_random_points= (lenthg ts)//2"+" nb_inst_per_class="+str(k))
        list_rpoint.append("(lenthg ts)//2")
        list_nb_per_class.append(str(k))
        list_method.append("Rsast")

        
        '''
        p=1
        k=10
        print("RSAST Max PACF: n_random_points="+str(p)+" nb_inst_per_class="+str(k))
        start = time.time()
        random_state = None
        rsast_ridge = RSAST(n_random_points=p,nb_inst_per_class=k, len_method="Max PACF", classifier=RidgeClassifierCV())
        rsast_ridge.fit(X_train, y_train)
        end = time.time()
        list_score.append(rsast_ridge.score(X_test,y_test))
        list_time.append(end-start)
        list_dataset.append(ds)
        list_hyperparameter.append("Max PACF: n_random_points="+str(p)+" nb_inst_per_class="+str(k))
        list_rpoint.append(str(p))
        list_nb_per_class.append(str(k))
        list_method.append("Max PACF: Rsast")
        '''
        print("ROCKET: kernels=10_000")
        start = time.time()
        rocket= RocketClassifier(num_kernels=10_000)
        rocket.fit(X_train,y_train)
        end = time.time()
        list_score.append(rocket.score(X_test,y_test))
        list_time.append(end-start)
        list_dataset.append(ds)
        list_hyperparameter.append("num_kernels=10_000")
        list_rpoint.append(str(0))
        list_nb_per_class.append(str(0))
        list_method.append("Rocket")
        
        '''
        min_shp_length = 3
        print("STC: min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(np.inf))
        
        max_shp_length = X_train.shape[1]
        start = time.time()
        #Creating a ShapeletTransform and fitting
        st = ShapeletTransform(min_shapelet_length=min_shp_length, max_shapelet_length=np.inf)
        X_train_sktime = from_2d_array_to_nested(pd.DataFrame(X_train))
        X_test_sktime = from_2d_array_to_nested(pd.DataFrame(X_test))
        st.fit(X_train_sktime, y_train)
        #Making the tranformation of time series and training a RandomForestClassifier model
        X_test_transformed = st.transform(X_test_sktime)
        X_train_transformed = st.transform(X_train_sktime)
        clf = RandomForestClassifier()
        clf.fit(X_train_transformed, y_train)
        
        end = time.time()
        list_score.append(clf.score(X_test_transformed, y_test))
        list_time.append(end-start)
        list_dataset.append(ds)
        list_hyperparameter.append("min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(max_shp_length))
        list_rpoint.append(str(0))
        list_nb_per_class.append(str(0))
        list_method.append("STC")  
        '''
        
        min_shp_length = 3
        max_shp_length = X_train.shape[1]
        print("SAST: min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(X_train.shape[1]))
        start = time.time()
        
        candidate_lengths = np.arange(min_shp_length, max_shp_length+1)
        random_state = None
        sast_ridge = SAST(cand_length_list=candidate_lengths,
                                nb_inst_per_class=1, 
                                random_state=random_state, classifier=RidgeClassifierCV())
        sast_ridge.fit(X_train, y_train)
        end = time.time()    
        list_score.append(sast_ridge.score(X_test, y_test))
        list_time.append(end-start)
        list_dataset.append(ds)
        list_hyperparameter.append("min_shapelet_length="+str(min_shp_length)+" max_shapelet_length="+str(max_shp_length))
        list_rpoint.append(str(0))
        list_nb_per_class.append(str(0))
        list_method.append("SAST")  
        

df_result['accuracy']=list_score
df_result['time']=list_time
df_result['dataset_name']=list_dataset
df_result['hyperparameter']=list_hyperparameter
df_result['rpoint']=list_rpoint
df_result['nb_per_class']=list_nb_per_class
df_result['classifier_name']=list_method
df_result=pd.DataFrame(df_result)
# export a overall dataset with the comparison
df_result.to_csv("results_comparison_accuracy/df_overall_comparison_results.csv")

ds=SyntheticControl
ACF&PACF: n_random_points= (lenthg ts)//2 nb_inst_per_class=1
RidgeClassifierCV:size training300<= kernels1650
ROCKET: kernels=10_000
SAST: min_shapelet_length=3 max_shapelet_length=60
ACF&PACF: n_random_points= (lenthg ts)//2 nb_inst_per_class=1
RidgeClassifierCV:size training300<= kernels1260
ROCKET: kernels=10_000
SAST: min_shapelet_length=3 max_shapelet_length=60
ACF&PACF: n_random_points= (lenthg ts)//2 nb_inst_per_class=1
RidgeClassifierCV:size training300<= kernels1500
ROCKET: kernels=10_000
SAST: min_shapelet_length=3 max_shapelet_length=60


In [26]:
df_result=pd.read_csv("results_comparison_accuracy/df_overall_comparison_results.csv")
df_result.columns

Index(['Unnamed: 0', 'accuracy', 'time', 'dataset_name', 'hyperparameter',
       'rpoint', 'nb_per_class', 'classifier_name'],
      dtype='object')

In [50]:
pd.pivot_table(df_result, values=['time'], index=['classifier_name'],columns=['dataset_name'], aggfunc='mean')

Unnamed: 0_level_0,time
dataset_name,SyntheticControl
classifier_name,Unnamed: 1_level_2
Rocket,6.287842
Rsast,6.521233
SAST,38.480254


In [49]:
df_pivot=pd.pivot_table(df_result, values='accuracy', index=['classifier_name','dataset_name'], aggfunc='mean')
df_pivot=df_pivot.reset_index()
pd.pivot_table(df_pivot, values=['accuracy'], index=['classifier_name'],columns=['dataset_name'], aggfunc='mean')

Unnamed: 0_level_0,accuracy
dataset_name,SyntheticControl
classifier_name,Unnamed: 1_level_2
Rocket,0.997778
Rsast,0.87
SAST,0.975556


In [51]:
from cd_function import *

#generate hyperparameter tuning cd diagrams
draw_cd_diagram(df_pivot, labels=True, title="RSAST vs other methods", name_file='images_cd_diagram/cd-diagram_overall_comparison_other_method2.png')


the null hypothesis over the entire classifiers cannot be rejected


In [30]:
# create a pivot table with the mean of score by hyperparameter
score_by_method=pd.pivot_table(df_result, values='accuracy', index=['classifier_name'],columns=['dataset_name'], aggfunc='mean')

t_score_by_method=score_by_method.transpose()
t_score_by_method.columns
#t_score_by_method[[hyperparameter,"STC","ROCKET"]]


Index(['Rocket', 'Rsast', 'SAST'], dtype='object', name='classifier_name')

In [33]:
for method in t_score_by_method.columns:
    x=t_score_by_method["Rsast"]
    y=t_score_by_method[method]

    # set the style of the plot
    plt.style.use('seaborn')

    # create the plot
    fig, ax = plt.subplots(figsize=(8, 6))

    # plot the line
    ax.plot([0,1], [0,1])

    # plot the points
    ax.scatter(x, y, color='blue', edgecolor='blue', s=30, zorder=10)

    # add labels and title
    ax.set_xlabel('RSAST')
    ax.set_ylabel(method)
    ax.set_title('Accuracy')


    # set the background color
    ax.set_facecolor('lightgrey')
    plt.savefig('images_one_vs_one_comparison/RSASTvs'+method+'2.png')

  plt.style.use('seaborn')
  plt.style.use('seaborn')
  plt.style.use('seaborn')


## Plotting top most important features over the generators

In [47]:
for c, ts in rsast_ridge.kernels_generators_.items():
    fname = f'images_features_exp/chinatown-r_ridge-class{c}-top5-features-on-ref-ts.jpg'
    plot_most_important_feature_on_ts(ts.squeeze(), c, rsast_ridge.kernel_orig_, rsast_ridge.classifier.coef_[0], limit=5, offset=0, fname=fname) # plot only the first model one-vs-all model's features


  plt.show();
  plt.show();
  fig, axes = plt.subplots(1, max_, sharey=True, figsize=(3*max_, 3), tight_layout=True)
  plt.show();
  plt.show();
  plt.show();
  plt.show();


In [48]:
for c, ts in sast_ridge.kernels_generators_.items():
    fname = f'images_features_exp/chinatown-ridge-class{c}-top5-features-on-ref-ts.jpg'
    plot_most_important_feature_on_ts(ts.squeeze(), c, sast_ridge.kernel_orig_, sast_ridge.classifier.coef_[0], limit=5, offset=0, fname=fname) # plot only the first model one-vs-all model's features


  plt.show();
  plt.show();
  plt.show();
  plt.show();
  plt.show();
  plt.show();
