<font size="+3"><mark>Processing times of SAX, 1d-SAX, ASTRIDE, and FASTRIDE</mark></font>

# Introduction

## README

_Associated GitHub repository: https://github.com/sylvaincom/astride._

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from pathlib import Path
from time import process_time

import pprint

from src.utils import concatenate_df


import random

from src.segment_feature import SegmentFeature
from src.segmentation import Segmentation
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.utils import Bunch
from src.symbolic_signal_distance import SymbolicSignalDistance
from src.symbolization import Symbolization
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from src.utils import load_ucr_dataset

from tslearn.piecewise import OneD_SymbolicAggregateApproximation, SymbolicAggregateApproximation
from tslearn.metrics.cysax import cydist_1d_sax, cydist_sax
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_val_score

from src.utils_sfa import test_SFA, train_SFA

## Configuration parameters

In [2]:
# Notebook-wide helpers: the directory the kernel was started from, and a
# pretty-printer for inspecting nested objects.
cwd = Path.cwd()
pp = pprint.PrettyPrinter()

In [3]:
# Experiment settings: UCR dataset name, number of segments per signal, and
# total alphabet size shared by all the compared methods.
DATASET_NAME_UCR = "ECG200"
N_SEGMENTS = 10
N_SYMBOLS = 9

# 1d-SAX takes one alphabet size for the mean and one for the slope. Both are
# derived from N_SYMBOLS (integer part of its square root) so that they follow
# N_SYMBOLS when it is changed, instead of being overwritten afterwards by
# hard-coded values that could disagree with what is printed here.
# NOTE: the product only equals N_SYMBOLS when N_SYMBOLS is a perfect square.
N_SYMBOLS_MEAN = int(np.sqrt(N_SYMBOLS))
N_SYMBOLS_SLOPE = int(np.sqrt(N_SYMBOLS))
print(f"{N_SYMBOLS_MEAN = }")
print(f"{N_SYMBOLS_SLOPE = }")


# Preparing the data

## Set up

In [4]:
# Load the dataset once, then expose each field of the returned object under
# a short module-level name.
b_load_ucr_dataset = load_ucr_dataset(DATASET_NAME_UCR)

# Labels (y_train is what the 1-NN classifiers below are fitted with).
y_train_test, y_train, y_test = (
    b_load_ucr_dataset.y_train_test,
    b_load_ucr_dataset.y_train,
    b_load_ucr_dataset.y_test,
)

# `l_*` fields: the collections the symbolization pipelines below are fitted
# on and transform.
l_train_test, l_train, l_test = (
    b_load_ucr_dataset.l_train_test,
    b_load_ucr_dataset.l_train,
    b_load_ucr_dataset.l_test,
)

# `X_*` fields of the same splits.
X_train_test, X_train, X_test = (
    b_load_ucr_dataset.X_train_test,
    b_load_ucr_dataset.X_train,
    b_load_ucr_dataset.X_test,
)

# Length of the first signal of the combined collection.
N_SAMPLES = len(l_train_test[0])
print(N_SAMPLES)

96


In [5]:
# Number of signals in the train split, then in the test split (one per line).
print(len(X_train), len(X_test), sep="\n")

100
100


In [6]:
# Scale the combined train+test collection with tslearn's
# TimeSeriesScalerMeanVariance, using its default settings.
# NOTE(review): `scaled_X_train_test` is not referenced by any later cell in
# this notebook (every pipeline below either contains its own scaler step or
# passes `scale=True`) — confirm whether this cell is still needed.
scaler = TimeSeriesScalerMeanVariance()
scaled_X_train_test = scaler.fit_transform(X_train_test)

In [7]:
# N_SEGMENTS expressed as a percentage of N_SAMPLES, rounded to 2 decimals.
round(N_SEGMENTS / N_SAMPLES * 100, 2)

10.42

## Some code for vanilla SAX from `tslearn`

In [8]:
class MySAX(SymbolicAggregateApproximation):
    """tslearn SAX whose transform output also carries fitted attributes.

    The symbolic representation is wrapped in a ``Bunch`` together with
    ``breakpoints_avg_`` and ``_X_fit_dims_`` so that a downstream pipeline
    step (see ``DistInterfaceSAX``) can compute SAX distances without access
    to this estimator.
    """

    def _transform(self, X, y=None, *args, **kwargs):
        """Return a ``Bunch`` holding the parent's output and fitted attributes.

        Extra positional and keyword arguments are accepted but ignored.
        """
        symbolic_repr = super()._transform(X=X, y=y)
        return Bunch(
            data=symbolic_repr,
            breakpoints_avg_=self.breakpoints_avg_,
            _X_fit_dims_=self._X_fit_dims_,
        )

class DistInterfaceSAX(BaseEstimator):
    """Pipeline step turning ``MySAX`` outputs into a pairwise distance matrix.

    ``fit`` memorises the training representation and the attributes carried
    by the input ``Bunch``; ``transform`` returns the distances between the
    given signals and the memorised training signals.
    """

    def fit(self, X, y=None, *args, **kwargs):
        """Store the fields of the ``Bunch`` ``X``; ``y`` and extras are ignored."""
        self._X_fit_dims_ = X._X_fit_dims_
        self.breakpoints_avg_ = X.breakpoints_avg_
        self.X_transformed = X.data
        return self

    def distance_sax(self, sax1, sax2):
        """Distance between two SAX representations, via tslearn's ``cydist_sax``."""
        # Second entry of the dimensions recorded when the SAX model was fitted
        # (presumably the original series length — TODO confirm).
        second_fit_dim = self._X_fit_dims_[1]
        return cydist_sax(sax1, sax2, self.breakpoints_avg_, second_fit_dim)

    def transform(self, X, y=None):
        """Distances from the signals in ``X.data`` to the stored training signals."""
        # Resulting layout: (n_samples_test, n_samples_train)
        return SymbolicSignalDistance.collection_dist(
            list_of_test_signals=X.data,
            list_of_train_signals=self.X_transformed,
            metric=self.distance_sax,
        )

## Some code for 1d-SAX from `tslearn`

In [9]:
class MyOneD(OneD_SymbolicAggregateApproximation):
    """tslearn 1d-SAX whose transform output also carries fitted attributes.

    The symbolic representation is wrapped in a ``Bunch`` together with the
    mean and slope breakpoints and ``_X_fit_dims_`` so that a downstream
    pipeline step (see ``DistInterfaceOneD``) can compute 1d-SAX distances.
    """

    def _transform(self, X, y=None, *args, **kwargs):
        """Return a ``Bunch`` holding the parent's output and fitted attributes.

        Extra positional and keyword arguments are accepted but ignored.
        """
        symbolic_repr = super()._transform(X=X, y=y)
        return Bunch(
            data=symbolic_repr,
            breakpoints_avg_middle_=self.breakpoints_avg_middle_,
            breakpoints_slope_middle_=self.breakpoints_slope_middle_,
            _X_fit_dims_=self._X_fit_dims_,
        )

class DistInterfaceOneD(BaseEstimator):
    """Pipeline step turning ``MyOneD`` outputs into a pairwise distance matrix.

    ``fit`` memorises the training representation and the attributes carried
    by the input ``Bunch``; ``transform`` returns the distances between the
    given signals and the memorised training signals.
    """

    def fit(self, X, y=None, *args, **kwargs):
        """Store the fields of the ``Bunch`` ``X``; ``y`` and extras are ignored."""
        self._X_fit_dims_ = X._X_fit_dims_
        self.breakpoints_slope_middle_ = X.breakpoints_slope_middle_
        self.breakpoints_avg_middle_ = X.breakpoints_avg_middle_
        self.X_transformed = X.data
        return self

    def distance_1D_sax(self, sax1, sax2):
        """Distance between two 1d-SAX representations, via ``cydist_1d_sax``."""
        # Second entry of the dimensions recorded when the 1d-SAX model was
        # fitted (presumably the original series length — TODO confirm).
        second_fit_dim = self._X_fit_dims_[1]
        return cydist_1d_sax(
            sax1,
            sax2,
            self.breakpoints_avg_middle_,
            self.breakpoints_slope_middle_,
            second_fit_dim,
        )

    def transform(self, X, y=None):
        """Distances from the signals in ``X.data`` to the stored training signals."""
        # Resulting layout: (n_samples_test, n_samples_train)
        return SymbolicSignalDistance.collection_dist(
            list_of_test_signals=X.data,
            list_of_train_signals=self.X_transformed,
            metric=self.distance_1D_sax,
        )

# Symbolization processing time

In [10]:
# Number of fit+transform repetitions each symbolization timing below is averaged over.
n_iter = 10

In [11]:
# Vanilla SAX: uniform segmentation, mean feature per segment, Gaussian
# quantification, "mindist" lookup table.

pipe_sax_symb = make_pipeline(
    TimeSeriesScalerMeanVariance(),
    Segmentation(
        univariate_or_multivariate="multivariate",
        uniform_or_adaptive="uniform",
        mean_or_slope=None,
        n_segments=N_SEGMENTS,
        pen_factor=None,
    ),
    SegmentFeature(features_names=["mean"]),
    Symbolization(
        n_symbols=N_SYMBOLS,
        symb_method="quantif",
        symb_quantif_method="gaussian",
        symb_cluster_method=None,
        features_scaling=None,
        reconstruct_bool=False,
        n_regime_lengths=None,
        seglen_bins_method=None,
        lookup_table_type="mindist",
    ),
)

# CPU time of n_iter consecutive fit+transform passes, averaged per pass.
t_start = process_time()
for _pass in range(n_iter):
    _ = pipe_sax_symb.fit(l_train).transform(l_train_test)
t_stop = process_time()

symb_time_sax = (t_stop - t_start) / n_iter
print("Elapsed time for Vanilla SAX in seconds:\n\t", round(symb_time_sax, 2))

Elapsed time for Vanilla SAX in seconds:
	 0.27


In [12]:
# vanilla SAX (tslearn): single-step pipeline around the MySAX wrapper.

pipe_sax_tslearn_symb = make_pipeline(
    MySAX(n_segments=N_SEGMENTS, alphabet_size_avg=N_SYMBOLS, scale=True),
)

# CPU time of n_iter consecutive fit+transform passes, averaged per pass.
t_start = process_time()
for _pass in range(n_iter):
    _ = pipe_sax_tslearn_symb.fit(l_train).transform(l_train_test)
t_stop = process_time()

symb_time_sax_tslearn = (t_stop - t_start) / n_iter
print("Elapsed time for vanilla SAX (tslearn) in seconds:\n\t", round(symb_time_sax_tslearn, 2))

Elapsed time for vanilla SAX (tslearn) in seconds:
	 0.02




In [13]:
# 1d-SAX (tslearn): single-step pipeline around the MyOneD wrapper, with
# separate alphabet sizes for the mean and the slope.

pipe_1dsax_symb = make_pipeline(
    MyOneD(
        n_segments=N_SEGMENTS,
        alphabet_size_avg=N_SYMBOLS_MEAN,
        alphabet_size_slope=N_SYMBOLS_SLOPE,
        sigma_l=1.,
        scale=True,
    ),
)

# CPU time of n_iter consecutive fit+transform passes, averaged per pass.
t_start = process_time()
for _pass in range(n_iter):
    _ = pipe_1dsax_symb.fit(l_train).transform(l_train_test)
t_stop = process_time()

symb_time_1dsax = (t_stop - t_start) / n_iter
print("Elapsed time for 1d-SAX in seconds:\n\t", round(symb_time_1dsax, 2))



Elapsed time for 1d-SAX in seconds:
	 0.44


In [14]:
# ASTRIDE: adaptive segmentation on the mean, mean feature per segment,
# quantile-based quantification, reconstruction enabled, "mof" lookup table.

pipe_astride_symb = make_pipeline(
    TimeSeriesScalerMeanVariance(),
    Segmentation(
        univariate_or_multivariate="multivariate",
        uniform_or_adaptive="adaptive",
        mean_or_slope="mean",
        n_segments=N_SEGMENTS,
        pen_factor=None,
    ),
    SegmentFeature(features_names=["mean"]),
    Symbolization(
        n_symbols=N_SYMBOLS,
        symb_method="quantif",
        symb_quantif_method="quantiles",
        symb_cluster_method=None,
        features_scaling=None,
        reconstruct_bool=True,
        n_regime_lengths="divide_exact",
        seglen_bins_method=None,
        lookup_table_type="mof",
    ),
)

# CPU time of n_iter consecutive fit+transform passes, averaged per pass.
t_start = process_time()
for _pass in range(n_iter):
    _ = pipe_astride_symb.fit(l_train).transform(l_train_test)
t_stop = process_time()

symb_time_astride = (t_stop - t_start) / n_iter
print("Elapsed time for regular ASTRIDE in seconds:\n\t", round(symb_time_astride, 2))

Elapsed time for regular ASTRIDE in seconds:
	 0.31


In [15]:
# FASTRIDE: uniform segmentation, mean feature per segment, quantile-based
# quantification, no reconstruction, "mof" lookup table.

pipe_fastride_symb = make_pipeline(
    TimeSeriesScalerMeanVariance(),
    Segmentation(
        univariate_or_multivariate="multivariate",
        uniform_or_adaptive="uniform",
        mean_or_slope=None,
        n_segments=N_SEGMENTS,
        pen_factor=None,
    ),
    SegmentFeature(features_names=["mean"]),
    Symbolization(
        n_symbols=N_SYMBOLS,
        symb_method="quantif",
        symb_quantif_method="quantiles",
        symb_cluster_method=None,
        features_scaling=None,
        reconstruct_bool=False,
        n_regime_lengths=None,
        seglen_bins_method=None,
        lookup_table_type="mof",
    ),
)

# Each fit+transform pass is timed on its own (CPU time); the per-pass
# durations are summed and then averaged.
cumul_time = 0
for _pass in range(n_iter):
    t_start = process_time()
    _ = pipe_fastride_symb.fit(l_train).transform(l_train_test)
    cumul_time += process_time() - t_start

symb_time_fastride = cumul_time / n_iter
print("Elapsed time for fASTRIDE in seconds:\n\t", round(symb_time_fastride, 2))

Elapsed time for fASTRIDE in seconds:
	 0.27


# 1-NN classification processing time

In [16]:
# Number of predict() repetitions each classification timing below is averaged over.
n_iter = 10

In [17]:
# Vanilla SAX by default: Euclidean symbolic distance fed to a 1-NN classifier
# working on precomputed distances.

pipe_sax_clf = make_pipeline(
    SymbolicSignalDistance(
        distance="euclidean",
        n_samples=N_SAMPLES,
        weighted_bool=True,
    ),
    KNeighborsClassifier(n_neighbors=1, metric="precomputed"),
)

# Symbolize both splits with the pipeline fitted on the train split only.
pipe_sax_symb.fit(l_train)
bunch_train = pipe_sax_symb.transform(l_train)
bunch_test = pipe_sax_symb.transform(l_test)
print("Total number of symbols:", np.sum(list(map(len, bunch_test.list_of_symbolic_signals))))
pipe_sax_clf.fit(bunch_train, y_train)

# Only predict() is timed (CPU time); fitting is excluded from the measurement.
cumul_time = 0
for _run in range(n_iter):
    t_start = process_time()
    _ = pipe_sax_clf.predict(bunch_test)
    cumul_time += process_time() - t_start

clf_time_sax = cumul_time / n_iter
print("Elapsed time for Vanilla SAX in seconds:\n\t", round(clf_time_sax, 2))

Total number of symbols: 1000
Elapsed time for Vanilla SAX in seconds:
	 0.08


In [18]:
# vanilla SAX (tslearn): SAX distance matrix fed to a 1-NN classifier working
# on precomputed distances.

pipe_sax_tslearn_clf = make_pipeline(
    DistInterfaceSAX(),
    KNeighborsClassifier(n_neighbors=1, metric="precomputed"),
)

# Symbolize both splits with the pipeline fitted on the train split only.
pipe_sax_tslearn_symb.fit(l_train)
bunch_train = pipe_sax_tslearn_symb.transform(l_train)
bunch_test = pipe_sax_tslearn_symb.transform(l_test)
pipe_sax_tslearn_clf.fit(bunch_train, y_train)

# Only predict() is timed (CPU time); fitting is excluded from the measurement.
cumul_time = 0
for _run in range(n_iter):
    t_start = process_time()
    _ = pipe_sax_tslearn_clf.predict(bunch_test)
    cumul_time += process_time() - t_start

clf_time_sax_tslearn = cumul_time / n_iter
print("Elapsed time for vanilla SAX (`tslearn`) in seconds:\n\t", round(clf_time_sax_tslearn, 2))



Elapsed time for vanilla SAX (`tslearn`) in seconds:
	 0.11


In [19]:
# 1d-SAX: 1d-SAX distance matrix fed to a 1-NN classifier working on
# precomputed distances.

pipe_1dsax_clf = make_pipeline(
    DistInterfaceOneD(),
    KNeighborsClassifier(n_neighbors=1, metric="precomputed"),
)

# Symbolize both splits with the pipeline fitted on the train split only.
pipe_1dsax_symb.fit(l_train)
bunch_train = pipe_1dsax_symb.transform(l_train)
bunch_test = pipe_1dsax_symb.transform(l_test)
pipe_1dsax_clf.fit(bunch_train, y_train)

# Only predict() is timed (CPU time); fitting is excluded from the measurement.
cumul_time = 0
for _run in range(n_iter):
    t_start = process_time()
    _ = pipe_1dsax_clf.predict(bunch_test)
    cumul_time += process_time() - t_start

clf_time_1dsax = cumul_time / n_iter
print("Elapsed time for 1d-SAX in seconds:\n\t", round(clf_time_1dsax, 2))



Elapsed time for 1d-SAX in seconds:
	 0.22


In [20]:
# ASTRIDE: "lev" symbolic distance fed to a 1-NN classifier working on
# precomputed distances.

pipe_astride_clf = make_pipeline(
    SymbolicSignalDistance(
        distance="lev",
        n_samples=None,
        weighted_bool=True,
    ),
    KNeighborsClassifier(n_neighbors=1, metric="precomputed"),
)

# Symbolize both splits with the pipeline fitted on the train split only.
pipe_astride_symb.fit(l_train)
bunch_train = pipe_astride_symb.transform(l_train)
bunch_test = pipe_astride_symb.transform(l_test)
print("Total number of symbols:", np.sum(list(map(len, bunch_test.list_of_symbolic_signals))))
pipe_astride_clf.fit(bunch_train, y_train)

# Only predict() is timed (CPU time); fitting is excluded from the measurement.
cumul_time = 0
for _run in range(n_iter):
    t_start = process_time()
    _ = pipe_astride_clf.predict(bunch_test)
    cumul_time += process_time() - t_start

clf_time_astride = cumul_time / n_iter
print("Elapsed time for regular ASTRIDE in seconds:\n\t", round(clf_time_astride, 2))

Total number of symbols: 4800
Elapsed time for regular ASTRIDE in seconds:
	 0.17


In [21]:
# FASTRIDE: "lev" symbolic distance fed to a 1-NN classifier working on
# precomputed distances.

pipe_fastride_clf = make_pipeline(
    SymbolicSignalDistance(
        distance="lev",
        n_samples=None,
        weighted_bool=True,
    ),
    KNeighborsClassifier(n_neighbors=1, metric="precomputed"),
)

# Symbolize both splits with the pipeline fitted on the train split only.
pipe_fastride_symb.fit(l_train)
bunch_train = pipe_fastride_symb.transform(l_train)
bunch_test = pipe_fastride_symb.transform(l_test)
print("Total number of symbols:", np.sum(list(map(len, bunch_test.list_of_symbolic_signals))))
pipe_fastride_clf.fit(bunch_train, y_train)

# Only predict() is timed (CPU time); fitting is excluded from the measurement.
cumul_time = 0
for _run in range(n_iter):
    t_start = process_time()
    _ = pipe_fastride_clf.predict(bunch_test)
    cumul_time += process_time() - t_start

clf_time_fastride = cumul_time / n_iter
print("Elapsed time for FASTRIDE in seconds:\n\t", round(clf_time_fastride, 2))

Total number of symbols: 1000
Elapsed time for FASTRIDE in seconds:
	 0.07


# Assembling everything

In [22]:
# Recall the experimental setting next to the timing table.
print(f"{DATASET_NAME_UCR = }")
print(f"{N_SEGMENTS = }")
print(f"{N_SYMBOLS = }")
print(f"{N_SAMPLES = }")
print(f"{len(X_train) = }")
print(f"{len(X_test) = }")

# One row per method, one column per timed stage (mean seconds per run).
d = {
    "symb": [
        symb_time_sax,
        symb_time_sax_tslearn,
        symb_time_1dsax,
        symb_time_astride,
        symb_time_fastride,
    ],
    "clf": [
        clf_time_sax,
        clf_time_sax_tslearn,
        clf_time_1dsax,
        clf_time_astride,
        clf_time_fastride,
    ],
}
df = pd.DataFrame(
    d,
    index=["SAX", "SAX (tslearn)", "1d-SAX (tslearn)", "ASTRIDE", "FASTRIDE"],
)
display(df.round(2))

DATASET_NAME_UCR = 'ECG200'
N_SEGMENTS = 10
N_SYMBOLS = 9
N_SAMPLES = 96
len(X_train) = 100
len(X_test) = 100


Unnamed: 0,symb,clf
SAX,0.27,0.08
SAX (tslearn),0.02,0.11
1d-SAX (tslearn),0.44,0.22
ASTRIDE,0.31,0.17
FASTRIDE,0.27,0.07


*Note*: This table corresponds to Table 6 of the paper.