In [43]:
import numpy as np
import time

def sim_data(n_samples=1000, n_timepoints=2048, seed=None):
    """Simulate a panel of noisy sinusoids with random binary labels.

    Every row is the same sine wave sampled over one full period
    ``[0, 2*pi]``, plus independent noise drawn uniformly from ``[0, 1)``.
    Note the noise is therefore NOT zero-mean. Labels are drawn at random,
    independently of the series.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of series (rows of ``x``).
    n_timepoints : int, default 2048
        Number of time points per series (columns of ``x``).
    seed : int or None, default None
        If given, a dedicated ``np.random.RandomState(seed)`` is used so the
        output is reproducible. If None, the global ``np.random`` state is
        used (the original behaviour).

    Returns
    -------
    x : np.ndarray of float, shape (n_samples, n_timepoints)
    y : np.ndarray of str, shape (n_samples,)
        Labels ``'0'`` / ``'1'``.
    """
    # Global state by default keeps callers that rely on np.random.seed() working.
    rng = np.random if seed is None else np.random.RandomState(seed)

    t = np.linspace(0, 2 * np.pi, n_timepoints)
    # One copy of the clean sinusoid per sample.
    x = np.tile(np.sin(t), (n_samples, 1))
    x += rng.uniform(0, 1, size=x.shape)
    y = np.array(rng.choice([0, 1], x.shape[0]), dtype=str)
    return x, y

def sktime_knn(x, y, distance='dtw'):
    """Fit an sktime k-NN time-series classifier and score it on the training data.

    Parameters
    ----------
    x : 2D array
        Passed to ``from_2d_array_to_nested`` to obtain sktime's data
        representation.
    y : array-like
        Labels, one per row of ``x``.
    distance : str, default 'dtw'
        Forwarded to ``KNeighborsTimeSeriesClassifier(distance=...)``.

    Returns
    -------
    The fitted ``KNeighborsTimeSeriesClassifier``.

    Notes
    -----
    Only the fit time is printed here. The ``score`` call is executed so that
    a caller timing this function also measures inference; its result is
    discarded.
    """
    # sktime is imported lazily so the notebook's other cells do not require it.
    from sktime.utils.data_processing import from_2d_array_to_nested
    from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

    # convert to sktime data rep
    X_train = from_2d_array_to_nested(x)

    # init model
    clf = KNeighborsTimeSeriesClassifier(distance=distance)

    ## clock fit (perf_counter is monotonic, unlike time.time)
    tic = time.perf_counter()
    clf.fit(X_train, y)
    print('... fit in {:.1f}-s'.format(time.perf_counter() - tic))

    # eval -- author's observation: SLOW, may not complete (predict is the
    # slow step inside score)
    clf.score(X_train, y)

    return clf

def sktime_clf(x, y, model='shapelet'):
    """Fit one of several sktime classifiers and score it on the training data.

    Parameters
    ----------
    x : 2D array
        Passed to ``from_2d_array_to_nested`` to obtain sktime's data
        representation.
    y : array-like
        Labels, one per row of ``x``.
    model : {'shapelet', 'cboss', 'RF'}, default 'shapelet'
        Which classifier to build:
        ``ShapeletTransformClassifier``, ``ContractableBOSS`` or
        ``TimeSeriesForestClassifier`` (each with default arguments).

    Returns
    -------
    The fitted classifier instance.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names. (Previously this
        only printed a TODO and then failed with an UnboundLocalError.)

    Notes
    -----
    Only the fit time is printed here. The ``score`` call is executed so that
    a caller timing this function also measures inference; its result is
    discarded.
    """
    from sktime.utils.data_processing import from_2d_array_to_nested

    # Resolve the model first so an unsupported name fails before any data
    # conversion or fitting work is done. Imports stay per-branch so only the
    # requested classifier's module is loaded.
    if model == 'shapelet':
        from sktime.classification.shapelet_based import ShapeletTransformClassifier
        clf = ShapeletTransformClassifier()
    elif model == 'cboss':
        from sktime.classification.dictionary_based import ContractableBOSS
        clf = ContractableBOSS()
    elif model == 'RF':
        from sktime.classification.interval_based import TimeSeriesForestClassifier
        clf = TimeSeriesForestClassifier()
    else:
        # TODO (enhancement): add other models
        raise ValueError(
            "unsupported model {!r}; expected one of 'shapelet', 'cboss', 'RF'".format(model)
        )

    # convert to sktime data rep
    X_train = from_2d_array_to_nested(x)

    # clock fit (perf_counter is monotonic, unlike time.time)
    tic = time.perf_counter()
    clf.fit(X_train, y)
    print('... fit in {:.1f}-s'.format(time.perf_counter() - tic))

    # eval -- author's observation: SLOW, may not complete (predict is the
    # slow step inside score)
    clf.score(X_train, y)

    return clf
    
def sklearn_clf(x, y):
    """Baseline: fit a default sklearn ``KNeighborsClassifier`` and score it on ``x``.

    The only difference from the sktime helpers is that ``x`` is used
    directly as the training matrix, with no ``from_2d_array_to_nested``
    conversion. The fit time is printed; the score is computed (so callers
    timing this function include inference) but its value is discarded.

    Returns the fitted classifier.
    """
    from sklearn.neighbors import KNeighborsClassifier

    knn = KNeighborsClassifier()

    # time the fit step on the raw 2D array
    started = time.time()
    knn.fit(x, y)
    elapsed = time.time() - started
    print('... fit in {:.1f}-s'.format(elapsed))

    # inference on the training data itself
    knn.score(x, y)

    return knn

In [26]:
# main: build the dataset that every timing cell below reuses

## get data: x holds the simulated series, y the labels (see sim_data)
x, y = sim_data()

In [27]:
## clock comparable sklearn classifier
## (wall-clock covers both the fit and the score call inside sklearn_clf;
##  the returned classifier is discarded via `_`)
tic = time.time()
_ = sklearn_clf(x, y)
print('sklearn clf train/inf in: {:.1f}-s'.format(time.time() - tic))

... fit in 0.0-s
sklearn clf train/inf in: 0.1-s


In [28]:
## clock sktime classifiers: k-NN with euclidean distance
## (wall-clock covers data conversion, fit and score inside sktime_knn)
tic = time.time()
_ = sktime_knn(x, y, distance='euclidean')
print('sktime knn-euclidean train/inf in: {:.1f}-s'.format(time.time() - tic))

... fit in 0.0-s
sktime knn-euclidean train/inf in: 46.2-s


In [22]:
#! NOTE: possible bug or performance problem -- with distance='erp' this cell
#! did not finish within 5 min and had to be interrupted manually.

## clock sktime classifiers: k-NN with ERP distance
tic = time.time()
_ = sktime_knn(x, y, distance='erp')
print('sktime knn-erp train/inf in: {:.1f}-s'.format(time.time() - tic))

KeyboardInterrupt: 

In [None]:
#! NOTE2: same symptom as the 'erp' run -- with distance='dtw' this cell did
#! not finish within 5 min and had to be interrupted manually.

## clock sktime classifiers: k-NN with DTW distance
tic = time.time()
_ = sktime_knn(x, y, distance='dtw')
print('sktime knn-dtw train/inf in: {:.1f}-s'.format(time.time() - tic))

In [44]:
### other sktime classifiers (see sktime_clf for the supported `model` names)

#! NOTE3: suspected bug / performance problem -- the author reports having to
#! kill the run after 5 min. NOTE(review): the recorded output for model='RF'
#! shows the run completing, so this presumably refers to the other models
#! ('shapelet', 'cboss') -- confirm.

## clock sktime classifiers
tic = time.time()
_ = sktime_clf(x, y, model='RF')
print('sktime clf train/inf in: {:.1f}-s'.format(time.time() - tic))

... fit in 55.5-s
sktime clf train/inf in: 81.9-s
