In [7]:
import random
import numpy as np
import pandas as pd

In [8]:
from sklearn.pipeline import Pipeline
from sktime.datasets import load_basic_motions
from sktime.transformers.series_as_features.compose import ColumnConcatenator
from sktime.classification.compose import TimeSeriesForestClassifier

# Ordinary situation

In [9]:
X_train, y_train = load_basic_motions(split='TRAIN', return_X_y=True)
X_test, y_test = load_basic_motions(split='TEST', return_X_y=True)


steps = [
    ('concatenate', ColumnConcatenator()),
    ('classify', TimeSeriesForestClassifier(n_estimators=100))]
clf = Pipeline(steps)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

1.0

# If time serial are unequal length -> algorithm raises an error

In [10]:
# randomly cut the data series
def random_cut(df):
    for row_i in range(df.shape[0]):
        for dim_i in range(df.shape[1]):
            ts = df.at[row_i, f'dim_{dim_i}']
            df.at[row_i, f'dim_{dim_i}'] = pd.Series(ts.tolist()[:random.randint(len(ts)-5, len(ts)-3)]) # here is a problem

In [16]:
X_train, y_train = load_basic_motions(split='TRAIN', return_X_y=True)
X_test, y_test = load_basic_motions(split='TEST', return_X_y=True)
            
for df in [X_train, X_test]:
    random_cut(df)
    
try:
    steps = [
        ('concatenate', ColumnConcatenator()),
        ('classify', TimeSeriesForestClassifier(n_estimators=100))]
    clf = Pipeline(steps)
    clf.fit(X_train, y_train)
    clf.score(X_test, y_test)
except ValueError as e:
    print(f"IndexError: {e}")

IndexError: Tabularization failed, it's possible that not all series were of equal length


# Now the resizing enrolls

In [19]:
from sktime.transformers.series_as_features.interpolate import TSInterpolator 

X_train, y_train = load_basic_motions(split='TRAIN', return_X_y=True)
X_test, y_test = load_basic_motions(split='TEST', return_X_y=True)
            
for df in [X_train, X_test]:
    random_cut(df)
    
steps = [
    ('transform', TSInterpolator(50)),
    ('concatenate', ColumnConcatenator()),
    ('classify', TimeSeriesForestClassifier(n_estimators=100))]
clf = Pipeline(steps)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

1.0