In [1]:
import timeit
import warnings
import numpy as np
from benchmarks.composition_benchmark import \
    SETUP, PACKAGES, \
    SEGM, \
    MEAN, STD, SLOPE, \
    UNION, TREE, \
    PIPELINE_SETUP, PIPELINE_RUN



In [2]:
# Number of independent timing repetitions (passed to timeit.repeat as repeat=).
repeats = 5
# Number of statement executions per repetition (passed as number=); run_sizes
# divides each total by this value to report seconds per single execution.
runs = 20

# Import lines substituted into SETUP's "packages" slot for the alternative
# (extensionarray-based) implementation.
ALT_PACKAGES = """
from sktime.utils.data_container import tabularize
from extensionarray.timeframe import TimeFrame
from extensionarray.reimplement import RandomIntervalSegmenter
from extensionarray.reimplement import RowwiseTransformer
"""

# Expression substituted into SETUP's "X" slot. The braces are doubled on purpose:
# text inserted by the first str.format call is not re-parsed, but the resulting
# setup string is formatted a second time with size=..., which collapses
# "{{" / "}}" into the literal "{" / "}" of the dict.
ALT_X = """TimeFrame(data={{'dim_0': tabularize(X_base['dim_0'], return_array=True), 'class_val': X_base['class_val']}})"""




In [3]:
def run_sizes(SETUP_CODE, TEST_CODE, sizes=('small', 'medium', 'large'), repeat=None, number=None):
    """Time TEST_CODE for several data sizes and print seconds per execution.

    Parameters
    ----------
    SETUP_CODE : str
        Setup source containing a ``{size}`` placeholder. It is passed through
        ``str.format(size=...)`` for every entry of ``sizes``, so any other
        literal braces in it must be doubled. It runs as the (untimed)
        ``setup`` of ``timeit.repeat``.
    TEST_CODE : str
        Source of the statement that is timed.
    sizes : iterable of str, optional
        Size labels to benchmark, in order. Defaults to the three sizes the
        notebook has always used.
    repeat : int, optional
        Number of timing repetitions. Defaults to the module-level ``repeats``.
    number : int, optional
        Executions per repetition. Defaults to the module-level ``runs``.

    Raises
    ------
    ValueError
        If the effective ``number`` is smaller than 1 (the per-run average
        would be meaningless).

    Notes
    -----
    Nothing is returned; for each size the label, the array of per-execution
    times (one entry per repetition) and a blank separator are printed.
    """
    # Resolve the defaults at call time so later edits to the config cell are honoured.
    n_repeats = repeats if repeat is None else repeat
    n_runs = runs if number is None else number
    if n_runs < 1:
        raise ValueError("number of runs per repetition must be >= 1, got {!r}".format(n_runs))

    for size in sizes:
        totals = np.array(timeit.repeat(
            setup=SETUP_CODE.format(size=size),
            stmt=TEST_CODE,
            repeat=n_repeats,
            number=n_runs,
        ))

        print(size + ":")
        # timeit reports the total time for `number` executions; normalise to one execution.
        print(totals / n_runs)
        print("\n")

# Segmentation

## Current implementation

In [4]:
# Setup template for the current implementation: the packages and X slots are
# filled now, while a literal "{size}" placeholder is re-inserted so the data
# size can be chosen later with a second .format(size=...) call.
STD_SETUP = SETUP.format(packages=PACKAGES, X="X_base", size="{size}")
# Sanity check on the small dataset: run setup + SEGM in the notebook namespace
# and print the first rows of X_segm (presumably created by SEGM).
exec(STD_SETUP.format(size='small') + SEGM + "print(X_segm.head())")

                                        dim_0_73_144  \
0  [1.8206, 1.8172, 1.8316, 1.8316, 1.829, 1.8454...   
1  [1.9097, 1.9175, 1.919, 1.919, 1.9252, 1.9172,...   
2  [1.6966, 1.6942, 1.6887, 1.6887, 1.6866, 1.678...   
3  [1.7762, 1.7765, 1.7774, 1.7774, 1.7719, 1.764...   
4  [2.0341, 2.0241, 2.0245, 2.0245, 2.0274, 2.013...   

                                         dim_0_37_92  \
0  [-0.65436, -0.65252, -0.65132, -0.64814, -0.64...   
1  [-0.59801, -0.59754, -0.59777, -0.59699, -0.59...   
2  [-0.71917, -0.66384, -0.63927, -0.63442, -0.62...   
3  [-0.57064, -0.57865, -0.58557, -0.5858, -0.574...   
4  [-0.57038, -0.56544, -0.56457, -0.56116, -0.55...   

                                       dim_0_108_136  
0  [-0.54788, -0.6577, -0.74082, -0.77535, -0.782...  
1  [-0.8142, -0.80859, -0.79895, -0.78046, -0.763...  
2  [-0.53307, -0.55715, -0.56284, -0.57744, -0.59...  
3  [-0.53482, -0.5397, -0.54971, -0.5532, -0.5542...  
4  [-0.54124, -0.5239, -0.5189, -0.52143, -0.5298..

In [5]:
# Time SEGM alone; STD_SETUP runs as the untimed timeit setup for each size.
run_sizes(STD_SETUP, SEGM)


small:
[0.00441236 0.00441451 0.00580169 0.00403505 0.00397807]


medium:
[0.07447419 0.07665882 0.07691327 0.07531359 0.06344091]


large:
[0.6458689  0.60950201 0.66679824 0.67645553 0.63480974]




## ExtensionArray + Numpy implementation

In [6]:
# Setup template for the alternative implementation: same SETUP, but with
# ALT_PACKAGES as imports and ALT_X as the input expression. "{size}" is
# re-inserted as a placeholder for the later .format(size=...) call, which also
# collapses the doubled braces carried in ALT_X.
ALT_SETUP = SETUP.format(packages=ALT_PACKAGES, X=ALT_X, size="{size}")
# Sanity check on the small dataset: run setup + SEGM and print the head of X_segm.
exec(ALT_SETUP.format(size='small') + SEGM + "print(X_segm.head())")

                                       dim_0_110_143  \
0  [-0.74082 -0.77535 -0.78246 -0.76929 -0.74328 ...   
1  [-0.79895 -0.78046 -0.76326 -0.75208 -0.73252 ...   
2  [-0.56284 -0.57744 -0.59807 -0.6201  -0.63717 ...   
3  [-0.54971 -0.5532  -0.55425 -0.55387 -0.55306 ...   
4  [-0.5189  -0.52143 -0.52985 -0.5413  -0.5559  ...   

                                         dim_0_26_62  \
0  [-0.66141 -0.66145 -0.66037 -0.65911 -0.65974 ...   
1  [-0.61068  -0.61113  -0.61108  -0.61171  -0.61...   
2  [-0.7699   -0.77251  -0.77291  -0.77447  -0.77...   
3  [-0.7412   -0.74076  -0.73883  -0.73786  -0.72...   
4  [-0.57425  -0.57437  -0.57325  -0.57393  -0.57...   

                                        dim_0_62_148  
0  [ 0.60381   0.7971    0.98755   1.2038    1.40...  
1  [ 1.4187    1.5884    1.7182    1.8249    1.84...  
2  [ 1.675     1.6882    1.6986    1.7035    1.70...  
3  [ 1.7661    1.7717    1.7736    1.7781    1.78...  
4  [ 2.0409    2.0521    2.0534    2.0363    2.03..

In [7]:
# Time SEGM alone with the alternative setup (untimed) for each size.
run_sizes(ALT_SETUP, SEGM)


small:
[0.00329657 0.00394008 0.0034526  0.0031873  0.00320282]


medium:
[0.00431937 0.00433746 0.00460315 0.00515454 0.00430318]


large:
[0.02498852 0.02691219 0.02294537 0.02161233 0.0246959 ]




# Rowwise transformation

## Current implementation

In [8]:
# Sanity check: run setup, SEGM and MEAN on the small dataset and print X_mean.
# SEGM is re-executed here, so the intervals (column names) differ from other cells.
exec(STD_SETUP.format(size='small') + SEGM + MEAN + "print(X_mean.head())")


   dim_0_133_146  dim_0_67_147  dim_0_96_119
0      -0.637561      0.412180     -0.039188
1      -0.640975      0.242139     -0.512866
2      -0.722818      0.266120     -0.264547
3      -0.729214      0.163796     -0.428085
4      -0.637082      0.033954     -0.600508


In [9]:
# Sanity check: run setup, SEGM and STD on the small dataset and print X_std.
exec(STD_SETUP.format(size='small') + SEGM + STD + "print(X_std.head())")


   dim_0_91_102  dim_0_104_117  dim_0_27_55
0      0.354665       0.297043     0.023377
1      0.523599       0.037384     0.175976
2      0.484023       0.097999     0.305916
3      0.431976       0.022855     0.435752
4      0.098442       0.032886     0.512488


In [10]:
# Sanity check: run setup, SEGM and SLOPE on the small dataset and print X_slope.
exec(STD_SETUP.format(size='small') + SEGM + SLOPE + "print(X_slope.head())")


   dim_0_92_142  dim_0_22_148  dim_0_147_150
0     -0.043489     -0.001830       0.000885
1     -0.021316     -0.005554       0.001665
2     -0.031875     -0.004706      -0.001460
3     -0.020318     -0.007317       0.000420
4      0.000641     -0.008796       0.005810


In [11]:
# Segmentation is moved into the untimed setup, so only MEAN + STD + SLOPE are timed.
# The whole setup string (including SEGM) goes through str.format(size=...) inside
# run_sizes, so anything appended here must not contain unescaped braces.
run_sizes(STD_SETUP + SEGM, MEAN + STD + SLOPE)



small:
[0.01232421 0.01086605 0.01038981 0.01039632 0.01036159]


medium:
[0.14375779 0.13624961 0.14395786 0.14756031 0.14178638]


large:
[1.33136347 1.39616992 1.37778796 1.35339908 1.33615408]




## ExtensionArray + Numpy implementation

In [12]:
# Alternative mean step: wraps np.mean in the RowwiseTransformer imported by
# ALT_PACKAGES and applies it to X_segm, storing the result in X_mean.
ALT_MEAN = """
mean_transformer = RowwiseTransformer(np.mean)
X_mean = mean_transformer.fit_transform(X_segm)
"""
# Sanity check on the small dataset: setup + SEGM + ALT_MEAN, then print X_mean.
exec(ALT_SETUP.format(size='small') + SEGM + ALT_MEAN + "print(X_mean.head())")

   dim_0_104_122  dim_0_110_142  dim_0_6_138
0      -0.574764      -0.668839     0.087215
1      -0.744521      -0.675340     0.087332
2      -0.553785      -0.653108     0.100037
3      -0.554771      -0.685163     0.099865
4      -0.568657      -0.596140     0.085525


In [13]:
# Alternative standard-deviation step: same pattern as the mean step, using np.std
# and storing the result in X_std.
ALT_STD = """
std_transformer = RowwiseTransformer(np.std)
X_std = std_transformer.fit_transform(X_segm)
"""
# Sanity check on the small dataset: setup + SEGM + ALT_STD, then print X_std.
exec(ALT_SETUP.format(size='small') + SEGM + ALT_STD + "print(X_std.head())")

   dim_0_129_131  dim_0_86_90  dim_0_8_49
0       0.000065     0.007004    0.007037
1       0.000135     0.111519    0.021685
2       0.007555     0.015713    0.077793
3       0.003460     0.132396    0.125016
4       0.002315     0.289037    0.127197


In [14]:
# Alternative slope step: applies time_series_slope row-wise to X_segm and stores
# the result in X_slope.
# NOTE(review): time_series_slope is not imported by ALT_PACKAGES; presumably SETUP
# (or an earlier exec in this kernel) provides it -- confirm on a fresh kernel.
ALT_SLOPE = """
slope_transformer = RowwiseTransformer(time_series_slope)
X_slope = slope_transformer.fit_transform(X_segm)
"""
# Sanity check on the small dataset: setup + SEGM + ALT_SLOPE, then print X_slope.
exec(ALT_SETUP.format(size='small') + SEGM + ALT_SLOPE + "print(X_slope.head())")

   dim_0_132_139  dim_0_59_88  dim_0_43_69
0      -0.000403     0.049860     0.097277
1       0.000334     0.020416     0.121462
2       0.002301    -0.000090     0.122895
3       0.008018    -0.003230     0.114952
4      -0.004553    -0.034456     0.120464


In [15]:
# Segmentation runs in the untimed setup; only the three alternative row-wise
# steps (ALT_MEAN + ALT_STD + ALT_SLOPE) are timed.
run_sizes(ALT_SETUP + SEGM, ALT_MEAN + ALT_STD + ALT_SLOPE)


small:
[0.01236816 0.01060348 0.01045412 0.00981298 0.00987581]


medium:
[0.14635239 0.13931574 0.13824431 0.1396536  0.13847   ]


large:
[1.49937354 1.48939394 1.51647517 1.50939551 1.5125714 ]




# Fitting a decision tree

## Current implementation

In [16]:
# Sanity check: run every step up to and including TREE on the small dataset
# and print dt (the printed output shows a DecisionTreeClassifier).
exec(STD_SETUP.format(size='small') + SEGM + MEAN + STD + SLOPE + UNION + TREE + "print(dt)")

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [17]:
# Segmentation and the row-wise steps run in the untimed setup; only UNION + TREE are timed.
run_sizes(STD_SETUP + SEGM + MEAN + STD + SLOPE, UNION + TREE)



small:
[0.00106064 0.00105854 0.00105967 0.00118644 0.00210369]


medium:
[0.00200619 0.00183035 0.00176818 0.00181486 0.00169548]


large:
[0.01270033 0.01159061 0.01173287 0.01084395 0.01278779]




## ExtensionArray + Numpy implementation

In [18]:
# Sanity check: run every alternative step up to and including TREE on the small
# dataset and print dt.
exec(ALT_SETUP.format(size='small') + SEGM + ALT_MEAN + ALT_STD + ALT_SLOPE + UNION + TREE + "print(dt)")

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [19]:
# Alternative segmentation and row-wise steps run in the untimed setup; only
# UNION + TREE are timed.
run_sizes(ALT_SETUP + SEGM + ALT_MEAN + ALT_STD + ALT_SLOPE, UNION + TREE)



small:
[0.0011536  0.00102276 0.00139262 0.00107221 0.00107397]


medium:
[0.00194694 0.00186986 0.00168823 0.00190165 0.0017438 ]


large:
[0.00968788 0.0127235  0.01134488 0.01210373 0.01170516]




# Run the entire code (without Pipeline)



## Current implementation

In [20]:
# End-to-end timing of the current implementation: only STD_SETUP is untimed;
# every step from SEGM through TREE is part of the timed statement.
run_sizes(STD_SETUP, SEGM + MEAN + STD + SLOPE + UNION + TREE)

small:
[0.01733257 0.01826856 0.01486659 0.01465826 0.01621894]


medium:
[0.23066847 0.22011596 0.2175926  0.2243509  0.21838134]


large:
[1.98930447 2.0265968  2.05844606 2.01147538 1.94306061]




## ExtensionArray + Numpy implementation

In [21]:
# End-to-end timing of the alternative implementation: only ALT_SETUP is untimed;
# every step from SEGM through TREE is part of the timed statement.
run_sizes(ALT_SETUP, SEGM + ALT_MEAN + ALT_STD + ALT_SLOPE + UNION + TREE)

small:
[0.01636118 0.01432715 0.01413276 0.01408621 0.01366441]


medium:
[0.17042191 0.15812508 0.16013438 0.16113825 0.16511359]


large:
[1.44229146 1.62615226 1.5570826  1.41412449 1.58018648]




# Entire pipeline

## Current implementation

In [22]:
# Smoke test: build and run the pipeline once on the small dataset before timing it.
# Unlike the timed cell, this run is not wrapped in a warnings filter.
exec(STD_SETUP.format(size='small') + PIPELINE_SETUP + PIPELINE_RUN)



In [23]:
# Silence every warning only while the benchmark runs; catch_warnings restores
# the previous filter configuration when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # Pipeline construction is untimed setup; only PIPELINE_RUN is timed.
    run_sizes(SETUP_CODE=STD_SETUP + PIPELINE_SETUP, TEST_CODE=PIPELINE_RUN)



small:
[0.05023511 0.04771913 0.04771489 0.04806326 0.044281  ]


medium:
[0.63621557 0.62057692 0.69196533 0.67869243 0.6639045 ]


large:
[6.2014823  6.29271058 6.52939801 6.4855784  6.63195374]




In [24]:
# Pipeline definition for the alternative implementation: random-interval
# segmentation, a FeatureUnion of three row-wise transformers (mean, std, slope)
# and a decision tree, assembled into `base_estimator`.
# The string contains no braces, so it can be appended to a setup template that
# is later passed through str.format(size=...).
# NOTE(review): FeatureUnion, DecisionTreeClassifier, Pipeline and
# time_series_slope are not imported by ALT_PACKAGES; presumably SETUP provides
# them -- confirm on a fresh kernel.
ALT_PIPELINE_SETUP = """
steps = [
    ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
    ('transform', FeatureUnion([
        ('mean', RowwiseTransformer(np.mean)),
        ('std', RowwiseTransformer(np.std)),
        ('slope', RowwiseTransformer(time_series_slope))
    ])),
    ('clf', DecisionTreeClassifier())
]
base_estimator = Pipeline(steps, random_state=1)
"""

In [25]:
# Smoke test: build and run the alternative pipeline once on the small dataset
# before timing it (no warnings filter is applied here).
exec(ALT_SETUP.format(size='small') + ALT_PIPELINE_SETUP + PIPELINE_RUN)




In [26]:
# Silence every warning only while the benchmark runs; catch_warnings restores
# the previous filter configuration when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # Alternative pipeline construction is untimed setup; only PIPELINE_RUN is timed.
    run_sizes(SETUP_CODE=ALT_SETUP + ALT_PIPELINE_SETUP, TEST_CODE=PIPELINE_RUN)

small:
[0.05131963 0.0475224  0.05308473 0.05123933 0.04597206]


medium:
[0.62921237 0.6358315  0.69403446 0.61046803 0.64507376]


large:
[6.00081967 6.2081114  6.29292658 6.19396113 6.13856859]


