In [1]:
import timeit
import warnings
from benchmarks.composition_benchmark import SETUP, PACKAGES, SEGM, MEAN, STD, SLOPE, UNION, TREE, PIPELINE

In [2]:
# Timing configuration shared by the benchmark cells: timeit executes the
# statement `runs` times per measurement and takes `repeats` measurements.
runs = 20
repeats = 5

# Import lines substituted into SETUP (as `packages`) for the ExtensionArray variant.
ALT_PACKAGES = """
from sktime.utils.data_container import tabularize
from extensionarray.timeframe import TimeFrame
from extensionarray.reimplement import RandomIntervalSegmenter
from extensionarray.reimplement import RowwiseTransformer
"""

# Expression substituted into SETUP (as `X`) for the ExtensionArray variant.
# The braces are doubled on purpose: the setup template is formatted a second
# time to fill in `{size}`, and that second pass collapses `{{`/`}}` into the
# literal `{`/`}` of the dict.
ALT_X = (
    "TimeFrame(data={{"
    "'dim_0': tabularize(X_base['dim_0'], return_array=True), "
    "'class_val': X_base['class_val']"
    "}})"
)




In [3]:
def run_sizes(SETUP_CODE, TEST_CODE, sizes=("small", "medium", "large"), repeat=None, number=None):
    """Time ``TEST_CODE`` once per dataset size and print the raw timings.

    Parameters
    ----------
    SETUP_CODE : str
        Setup source containing a ``{size}`` placeholder. The placeholder is
        filled with each entry of ``sizes`` via ``str.format`` and the result
        is passed to ``timeit.repeat`` as ``setup`` (so it is not timed).
    TEST_CODE : str
        Statement source passed to ``timeit.repeat`` as ``stmt``.
    sizes : iterable of str, optional
        Size labels to benchmark, in order. Defaults to the three labels the
        notebook has always used.
    repeat : int, optional
        Number of measurements per size. When omitted, the notebook-level
        ``repeats`` setting is used.
    number : int, optional
        Executions of ``TEST_CODE`` per measurement. When omitted, the
        notebook-level ``runs`` setting is used.

    Returns
    -------
    None
        Results are printed: the size label, the list of timings returned by
        ``timeit.repeat``, then blank lines as a separator.
    """
    # Resolve the notebook-level defaults at call time, so changing the config
    # cell and re-running a benchmark cell picks up the new values.
    if repeat is None:
        repeat = repeats
    if number is None:
        number = runs

    for size in sizes:
        print(f"{size}:")
        print(timeit.repeat(setup=SETUP_CODE.format(size=size), stmt=TEST_CODE, repeat=repeat, number=number))
        print("\n")

# Segmentation

## Current implementation

In [4]:
# Setup source for the current implementation. `size` is filled with the
# literal text "{size}" so the placeholder survives this first format() and the
# dataset size can be chosen later with a second format() call.
STD_SETUP = SETUP.format(size="{size}", X="X_base", packages=PACKAGES)

# Smoke test on the small dataset: run the setup and SEGM snippets, then print
# the head of the resulting X_segm.
std_segm_preview_src = "".join((STD_SETUP.format(size="small"), SEGM, "print(X_segm.head())"))
exec(std_segm_preview_src)

                                        dim_0_11_111  \
0  [-0.65125, -0.65729, -0.6622, -0.66123, -0.660...   
1  [-0.63372, -0.63205, -0.63228, -0.62956, -0.62...   
2  [-0.76598, -0.76857, -0.77674, -0.78191, -0.78...   
3  [-0.74568, -0.7457, -0.74562, -0.74635, -0.745...   
4  [-0.57849, -0.57798, -0.57654, -0.57862, -0.57...   

                                         dim_0_39_44  \
0  [-0.65132, -0.64814, -0.64733, -0.6471, -0.64486]   
1  [-0.59777, -0.59699, -0.59715, -0.59554, -0.59...   
2  [-0.63927, -0.63442, -0.6287, -0.63341, -0.62427]   
3   [-0.58557, -0.5858, -0.57486, -0.55793, -0.5389]   
4  [-0.56457, -0.56116, -0.55726, -0.55356, -0.53...   

                                         dim_0_9_103  
0  [-0.64712, -0.64915, -0.65125, -0.65729, -0.66...  
1  [-0.63538, -0.63411, -0.63372, -0.63205, -0.63...  
2  [-0.76536, -0.76562, -0.76598, -0.76857, -0.77...  
3  [-0.74582, -0.74599, -0.74568, -0.7457, -0.745...  
4  [-0.58385, -0.5816, -0.57849, -0.57798, -0.576..

In [5]:
# Time the SEGM snippet on every dataset size using the current-implementation setup.
run_sizes(SETUP_CODE=STD_SETUP, TEST_CODE=SEGM)


small:
[0.10969470000000037, 0.13369939999999936, 0.10161839999999955, 0.10834799999999944, 0.09558479999999925]


medium:
[1.5246385, 1.484832299999999, 1.5823298000000001, 1.5095939999999999, 1.4916271999999982]


large:
[14.556189799999999, 14.014290200000005, 14.138664999999996, 14.109760800000004, 14.108879400000006]




## ExtensionArray + NumPy implementation

In [5]:
# Setup source for the ExtensionArray variant: same template as the current
# implementation, but with the alternative imports and the TimeFrame-based X.
# `size` is again kept as a literal "{size}" placeholder for a later format().
ALT_SETUP = SETUP.format(size="{size}", X=ALT_X, packages=ALT_PACKAGES)

# Smoke test on the small dataset: run the setup and SEGM snippets, then print
# the head of the resulting X_segm.
# NOTE(review): the output stored with this cell is an ImportError for
# RowwiseTransformer -- confirm the import resolves and re-run the notebook
# from a fresh kernel.
alt_segm_preview_src = "".join((ALT_SETUP.format(size="small"), SEGM, "print(X_segm.head())"))
exec(alt_segm_preview_src)

ImportError: cannot import name 'RowwiseTransformer' from 'extensionarray.reimplement' (C:\Users\rocke\Google Drive\Universitaet\Data Science\sktime_datacontainer\extensionarray\reimplement.py)

In [7]:
# Time the SEGM snippet on every dataset size using the ExtensionArray setup.
run_sizes(SETUP_CODE=ALT_SETUP, TEST_CODE=SEGM)


small:
[0.06628040000001079, 0.08575989999999933, 0.06358070000000282, 0.06534929999999406, 0.06319419999999809]


medium:
[0.08673369999999636, 0.0977955000000037, 0.10113289999999608, 0.12012599999999907, 0.08577139999999872]


large:
[0.5269088999999951, 0.5246692999999993, 0.5611305999999985, 0.4405219999999872, 0.5659126000000043]




# Rowwise transformation

## Current implementation

In [8]:
# Smoke test on the small dataset: run setup, SEGM and MEAN, then print the head of X_mean.
std_mean_preview_src = "".join((STD_SETUP.format(size="small"), SEGM, MEAN, "print(X_mean.head())"))
exec(std_mean_preview_src)


   dim_0_70_107  dim_0_1_62  dim_0_69_76
0      1.462785   -0.580453     1.807043
1      1.107889   -0.413491     1.907700
2      1.140931   -0.440353     1.695229
3      0.943944   -0.312273     1.777943
4      0.555144   -0.170879     2.032100


In [9]:
# Smoke test on the small dataset: run setup, SEGM and STD, then print the head of X_std.
std_std_preview_src = "".join((STD_SETUP.format(size="small"), SEGM, STD, "print(X_std.head())"))
exec(std_std_preview_src)


   dim_0_39_87  dim_0_127_134  dim_0_33_127
0     1.105786       0.002133      1.089272
1     1.058984       0.000622      1.098032
2     0.978018       0.026444      1.024700
3     0.927832       0.012739      1.030977
4     0.956145       0.005676      1.114464


In [10]:
# Smoke test on the small dataset: run setup, SEGM and SLOPE, then print the head of X_slope.
std_slope_preview_src = "".join((STD_SETUP.format(size="small"), SEGM, SLOPE, "print(X_slope.head())"))
exec(std_slope_preview_src)


   dim_0_110_134  dim_0_129_141  dim_0_92_134
0       0.006317      -0.000508     -0.058610
1       0.006296       0.000388     -0.030110
2      -0.005792      -0.000209     -0.041005
3      -0.013598       0.006961     -0.026719
4      -0.003493      -0.004105      0.001884


In [11]:
# SEGM is appended to the setup code, so it runs untimed; only the MEAN, STD
# and SLOPE snippets are measured.
run_sizes(SETUP_CODE=STD_SETUP + SEGM, TEST_CODE=MEAN + STD + SLOPE)



small:
[0.23828059999999596, 0.21859929999999395, 0.24000379999999666, 0.21712300000000084, 0.20858399999998767]


medium:
[3.149395300000009, 3.231275800000006, 3.169540900000001, 3.2101641, 3.1789712000000065]


large:
[31.482938100000013, 30.517155400000007, 31.595720700000015, 30.763919299999998, 32.66773760000004]




## ExtensionArray + NumPy implementation

In [5]:
# Mean snippet for the ExtensionArray variant; it uses whichever
# RowwiseTransformer the setup code imported.
ALT_MEAN = """
mean_transformer = RowwiseTransformer(np.mean)
X_mean = mean_transformer.fit_transform(X_segm)
"""

# Smoke test on the small dataset: run setup, SEGM and ALT_MEAN, then print the head of X_mean.
alt_mean_preview_src = "".join((ALT_SETUP.format(size="small"), SEGM, ALT_MEAN, "print(X_mean.head())"))
exec(alt_mean_preview_src)

   dim_0_18_59  dim_0_109_123  dim_0_31_58
0    -0.618492      -0.707619    -0.614147
1    -0.430778      -0.725378    -0.389671
2    -0.451582      -0.594396    -0.362561
3    -0.281947      -0.580216    -0.131013
4    -0.156994      -0.564249    -0.025607


In [6]:
# Standard-deviation snippet for the ExtensionArray variant; it uses whichever
# RowwiseTransformer the setup code imported.
ALT_STD = """
std_transformer = RowwiseTransformer(np.std)
X_std = std_transformer.fit_transform(X_segm)
"""

# Smoke test on the small dataset: run setup, SEGM and ALT_STD, then print the head of X_std.
alt_std_preview_src = "".join((ALT_SETUP.format(size="small"), SEGM, ALT_STD, "print(X_std.head())"))
exec(alt_std_preview_src)

   dim_0_53_145  dim_0_46_62  dim_0_109_128
0      1.087496     0.371729       0.045903
1      1.117559     0.621160       0.051841
2      1.050012     0.820819       0.037030
3      1.092412     0.766100       0.094944
4      1.151984     0.732594       0.029085


In [7]:
# Slope snippet for the ExtensionArray variant; it uses whichever
# RowwiseTransformer the setup code imported.
ALT_SLOPE = """
slope_transformer = RowwiseTransformer(time_series_slope)
X_slope = slope_transformer.fit_transform(X_segm)
"""

# Smoke test on the small dataset: run setup, SEGM and ALT_SLOPE, then print the head of X_slope.
alt_slope_preview_src = "".join((ALT_SETUP.format(size="small"), SEGM, ALT_SLOPE, "print(X_slope.head())"))
exec(alt_slope_preview_src)

   dim_0_101_125  dim_0_62_101  dim_0_107_132
0      -0.048157      0.002072       0.001055
1      -0.001512     -0.038877       0.007891
2      -0.018055     -0.027374      -0.005353
3      -0.011649     -0.049989      -0.013679
4       0.001000     -0.093076      -0.003408


In [10]:
# SEGM is appended to the setup code, so it runs untimed; only the ALT_MEAN,
# ALT_STD and ALT_SLOPE snippets are measured.
run_sizes(SETUP_CODE=ALT_SETUP + SEGM, TEST_CODE=ALT_MEAN + ALT_STD + ALT_SLOPE)


small:
[0.24553279999999944, 0.24443860000000228, 0.2248829999999984, 0.2299416999999977, 0.2517506999999952]


medium:
[3.7484344000000007, 3.3937011000000012, 3.5439056999999963, 3.5409863000000072, 3.4093864999999965]


large:
[35.6663465, 35.473195700000005, 34.61364400000002, 34.25132400000001, 34.40512820000001]




# Fitting a decision tree

## Current implementation

In [23]:
# Smoke test on the small dataset: run the whole manual chain (setup, SEGM,
# MEAN, STD, SLOPE, UNION, TREE) and print the resulting `dt` object.
std_tree_preview_src = "".join(
    (STD_SETUP.format(size="small"), SEGM, MEAN, STD, SLOPE, UNION, TREE, "print(dt)")
)
exec(std_tree_preview_src)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [24]:
# Segmentation and the three transformer snippets run untimed as setup;
# only the UNION and TREE snippets are measured.
run_sizes(SETUP_CODE=STD_SETUP + SEGM + MEAN + STD + SLOPE, TEST_CODE=UNION + TREE)



small:
[0.031403799999679904, 0.02491620000000694, 0.024598100000730483, 0.026665500001399778, 0.031718000002001645]


medium:
[0.04625060000034864, 0.04542270000092685, 0.03674659999887808, 0.04156170000351267, 0.04483960000288789]


large:
[0.2219431000012264, 0.2598431000005803, 0.22165049999966868, 0.20182000000204425, 0.2367049999993469]




## ExtensionArray + NumPy implementation

In [25]:
# Smoke test on the small dataset: run the whole manual chain with the
# ExtensionArray snippets (setup, SEGM, ALT_MEAN, ALT_STD, ALT_SLOPE, UNION,
# TREE) and print the resulting `dt` object.
alt_tree_preview_src = "".join(
    (ALT_SETUP.format(size="small"), SEGM, ALT_MEAN, ALT_STD, ALT_SLOPE, UNION, TREE, "print(dt)")
)
exec(alt_tree_preview_src)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [11]:
# Segmentation and the three ALT_* transformer snippets run untimed as setup;
# only the UNION and TREE snippets are measured.
run_sizes(SETUP_CODE=ALT_SETUP + SEGM + ALT_MEAN + ALT_STD + ALT_SLOPE, TEST_CODE=UNION + TREE)



small:
[0.02296939999996539, 0.02353130000000192, 0.025058500000000095, 0.02379150000001573, 0.026571699999976772]


medium:
[0.042511499999989155, 0.0746403999999643, 0.037456799999972645, 0.03554190000005519, 0.036327200000016546]


large:
[0.212218699999994, 0.2120019000000184, 0.20480729999997038, 0.21409270000003744, 0.2272186000000147]




In [14]:
# Time the whole manual chain (SEGM through TREE) on the large dataset with the
# current implementation: 5 measurements of 2 executions each.
std_chain_times = timeit.repeat(
    stmt=SEGM + MEAN + STD + SLOPE + UNION + TREE,
    setup=STD_SETUP.format(size="large"),
    repeat=5,
    number=2,
)
print(std_chain_times)


[4.500199299999963, 4.499760700000024, 4.517597899999998, 4.650881099999992, 4.584968500000002]


In [15]:
# Time the whole manual chain (SEGM through TREE) on the large dataset with the
# ExtensionArray snippets: 5 measurements of 2 executions each.
alt_chain_times = timeit.repeat(
    stmt=SEGM + ALT_MEAN + ALT_STD + ALT_SLOPE + UNION + TREE,
    setup=ALT_SETUP.format(size="large"),
    repeat=5,
    number=2,
)
print(alt_chain_times)


[3.3976888999999915, 3.3942763000000014, 4.620344700000032, 3.581012900000019, 3.380076200000019]


# Entire pipeline

## Current implementation

In [16]:
# Smoke test: run the setup and PIPELINE snippets once on the small dataset.
std_pipeline_src = "".join((STD_SETUP.format(size="small"), PIPELINE))
exec(std_pipeline_src)



In [22]:
# Time the PIPELINE snippet on the large dataset (5 measurements of 2
# executions each). All warnings are silenced while the benchmark runs;
# catch_warnings() restores the previous filters afterwards.
std_pipeline_setup = STD_SETUP.format(size="large")
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    std_pipeline_times = timeit.repeat(stmt=PIPELINE, setup=std_pipeline_setup, repeat=5, number=2)
    print(std_pipeline_times)



[13.70699060000004, 13.756133999999975, 13.751664600000026, 14.061273500000084, 13.673372099999938]


In [18]:
# Source snippet that builds and fits the full pipeline: a 'segment' step, a
# FeatureUnion of three RowwiseTransformer features (mean, std, slope), and a
# DecisionTreeClassifier. It is meant to be run after ALT_SETUP, whose
# ALT_PACKAGES imports supply RandomIntervalSegmenter and RowwiseTransformer.
# NOTE(review): FeatureUnion, Pipeline, DecisionTreeClassifier, np,
# time_series_slope, X and y are presumably provided by the SETUP template --
# not visible in this notebook; confirm.
ALT_PIPELINE = """
steps = [
    ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
    ('transform', FeatureUnion([
        ('mean', RowwiseTransformer(np.mean)),
        ('std', RowwiseTransformer(np.std)),
        ('slope', RowwiseTransformer(time_series_slope))
    ])),
    ('clf', DecisionTreeClassifier())
]
base_estimator = Pipeline(steps, random_state=1)
base_estimator.fit(X, y)
"""

In [19]:
# Smoke test: run the setup and ALT_PIPELINE snippets once on the small dataset.
alt_pipeline_src = "".join((ALT_SETUP.format(size="small"), ALT_PIPELINE))
exec(alt_pipeline_src)




In [21]:
# Time the ALT_PIPELINE snippet on the large dataset (5 measurements of 2
# executions each). All warnings are silenced while the benchmark runs;
# catch_warnings() restores the previous filters afterwards.
alt_pipeline_setup = ALT_SETUP.format(size="large")
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    alt_pipeline_times = timeit.repeat(stmt=ALT_PIPELINE, setup=alt_pipeline_setup, repeat=5, number=2)
    print(alt_pipeline_times)

[14.349045900000078, 13.597524900000053, 13.481302700000015, 14.245717300000024, 13.757090500000004]
