In [1]:
import timeit
import numpy as np
from benchmarks.tabularise_benchmark import SETUP, PACKAGES, TEST

In [2]:
# Timing configuration read by run_sizes when it calls timeit.repeat.
runs = 20     # executions of the timed statement per sample (timeit `number`)
repeats = 5   # timing samples collected per data size (timeit `repeat`)

In [3]:
def run_sizes(SETUP_CODE, TEST_CODE, sizes=('small', 'medium', 'large'), repeat=None, number=None):
    """Time ``TEST_CODE`` once per data size and print the per-run timings.

    Parameters
    ----------
    SETUP_CODE : str
        Setup source containing a ``{size}`` placeholder. It is formatted
        with each entry of ``sizes`` and passed to ``timeit.repeat`` as
        ``setup``.
    TEST_CODE : str
        Statement to be timed (the ``stmt`` argument of ``timeit.repeat``).
    sizes : iterable of str, optional
        Size labels to benchmark, in order. Defaults to ``'small'``,
        ``'medium'`` and ``'large'``.
    repeat : int, optional
        Number of timing samples per size. Defaults to the notebook-level
        ``repeats`` setting.
    number : int, optional
        Executions of ``TEST_CODE`` per sample. Defaults to the
        notebook-level ``runs`` setting.

    Returns
    -------
    None
        For each size, the label is printed followed by an array with one
        entry per sample: the sample's total time divided by ``number``.
    """
    # Resolve the defaults at call time so that edits to the notebook-level
    # settings are picked up without redefining this function.
    n_repeat = repeats if repeat is None else repeat
    n_number = runs if number is None else number

    for size in sizes:
        times = np.array(timeit.repeat(
            setup=SETUP_CODE.format(size=size),
            stmt=TEST_CODE,
            repeat=n_repeat,
            number=n_number,
        ))

        print(size + ":")
        # Each sample is the total time for `number` executions, so divide
        # to report the time of a single execution.
        print(times / n_number)
        print("\n")

# Tabularise

## Current implementation


In [4]:
# Fill in the packages and the X expression now, but re-insert a literal
# "{size}" placeholder so the data size can be substituted later.
STD_SETUP = SETUP.format(size="{size}", X="X_base", packages=PACKAGES)
# Smoke test: run setup and the timed statement once on the small data set
# and preview the resulting table.
exec("".join((STD_SETUP.format(size="small"), TEST, "print(X_tab.head())")))

   dim_0__0  dim_0__1  dim_0__2  dim_0__3  dim_0__4  dim_0__5  dim_0__6  \
0  -0.64789  -0.64199  -0.63819  -0.63826  -0.63835  -0.63870  -0.64305   
1  -0.64443  -0.64540  -0.64706  -0.64749  -0.64691  -0.64388  -0.63973   
2  -0.77835  -0.77828  -0.77715  -0.77768  -0.77590  -0.77242  -0.76546   
3  -0.75006  -0.74810  -0.74616  -0.74593  -0.74377  -0.74381  -0.74521   
4  -0.59954  -0.59742  -0.59927  -0.59826  -0.59758  -0.59130  -0.58902   

   dim_0__7  dim_0__8  dim_0__9  ...  dim_0__140  dim_0__141  dim_0__142  \
0  -0.64377  -0.64505  -0.64712  ...    -0.63926    -0.63972    -0.63973   
1  -0.63809  -0.63530  -0.63538  ...    -0.64114    -0.64143    -0.63927   
2  -0.76228  -0.76375  -0.76536  ...    -0.72206    -0.71871    -0.71353   
3  -0.74508  -0.74573  -0.74582  ...    -0.72167    -0.72466    -0.72923   
4  -0.58753  -0.58546  -0.58385  ...    -0.64404    -0.64388    -0.64574   

   dim_0__143  dim_0__144  dim_0__145  dim_0__146  dim_0__147  dim_0__148  \
0    -0.64018  

In [5]:
# Benchmark the current implementation across all data sizes.
run_sizes(SETUP_CODE=STD_SETUP, TEST_CODE=TEST)



small:
[0.00340176 0.00345375 0.0038984  0.00376559 0.00370237]


medium:
[0.06099804 0.06516728 0.07981695 0.07428021 0.06207213]


large:
[0.57888375 0.67165804 0.61649048 0.59075426 0.6423671 ]




## ExtensionArray + NumPy implementation

In [6]:
# Import lines handed to the setup template as its `packages` value.
ALT_PACKAGES = (
    "\n"
    "from sktime.utils.data_container import tabularize\n"
    "from extensionarray.timeframe import TimeFrame\n"
)

# Expression handed to the setup template as its `X` value.
# The dict braces are doubled on purpose: the assembled setup string goes
# through str.format once more (to fill in {size}), and that second pass
# turns "{{" / "}}" back into single literal braces.
ALT_X = (
    "TimeFrame(data={{"
    "'dim_0': tabularize(X_base['dim_0'], return_array=True), "
    "'class_val': X_base['class_val']"
    "}})"
)

# Statement that gets timed for the alternative implementation.
ALT_TEST = "\nX_tab = X.tabularise()\n"

In [7]:
# Same template as before, now with the alternative imports and X expression;
# "{size}" is re-inserted literally so it can be substituted per run.
ALT_SETUP = SETUP.format(size="{size}", X=ALT_X, packages=ALT_PACKAGES)
# Smoke test: run setup and the timed statement once on the small data set
# and preview the resulting table.
exec("".join((ALT_SETUP.format(size="small"), ALT_TEST, "print(X_tab.head())")))

   dim_0_0  dim_0_1  dim_0_2  dim_0_3  dim_0_4  dim_0_5  dim_0_6  dim_0_7  \
0 -0.64789 -0.64199 -0.63819 -0.63826 -0.63835 -0.63870 -0.64305 -0.64377   
1 -0.64443 -0.64540 -0.64706 -0.64749 -0.64691 -0.64388 -0.63973 -0.63809   
2 -0.77835 -0.77828 -0.77715 -0.77768 -0.77590 -0.77242 -0.76546 -0.76228   
3 -0.75006 -0.74810 -0.74616 -0.74593 -0.74377 -0.74381 -0.74521 -0.74508   
4 -0.59954 -0.59742 -0.59927 -0.59826 -0.59758 -0.59130 -0.58902 -0.58753   

   dim_0_8  dim_0_9  ...  dim_0_140  dim_0_141  dim_0_142  dim_0_143  \
0 -0.64505 -0.64712  ...   -0.63926   -0.63972   -0.63973   -0.64018   
1 -0.63530 -0.63538  ...   -0.64114   -0.64143   -0.63927   -0.63780   
2 -0.76375 -0.76536  ...   -0.72206   -0.71871   -0.71353   -0.71002   
3 -0.74573 -0.74582  ...   -0.72167   -0.72466   -0.72923   -0.72894   
4 -0.58546 -0.58385  ...   -0.64404   -0.64388   -0.64574   -0.64646   

   dim_0_144  dim_0_145  dim_0_146  dim_0_147  dim_0_148  dim_0_149  
0   -0.63923   -0.63939   -0.64023

In [8]:
# Benchmark the alternative implementation across all data sizes.
run_sizes(SETUP_CODE=ALT_SETUP, TEST_CODE=ALT_TEST)


small:
[0.00039327 0.000369   0.00041058 0.00054704 0.00036224]


medium:
[0.00090742 0.00090015 0.00091263 0.00099709 0.00091062]


large:
[0.00946142 0.00935255 0.00976064 0.00938045 0.00931351]


