In [None]:
import pandas as pd
import numpy as np
import time
import random

import sys
sys.path.insert(1, '../rtsvg')

from rtsvg import *
from IPython.display import Javascript, HTML, display
%matplotlib inline

In [None]:
# Shared RACETrack instance; later cells call its createTField and multiWidgetPanel methods.
# NOTE(review): RACETrack is presumably provided by the `from rtsvg import *` above -- confirm.
rt = RACETrack()

In [None]:
#
# Create a random dataframe ... dates are confined to seven consecutive days so that
# the weekly periodic chart matches the temporal barchart
#
def randomDataFrame(num_of_recs,rand_strs=200,rand_strs2=1000):
    """
    Build a dataframe of random records.

    Parameters
    ----------
    num_of_recs : int
        Number of rows to generate.
    rand_strs : int
        Upper bound (inclusive) of the integer suffix used for the 'strs' column.
    rand_strs2 : int
        Upper bound (inclusive) of the integer suffix used for the 'strs2' column.

    Returns
    -------
    pandas.DataFrame
        Columns:
          'nums'      -- random integer in [0, 100]
          'nums2'     -- random integer in [100, 10000]
          'strs'      -- 'a' followed by a random integer in [0, rand_strs]
          'strs2'     -- 'e' followed by a random integer in [0, rand_strs2]
          'timestamp' -- datetime64 value between 2023-01-01 00:00 and 2023-01-07 23:59,
                         at minute resolution
    """
    nums  = []
    nums2 = []
    strs  = []
    strs2 = []
    ts    = []

    for _ in range(num_of_recs):
        nums.append(random.randint(0,100))
        nums2.append(random.randint(100,10000))
        strs.append('a' + str(random.randint(0,rand_strs)))
        strs2.append('e' + str(random.randint(0,rand_strs2)))
        # Draw order (hour, minute, day) is fixed so a seeded `random` yields the same records
        hour   = random.randint(0,23)
        minute = random.randint(0,59)
        day    = random.randint(1,7)
        ts.append('2023-01-0{} {:02d}:{:02d}'.format(day, hour, minute))

    df = pd.DataFrame({'nums':nums,'nums2':nums2,'strs':strs,'strs2':strs2,'timestamp':ts})
    # astype(np.datetime64) requests a unit-less dtype, which pandas 2.x rejects with a
    # TypeError; parse the strings explicitly instead
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M')

    return df

# 1000-record sample frame; rendered by the widget panel spec test in the following cell
df = randomDataFrame(1000)

In [None]:
#
# Render a three-widget panel from the sample dataframe as a quick visual check
#
spec = {
    '1,1': ('histogram', dict(bin_by=rt.createTField('timestamp', "day_of_week"))),
    '1,2': ('temporalBarChart', dict(min_bar_w=20)),
    # No parameter dict for this widget, so it is given as a plain string
    '1,3': 'periodicBarChart',
}
my_svg = rt.multiWidgetPanel(
    df,
    spec,
    w=256,
    h=384,
    count_by=None,
    color_by='strs',
)
display(HTML(my_svg))

In [None]:
# Single-widget panel specs to benchmark, keyed by the label recorded with each timing.
# Insertion order is the order in which the specs are iterated.
specs_to_test = {
    'histo_1binby': {'c': ('histogram', {'bin_by': ['nums']})},
    'histo_2binby': {'c': ('histogram', {'bin_by': ['nums', 'strs']})},
    'histo_3binby': {'c': ('histogram', {'bin_by': ['nums', 'nums2', 'strs']})},
    'histo_4binby': {'c': ('histogram', {'bin_by': ['nums', 'nums2', 'strs', 'strs2']})},
    'histo_temp':   {'c': ('histogram', {'bin_by': rt.createTField('timestamp', 'day_of_week')})},
    # No parameter dict for this widget, so it is given as a plain string
    'temporal':     {'c': 'temporalBarChart'},
    'period_dow':   {'c': ('periodicBarChart', {'time_period': 'day_of_week'})},
    'period_dow_h': {'c': ('periodicBarChart', {'time_period': 'day_of_week_hour'})},
    'xy_num':       {'c': ('xy', {'x_field': 'nums', 'y_field': 'nums2'})},
    'xy_cat':       {'c': ('xy', {'x_field': 'strs', 'y_field': 'strs2'})},
    'xy_time_num':  {'c': ('xy', {'x_field': 'timestamp', 'y_field': 'nums2'})},
    'xy_time_cat':  {'c': ('xy', {'x_field': 'timestamp', 'y_field': 'strs2'})},
}

# Individual names for each spec; each refers to the same object held in specs_to_test
spec_histo_1binby = specs_to_test['histo_1binby']
spec_histo_2binby = specs_to_test['histo_2binby']
spec_histo_3binby = specs_to_test['histo_3binby']
spec_histo_4binby = specs_to_test['histo_4binby']
spec_histo_temp   = specs_to_test['histo_temp']
spec_temporal     = specs_to_test['temporal']
spec_period_dow   = specs_to_test['period_dow']
spec_period_dow_h = specs_to_test['period_dow_h']
spec_xy_nums      = specs_to_test['xy_num']
spec_xy_cats      = specs_to_test['xy_cat']
spec_xy_time_cat  = specs_to_test['xy_time_cat']
spec_xy_time_num  = specs_to_test['xy_time_num']

In [None]:
#
# Time rt.multiWidgetPanel for every spec in specs_to_test, over a growing number of
# records and several count_by / color_by combinations
#

# Parallel result lists: one entry per timed render
recs        = []
time_taken  = []
spec_tested = []
params      = []

# (count_by, color_by, label stored in the 'param' column)
param_combos = [(None,   None,    "None/None"),
                ('nums', None,    "nums/None"),
                ('strs', None,    "strs/None"),
                ('nums', 'strs',  "nums/strs"),
                ('strs', 'strs',  "strs/strs"),
                ('strs', 'strs2', "strs/strs2")]

# Only the first four combinations are timed (this matches the previous range(0,4) loop,
# whose branches for the last two combinations could never run). Widen the slice to
# benchmark the remaining two.
active_combos = param_combos[:4]

n = 1000
while n < 200000:
    df = randomDataFrame(n)

    for spec_name,spec in specs_to_test.items():
        for count_by,color_by,param_name in active_combos:
            # perf_counter is the clock intended for measuring short intervals; unlike
            # time.time() it cannot jump when the system clock is adjusted
            t0 = time.perf_counter()
            rt.multiWidgetPanel(df, spec, w=512, h=512, count_by=count_by, color_by=color_by)
            t1 = time.perf_counter()

            recs.append(n)
            time_taken.append(t1-t0)
            spec_tested.append(spec_name)
            params.append(param_name)

    # Double n while the step is at most 40000 records, then grow by a fixed 40000:
    # 1000, 2000, ..., 32000, 64000, 104000, 144000, 184000
    n += min(n, 40000)

# Create a dataframe with one row per timed render
scale_df = pd.DataFrame({'n':recs,'t':time_taken,'spec':spec_tested,'param':params})
scale_df.sample(10)

In [None]:
#
# Draw the benchmark results: an xy widget of 'n' against 't', and a histogram
# binned by spec and param and counted by 't'
#
perf_spec = {
    '1,1': ('xy', dict(x_field='n', y_field='t', dot_sz='large')),
    '2,1': ('histogram', dict(bin_by=['spec', 'param'], count_by='t')),
}
my_svg = rt.multiWidgetPanel(
    scale_df,
    perf_spec,
    w=1024,
    h=512,
    count_by=None,
    color_by='spec',
)
display(HTML(my_svg))