In [None]:
#
# Prototyping for the following ... now it's about optimization
#
# H. Rave, V. Molchanov and L. Linsen, "Uniform Sample Distribution in Scatterplots via Sector-based Transformation," 
# 2024 IEEE Visualization and Visual Analytics (VIS), St. Pete Beach, FL, USA, 2024, pp. 156-160, 
# doi: 10.1109/VIS55277.2024.00039. 
# keywords: {Data analysis;Visual analytics;Clutter;Scatterplot de-cluttering;spatial transformation},
#
import polars as pl
import numpy as np
from math import cos, sin, pi, sqrt, atan2
from shapely import Polygon
from   udist_scatterplots_via_sectors          import UDistScatterPlotsViaSectors
from   udist_scatterplots_via_sectors_tile_opt import UDistScatterPlotsViaSectorsTileOpt
import time
import rtsvg
rt = rtsvg.RACETrack()
import random
#num_of_pts   = [40, 60, 20]
#num_of_pts   = [3_000, 3_000, 1_000]
num_of_pts    = [6_000, 8_000, 4_000]            # points sampled per cluster
circle_geoms = [(5,5,1),(20,10,2),(8,8,1)]       # (center_x, center_y, radius) per cluster
colors       = ['#ff0000','#006400','#0000ff']   # color given to each cluster's points
num_of_noise_pts = 100                           # extra black points scattered over [0,20) x [0,20)
random_seed      = 20240101                      # fixed so that every run benchmarks the same point set

def make_test_points(num_of_pts, circle_geoms, colors, num_of_noise_pts=100, seed=None):
    """Build the synthetic scatterplot used for the timing runs.

    The result always starts with one black point at (12.0, 8.0).  For each
    cluster, ``num_of_pts[k]`` points are placed around the center of
    ``circle_geoms[k]`` at a uniformly random angle and a uniformly random
    distance in [0, radius) -- so they concentrate toward the center rather
    than covering the disc evenly.  Finally ``num_of_noise_pts`` black points
    are drawn uniformly from [0, 20) x [0, 20).  Every point has weight 1.0.

    Parameters:
        num_of_pts:       number of points per cluster
        circle_geoms:     (center_x, center_y, radius) per cluster
        colors:           color string per cluster
        num_of_noise_pts: number of uniformly scattered black points
        seed:             seed for a private random.Random; None seeds from
                          the operating system (non-reproducible)

    Returns:
        (xvals, yvals, weights, colors) as four parallel lists.

    Raises:
        ValueError: if the three per-cluster sequences differ in length.
    """
    if not (len(num_of_pts) == len(circle_geoms) == len(colors)):
        raise ValueError('num_of_pts, circle_geoms, and colors must have the same length')
    # A private generator keeps the point set reproducible without touching the global random state
    rng = random.Random(seed)
    xs, ys, ws, cs = [12.0], [8.0], [1.0], ['#000000']
    for count, (cx, cy, radius), color in zip(num_of_pts, circle_geoms, colors):
        for _ in range(count):
            angle    = rng.random() * 2 * pi
            distance = rng.random() * radius
            xs.append(cx + distance * cos(angle))
            ys.append(cy + distance * sin(angle))
            ws.append(1.0)
            cs.append(color)
    for _ in range(num_of_noise_pts):
        x = 20 * rng.random()
        y = 20 * rng.random()
        xs.append(x)
        ys.append(y)
        ws.append(1.0)
        cs.append('#000000')
    return xs, ys, ws, cs

_xvals_, _yvals_, _weights_, _colors_ = make_test_points(num_of_pts, circle_geoms, colors,
                                                         num_of_noise_pts=num_of_noise_pts, seed=random_seed)

_iterations_   = 1
_num_of_tiles_ = 128
# The non-tiled reference implementation is the slower of the two (see the table below),
# so it only runs -- and is only reported -- when explicitly enabled.
_run_baseline_ = False

# perf_counter() is the clock intended for measuring short intervals; time.time() can jump
# if the system clock is adjusted.
t1 = time.perf_counter()
if _run_baseline_:
    udspvs      = UDistScatterPlotsViaSectors       (_xvals_, _yvals_, _weights_, _colors_, iterations=_iterations_, debug=False)
t2 = time.perf_counter()
# NOTE(review): debug=True is presumably what populates the per-stage data read by later cells -- confirm
udspvs_tile_opt = UDistScatterPlotsViaSectorsTileOpt(_xvals_, _yvals_, _weights_, _colors_, iterations=_iterations_, debug=True, num_of_tiles=_num_of_tiles_)
t3 = time.perf_counter()
# 351 Points  | 512 Iterations |  5.66s Polars Time | 10.68s Polars Tile Opt Time            | (M1 Pro 16G)
# ------------+----------------+--------------------+----------------------------------------+-------------
# 7001 Points |   2 Iterations | 12.30s Polars Time |  4.97s Polars Tile Opt Time ( 32 tiles)| (M1 Pro 16G)  # After uniquifying the xoyo sectors
# 7001 Points |   2 Iterations |  9.38s Polars Time |  2.95s Polars Tile Opt Time ( 64 tiles)| (M1 Pro 16G) 
# ------------+----------------+--------------------+----------------------------------------+-------------
# 7101 Points |   2 Iterations |  9.07s Polars Time |  3.02s Polars Tile Opt Time ( 64 tiles)| (M1 Pro 16G)  # After adding the medium way / crossproduct code
# 7101 Points |  16 Iterations | 74.71s Polars Time | 20.41s Polars Tile Opt Time ( 64 tiles)| (M1 Pro 16G)    ... over 3x improvement in performance
# 7101 Points |  16 Iterations | 82.41s Polars Time | 25.91s Polars Tile Opt Time (256 tiles)| (M1 Pro 16G)    ... worse when increased to 256 tiles
# 7101 Points |  16 Iterations | 76.06s Polars Time | 17.74s Polars Tile Opt Time (128 tiles)| (M1 Pro 16G)    ... better at 128 tiles
# ------------+----------------+--------------------+----------------------------------------+-------------
# 7101 Points |  16 Iterations |                    | 17.62s Polars Tile Opt Time (64 tiles) | (M1 Pro 16G)  # After re-working the sector sum operations
# 7101 Points |  16 Iterations |                    | 15.24s Polars Tile Opt Time (128 tiles)| (M1 Pro 16G) 
# 7101 Points |  16 Iterations |                    | 14.68s Polars Tile Opt Time (128 tiles)| (M1 Pro 16G) 
# Report the baseline column only when it was actually measured, instead of a misleading 0.00s
_polars_time_ = f'{t2-t1:.2f}s' if _run_baseline_ else 'skipped'
print(f'{len(_xvals_)} Points | {_iterations_} Iterations | {_polars_time_} Polars Time | {t3-t2:.2f}s Polars Tile Opt Time | (M1 Pro 16G) ')
#rt.tile([udspvs.animateIterations(animation_dur="4s"), udspvs_tile_opt.animateIterations(animation_dur="4s")], spacer=10) # only if debug=True is set
#rt.tile([udspvs         .animateIterations(animation_dur="4s")])
#rt.tile([udspvs_tile_opt.animateIterations(animation_dur="4s")]) # only if debug=True is set

In [None]:
#rt.table(udspvs_tile_opt.renderStages(rt), per_row=4, spacer=10)

In [None]:
# Collect the per-subroutine timings recorded by the tile-optimized run into two
# parallel columns, then chart the time attributed to each subroutine.
_time_lu_          = udspvs_tile_opt.time_lu
_subroutines_      = []
_subroutine_times_ = []
for _subroutine_ in _time_lu_:
    _subroutine_time_ = _time_lu_[_subroutine_]
    _subroutines_     .append(_subroutine_)
    _subroutine_times_.append(_subroutine_time_)
_lu_ = {'subroutine':_subroutines_, 'subroutine_time':_subroutine_times_}
# Last expression of the cell, so the rendered histogram is the cell output
rt.histogram(pl.DataFrame(_lu_), bin_by='subroutine', count_by='subroutine_time', w=256, h=384)

'<svg id="histogram_2978457" x="0" y="0" width="256" height="384" xmlns="http://www.w3.org/2000/svg"><rect width="255" height="383" x="0" y="0" fill="#ffffff" stroke="#ffffff" /><rect id="histogram_2978457_encsvgid_s_separate:95:easy:95:hard:95:way_:40:non:45:easy_way:41:" width="242.0" height="14" x="0" y="0" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_sector:95:sums" width="231.2769641385978" height="14" x="0" y="15" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_separate:95:easy:95:hard:95:way_:40:medium_way:41:" width="91.88229126240034" height="14" x="0" y="30" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_join:95:sector:95:info" width="65.2083903561629" height="14" x="0" y="45" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_medium:95:way:95:crossproducts" width="58.57113725650401" height="14" x="0" y="60" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_separate:95:easy:95:hard:95:way_:40:hard_way:41:" width="47.10011847131156" height="14" x="0" y="75" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_hard:95:way:95:arctangents" width="37.907163429131685" height="14" x="0" y="90" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_separate:95:easy:95:hard:95:way_:40:easy_way:41:" width="23.77903398220207" height="14" x="0" y="105" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_cross:95:join:95:tile:95:offsets" width="21.419858242947893" height="14" x="0" y="120" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_xoyo:95:sector:95:creation" width="13.169465040750046" height="14" x="0" y="135" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_ray:95:segment:95:intersections" width="2.7054183603298134" height="14" x="0" y="150" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_add:95:missing:95:sectors" 
width="2.6374879996731826" height="14" x="0" y="165" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_area:95:calc" width="1.3047946837701112" height="14" x="0" y="180" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_sector:95:uv:95:summation" width="1.1924511986709425" height="14" x="0" y="195" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_join:95:sector:95:angles" width="0.6615158880922715" height="14" x="0" y="210" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_tile:95:sums" width="0.4293019043922898" height="14" x="0" y="225" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_all:95:sectors" width="0.38623690177094183" height="14" x="0" y="240" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_point:95:update" width="0.3530206780099543" height="14" x="0" y="255" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_prepare:95:sector:95:angles" width="0.17727801948648814" height="14" x="0" y="270" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_prepare:95:df" width="0.1336887472680107" height="14" x="0" y="285" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_normalize" width="0.11897175072001961" height="14" x="0" y="300" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_determine:95:tile" width="0.0806626223012031" height="14" x="0" y="315" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_arctangents" width="0.0" height="14" x="0" y="330" fill="#4988b6" stroke="#4988b6"/><rect id="histogram_2978457_encsvgid_s_explode:95:points" width="0.0" height="14" x="0" y="345" fill="#4988b6" stroke="#4988b6"/><text x="2" text-anchor="start" y="13" font-family="Times" fill="#000000" font-size="12px">separate_easy_hard_way (non-easy way)</text><text x="2" text-anchor="start" y="28" font-family="Times" fill="#000000" 
font-size="12px">sector_sums</text><text x="2" text-anchor="start" y="43" font-family="Times" fill="#000000" font-size="12px">separate_easy_hard_way (medium way)</text><text x="2" text-anchor="start" y="58" font-family="Times" fill="#000000" font-size="12px">join_sector_info</text><text x="2" text-anchor="start" y="73" font-family="Times" fill="#000000" font-size="12px">medium_way_crossproducts</text><text x="2" text-anchor="start" y="88" font-family="Times" fill="#000000" font-size="12px">separate_easy_hard_way (hard way)</text><text x="2" text-anchor="start" y="103" font-family="Times" fill="#000000" font-size="12px">hard_way_arctangents</text><text x="2" text-anchor="start" y="118" font-family="Times" fill="#000000" font-size="12px">separate_easy_hard_way (easy way)</text><text x="2" text-anchor="start" y="133" font-family="Times" fill="#000000" font-size="12px">cross_join_tile_offsets</text><text x="2" text-anchor="start" y="148" font-family="Times" fill="#000000" font-size="12px">xoyo_sector_creation</text><text x="2" text-anchor="start" y="163" font-family="Times" fill="#000000" font-size="12px">ray_segment_intersections</text><text x="2" text-anchor="start" y="178" font-family="Times" fill="#000000" font-size="12px">add_missing_sectors</text><text x="2" text-anchor="start" y="193" font-family="Times" fill="#000000" font-size="12px">area_calc</text><text x="2" text-anchor="start" y="208" font-family="Times" fill="#000000" font-size="12px">sector_uv_summation</text><text x="2" text-anchor="start" y="223" font-family="Times" fill="#000000" font-size="12px">join_sector_angles</text><text x="2" text-anchor="start" y="238" font-family="Times" fill="#000000" font-size="12px">tile_sums</text><text x="2" text-anchor="start" y="253" font-family="Times" fill="#000000" font-size="12px">all_sectors</text><text x="2" text-anchor="start" y="268" font-family="Times" fill="#000000" font-size="12px">point_update</text><text x="2" text-anchor="start" y="283" 
font-family="Times" fill="#000000" font-size="12px">prepare_sector_angles</text><text x="2" text-anchor="start" y="298" font-family="Times" fill="#000000" font-size="12px">prepare_df</text><text x="2" text-anchor="start" y="313" font-family="Times" fill="#000000" font-size="12px">normalize</text><text x="2" text-anchor="start" y="328" font-family="Times" fill="#000000" font-size="12px">determine_tile</text><text x="2" text-anchor="start" y="343" font-family="Times" fill="#000000" font-size="12px">arctangents</text><text x="2" text-anchor="start" y="358" font-family="Times" fill="#000000" font-size="12px">explode_points</text><text x="237" text-anchor="end" y="381" font-family="Times" fill="#000000" font-size="12px">1.54</text><text x="5" text-anchor="start" y="381" font-family="Times" fill="#000000" font-size="12px">subroutine_time</text><line x1="242" y1="2" x2="242" y2="384" stroke="#101010" stroke-width="1" stroke-dasharray="3 2" /><text x="244" text-anchor="middle" y="192.0" font-family="Times" fill="#000000" font-size="12px" transform="rotate(90,244,192.0)">subroutine</text><rect width="255" height="383" x="0" y="0" fill-opacity="0.0" stroke="#000000" /></svg>'

In [None]:
#
# This is the original code
# Dataframe Sizes:  5_002_101 | 56_122_592 | 20_335_270
#
# Inputs captured by the tile-optimized run; index [0] takes the first captured entry of each list.
# They are fetched once under their own names so the timed loop never overwrites its inputs.
df_weight_sum = udspvs_tile_opt.df_weight_sums             [0]  # divisor for the ratio below -- presumably a scalar total; confirm
_raw_easy_    = udspvs_tile_opt.df_separate_easy_way       [0]
_raw_medium_  = udspvs_tile_opt.df_medium_way_crossproducts[0]
_raw_hard_    = udspvs_tile_opt.df_hard_way_arctangents    [0]

print(f'{len(_raw_easy_):_} | {len(_raw_medium_):_} | {len(_raw_hard_):_}')

_group_keys_ = ['__index__','x','y','sector']

# Timing Results | M1 Pro 16G
# 0.25s | 5.02s | 0.60s | 0.00s | 0.01s | 5.88s
# 0.11s | 4.58s | 0.59s | 0.00s | 0.01s | 5.29s
# 0.11s | 4.65s | 0.58s | 0.00s | 0.01s | 5.35s
# Columns: easy group_by | medium group_by | hard group_by | concat | final group_by + ratio | total
for _trial_ in range(3):
    # perf_counter() is the clock intended for interval timing
    t0 = time.perf_counter()
    df_easy_way   = _raw_easy_  .group_by(_group_keys_).agg(pl.col('tile_sum').sum().alias('_w_sum_'))
    t1 = time.perf_counter()
    df_medium_way = _raw_medium_.group_by(_group_keys_).agg(pl.col('w')       .sum().alias('_w_sum_'))
    t2 = time.perf_counter()
    df_hard_way   = _raw_hard_  .group_by(_group_keys_).agg(pl.col('w_right') .sum().alias('_w_sum_'))
    t3 = time.perf_counter()
    df            = pl.concat([df_easy_way, df_medium_way, df_hard_way])
    t4 = time.perf_counter()
    # A (point, sector) pair can occur in more than one partial result, so sum once more before the ratio
    df            = df.group_by(_group_keys_).agg(pl.col('_w_sum_').sum()).with_columns((pl.col('_w_sum_') / df_weight_sum).alias('_w_ratio_'))
    t5 = time.perf_counter()

    print(f'{t1-t0:.2f}s | {t2-t1:.2f}s | {t3-t2:.2f}s | {t4-t3:.2f}s | {t5-t4:.2f}s | {t5-t0:.2f}s')

df.shape

In [None]:
#
# Don't need to keep the x and y since the __index__ was meant to key into them...
# ... this is negligibly faster
#

# Timing Results | M1 Pro 16G
# 0.10s | 3.72s | 0.52s | 0.00s | 0.01s | 4.36s
# 0.09s | 3.69s | 0.48s | 0.00s | 0.01s | 4.27s
# 0.10s | 3.79s | 0.47s | 0.00s | 0.01s | 4.37s
# Columns: easy group_by | medium group_by | hard group_by | concat | final group_by + ratio | total

# Loop-invariant inputs, fetched once; index [0] takes the first captured entry of each list.
df_weight_sum = udspvs_tile_opt.df_weight_sums             [0]  # divisor for the ratio below -- presumably a scalar total; confirm
_raw_easy_    = udspvs_tile_opt.df_separate_easy_way       [0]
_raw_medium_  = udspvs_tile_opt.df_medium_way_crossproducts[0]
_raw_hard_    = udspvs_tile_opt.df_hard_way_arctangents    [0]

_group_keys_ = ['__index__','sector']   # x and y are left out of the key in this variant

for _trial_ in range(3):
    # perf_counter() is the clock intended for interval timing
    t0 = time.perf_counter()
    df_easy_way   = _raw_easy_  .group_by(_group_keys_).agg(pl.col('tile_sum').sum().alias('_w_sum_'))
    t1 = time.perf_counter()
    df_medium_way = _raw_medium_.group_by(_group_keys_).agg(pl.col('w')       .sum().alias('_w_sum_'))
    t2 = time.perf_counter()
    df_hard_way   = _raw_hard_  .group_by(_group_keys_).agg(pl.col('w_right') .sum().alias('_w_sum_'))
    t3 = time.perf_counter()
    df            = pl.concat([df_easy_way, df_medium_way, df_hard_way])
    t4 = time.perf_counter()
    df            = df.group_by(_group_keys_).agg(pl.col('_w_sum_').sum()).with_columns((pl.col('_w_sum_') / df_weight_sum).alias('_w_ratio_'))
    t5 = time.perf_counter()

    print(f'{t1-t0:.2f}s | {t2-t1:.2f}s | {t3-t2:.2f}s | {t4-t3:.2f}s | {t5-t4:.2f}s | {t5-t0:.2f}s')

df.shape

In [None]:
#
# Or maybe just do the aggregation once...
# ... this is about 5x-10x faster than the original
# ... which appears dependent on the points chosen
# ... ... further tests showed that it may have been between 2x and 3x faster
#

# Timing Results | M1 Pro 16G
# 0.00s | 0.00s | 0.00s | 0.00s | 0.00s | 0.67s | 0.67s
# 0.00s | 0.00s | 0.00s | 0.00s | 0.00s | 0.53s | 0.53s
# 0.00s | 0.00s | 0.00s | 0.00s | 0.00s | 0.43s | 0.43s
# Columns: easy trim | medium trim | hard trim | hard rebuild | concat | group_by + ratio | total

# Loop-invariant inputs, fetched once; index [0] takes the first captured entry of each list.
df_weight_sum = udspvs_tile_opt.df_weight_sums             [0]  # divisor for the ratio below -- presumably a scalar total; confirm
_raw_easy_    = udspvs_tile_opt.df_separate_easy_way       [0]
_raw_medium_  = udspvs_tile_opt.df_medium_way_crossproducts[0]
_raw_hard_    = udspvs_tile_opt.df_hard_way_arctangents    [0]

for _trial_ in range(3):
    # perf_counter() is the clock intended for interval timing
    t0 = time.perf_counter()
    # Skip the per-frame aggregation: keep only the key columns plus the weight column, renamed to '_w_sum_'
    df_easy_way   = _raw_easy_  .drop(set(_raw_easy_  .columns) - set(['__index__','sector','tile_sum'])).rename({'tile_sum':'_w_sum_'})
    t1 = time.perf_counter()
    df_medium_way = _raw_medium_.drop(set(_raw_medium_.columns) - set(['__index__','sector','w'       ])).rename({'w':       '_w_sum_'})
    t2 = time.perf_counter()
    df_hard_way   = _raw_hard_  .drop(set(_raw_hard_  .columns) - set(['__index__','sector','w_right' ])).rename({'w_right': '_w_sum_'})
    t3 = time.perf_counter()
    # Rebuild with an explicit column order -- presumably so this frame lines up with the
    # other two for the concat below; TODO confirm
    df_hard_way = pl.DataFrame({'__index__': df_hard_way['__index__'], '_w_sum_': df_hard_way['_w_sum_'], 'sector': df_hard_way['sector']})
    t4 = time.perf_counter()
    df            = pl.concat([df_easy_way, df_medium_way, df_hard_way])
    t5 = time.perf_counter()
    # Single aggregation over the stacked rows, followed by the ratio against the weight sum
    df            = df.group_by(['__index__','sector']).agg(pl.col('_w_sum_').sum()).with_columns((pl.col('_w_sum_') / df_weight_sum).alias('_w_ratio_'))
    t6 = time.perf_counter()

    print(f'{t1-t0:.2f}s | {t2-t1:.2f}s | {t3-t2:.2f}s | {t4-t3:.2f}s | {t5-t4:.2f}s | {t6-t5:.2f}s | {t6-t0:.2f}s')

df.shape