In [None]:
#
# Attempts to implement the following:
#
# H. Rave, V. Molchanov and L. Linsen, "Uniform Sample Distribution in Scatterplots via Sector-based Transformation," 
# 2024 IEEE Visualization and Visual Analytics (VIS), St. Pete Beach, FL, USA, 2024, pp. 156-160, 
# doi: 10.1109/VIS55277.2024.00039. 
# keywords: {Data analysis;Visual analytics;Clutter;Scatterplot de-cluttering;spatial transformation},
#

In [None]:
import polars as pl
import numpy as np
from math import cos, sin, pi, sqrt, atan2
from shapely import Polygon
import rtsvg
# Shared RACETrack instance; the cells below also use it (rt.table, rt.rayIntersectsSegment, rt.co_mgr)
rt = rtsvg.RACETrack()
# Load one netflow chunk; the path is relative to the notebook's working directory
df = pl.read_csv('../../data/2013_vast_challenge/mc3_netflow/nf/nf-chunk1.csv')
# presumably converts the 'parsedDate' column to a timestamp dtype -- confirm against rtsvg
df = rt.columnsAreTimestamps(df, 'parsedDate')
# Shorten the column names; columns that are not needed are renamed to _delN_ placeholders
df = df.rename({'TimeSeconds':                '_del1_', 'parsedDate':                 'timestamp',
                'dateTimeStr':                '_del2_', 'ipLayerProtocol':            'pro',
                'ipLayerProtocolCode':        '_del3_', 'firstSeenSrcIp':             'sip',
                'firstSeenDestIp':            'dip',    'firstSeenSrcPort':           'spt',
                'firstSeenDestPort':          'dpt',    'moreFragments':              '_del4_',
                'contFragments':              '_del5_', 'durationSeconds':            'dur',
                'firstSeenSrcPayloadBytes':   '_del6_', 'firstSeenDestPayloadBytes':  '_del7_',
                'firstSeenSrcTotalBytes':     'soct',   'firstSeenDestTotalBytes':    'doct',
                'firstSeenSrcPacketCount':    'spkt',   'firstSeenDestPacketCount':   'dpkt',
                'recordForceOut':             '_del8_'})
# The pattern is meant to match the _delN_ placeholder names created by the rename above
df = df.drop([r'^_del\d_$'])
# NOTE(review): no seed is given, so the sample differs on every run; also verify the chunk
# holds at least 2,000,000 rows, otherwise sampling without replacement may fail
df = df.sample(2_000_000)
# Scatterplot of source vs. destination total bytes, colored by destination port
xy = rt.xy(df, x_field='soct', y_field='doct', color_by='dpt', align_pixels=False, dot_size='small')
xy._repr_svg_() # force a render to get the xy's to populate
# Pull the rendered x/y columns out of the plot's dataframe as plain Python lists
_xvals_, _yvals_ = list(xy.df[xy.x_axis_col]), list(xy.df[xy.y_axis_col])
print(len(_xvals_), len(_yvals_))
# Every point gets the same weight
# NOTE(review): the synthetic-data cell further down rebinds _xvals_, _yvals_ and _weights_
_weights_        = [1.0] * len(_xvals_)
# Last expression of the cell: displays the scatterplot
xy

In [None]:
import random
# Synthetic test data: three colored clusters, each scattered inside a circle.
# A dedicated, seeded generator keeps the point cloud identical across
# Restart & Run All without touching the global `random` state.
RANDOM_SEED  = 42
_rng_        = random.Random(RANDOM_SEED)
num_of_pts   = [100, 200, 50]                 # points per cluster
circle_geoms = [(5,5,1),(20,10,2),(8,8,1)]    # (center x, center y, radius) per cluster
colors       = ['#ff0000','#006400','#0000ff'] # fill color per cluster
_xvals_, _yvals_, _weights_, _colors_ = [], [], [], []
for i in range(len(num_of_pts)):
    cx, cy, radius = circle_geoms[i]
    for j in range(num_of_pts[i]):
        # Angle and distance are both drawn uniformly, so points are denser near each center
        a, l = _rng_.random() * 2 * pi, _rng_.random() * radius
        _xvals_.append(cx + l * cos(a))
        _yvals_.append(cy + l * sin(a))
        _weights_.append(1.0)
        _colors_.append(colors[i])

In [None]:
#
# xyUniformSampleDistributionSectorTransform() - implementation of the referenced paper
#
def xyUniformSampleDistributionSectorTransform(xvals, yvals, weights=None, colors=None, iterations=4, sectors=16, border_perc=0.01, vector_scalar=0.1):
    """
    Iteratively displace scatterplot points toward a more uniform distribution
    by comparing, around every point, how much weight versus how much area
    falls into each angular sector.

    Per pass and per point, the padded unit square is split into `sectors`
    equal-angle wedges centered on the point.  For each wedge the fraction of
    the total weight inside it is compared with the fraction of the total area
    it covers; the point is pushed away from over-populated wedges and toward
    under-populated ones.  All coordinates are re-normalized to [0, 1] after
    every pass.  Every point is compared against every other point in every
    pass, and one diagnostic SVG is built per point per pass.

    Relies on the notebook-level `rt` instance (`rt.rayIntersectsSegment`,
    `rt.co_mgr.getColor`) and on shapely's `Polygon` for the wedge areas.

    Parameters
    ----------
    xvals, yvals : sequences of numbers
        Point coordinates; each axis is normalized to [0, 1] independently.
    weights : sequence of numbers, optional
        Per-point weights.  Defaults to 1.0 for every point.
    colors : sequence of str, optional
        Per-point fill colors for the per-iteration SVGs ('#000000' if None).
    iterations : int
        Number of displacement passes.
    sectors : int
        Number of equal-angle wedges around each point.
    border_perc : float
        Padding added on every side of the unit square, as a fraction of it.
    vector_scalar : float
        Multiplier applied to every wedge's contribution to the displacement.

    Returns
    -------
    (xvals, yvals, svgs, svgs_for_sectors)
        Final normalized coordinates, one SVG string per iteration, and one
        sector-diagnostic SVG string per point per iteration.

    Raises
    ------
    Exception
        If a sector boundary ray cannot be matched to a side of the bounding
        rectangle, or a sector spans two non-adjacent sides.
    """
    svgs, svgs_for_sectors = [], []
    # Normalize the coordinates to be between 0.0 and 1.0
    def normalizeCoordinates(xs, ys):
        xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
        # Zero-width axis: widen the range slightly so the division below is defined
        if xmin == xmax: xmin -= 0.0001; xmax += 0.0001
        if ymin == ymax: ymin -= 0.0001; ymax += 0.0001
        xs_new, ys_new = [], []
        for x, y in zip(xs, ys):
            xs_new.append((x-xmin)/(xmax-xmin))
            ys_new.append((y-ymin)/(ymax-ymin))
        return xs_new, ys_new
    # Force all the coordinates to be between 0 and 1
    xvals, yvals = normalizeCoordinates(xvals, yvals)
    # Bounds are the unit square plus the border.  Because the points are
    # re-normalized after every pass, these bounds never change.
    xmin, ymin, xmax, ymax = 0.0, 0.0, 1.0, 1.0
    xperc, yperc = (xmax-xmin)*border_perc, (ymax-ymin)*border_perc
    xmin, ymin, xmax, ymax = xmin-xperc, ymin-yperc, xmax+xperc, ymax+yperc
    # Totals used to turn per-sector weight / area into fractions
    if weights is None: weights = np.ones(len(xvals))
    weight_sum = sum(weights)
    area_total = ((xmax-xmin)*(ymax-ymin))
    # Sides are numbered 0..3: y=ymin, x=xmax, y=ymax, x=xmin (top, right, bottom, left in SVG space)
    _side_segments_ = (((xmin, ymin), (xmax, ymin)),
                       ((xmax, ymin), (xmax, ymax)),
                       ((xmax, ymax), (xmin, ymax)),
                       ((xmin, ymax), (xmin, ymin)))
    # Corner that lies between two consecutive sides when sweeping with increasing angle
    _corner_between_ = {(0,1): (xmax,ymin), (1,2): (xmax,ymax), (2,3): (xmin,ymax), (3,0): (xmin,ymin)}
    # Determine the side and xy that a specific ray hits
    def sideAndXY(xy, uv):
        for _side_, (_p0_, _p1_) in enumerate(_side_segments_):
            _xyi_ = rt.rayIntersectsSegment(xy, uv, _p0_, _p1_)
            if _xyi_ is not None: return _side_, _xyi_
        # hacking the corner cases ... literally the corners
        if xy[0] >= xmin and xy[0] <= xmax and xy[1] >= ymin and xy[1] <= ymax:
            if uv == (0.0, 0.0):
                print(xy, uv, (xmin,ymin,xmax,ymax))
                raise Exception('No Intersection Found for sideAndXY() ... ray is (0,0)')
            else:
                xp, yp, up, vp = round(xy[0], 2), round(xy[1], 2), round(uv[0], 2), round(uv[1], 2)
                if abs(xp) == abs(yp) and abs(up) == abs(vp):
                    # The sign of the direction picks the corner the diagonal ray is heading for.
                    # Each corner is reported with the side that follows it in sweep order.
                    if   up < 0.0 and vp < 0.0: return 0, (xmin, ymin)
                    elif up > 0.0 and vp < 0.0: return 1, (xmax, ymin)
                    elif up > 0.0 and vp > 0.0: return 2, (xmax, ymax)
                    elif up < 0.0 and vp > 0.0: return 3, (xmin, ymax)
                print(xy, uv, (xmin,ymin,xmax,ymax))
                raise Exception('No Intersection Found for sideAndXY() ... xy or uv are not equal to one another')
        else:
            print(xy, uv, (xmin,ymin,xmax,ymax))
            raise Exception('No Intersection Found for sideAndXY() ... point not within bounds')
    # Calculate the sector angles
    _sector_angles_, _sector_anchor_ = [], []
    a, ainc = 0.0, 2*pi/sectors
    for s in range(sectors):
        _sector_angles_.append((a, a+ainc))
        # The anchor points opposite to the sector's center line
        _sector_anchor_.append(a + pi + ainc/2.0)
        a += ainc
    # Calculate the UV vector for a specific point
    def ptUVVec(x,y):
        svg_sectors = [f'<svg x="0" y="0" width="512" height="512" viewBox="{xmin} {ymin} {xmax-xmin} {ymax-ymin}" xmlns="http://www.w3.org/2000/svg">']
        svg_sectors.append(f'<rect x="{xmin}" y="{ymin}" width="{xmax-xmin}" height="{ymax-ymin}" fill="#ffffff" />')
        _sector_sum_ = {}
        for s in range(sectors): _sector_sum_[s] = 0.0
        # Iterate over all points ... adding to the sector sum for the correct sector
        for i in range(len(xvals)):
            _x_, _y_, _w_ = xvals[i], yvals[i], weights[i]
            # Skip the point itself (and any exact duplicates of it)
            if _x_ == x and _y_ == y: continue
            _dx_, _dy_ = _x_ - x, _y_ - y
            a = atan2(_dy_, _dx_)
            # atan2() yields (-pi, pi] but the sectors tile [0, 2*pi) ... wrap the negative half
            if a < 0.0: a += 2*pi
            # Fallback covers float rounding right at the 2*pi seam
            _s_hit_ = sectors - 1
            for s in range(sectors):
                if a >= _sector_angles_[s][0] and a < _sector_angles_[s][1]:
                    _s_hit_ = s
                    break
            _sector_sum_[_s_hit_] += _w_
            _color_ = rt.co_mgr.getColor(_s_hit_)
            svg_sectors.append(f'<circle cx="{_x_}" cy="{_y_}" r="0.01" stroke="{_color_}" stroke-width="0.001" fill="{_color_}" />')
        # Determine the area for each sector (from this points perspective)
        _sector_area_, _poly_definition_ = {}, {}
        for s in range(sectors):
            _a0_, _a1_ = _sector_angles_[s]
            _side0_, _xy0_ = sideAndXY((x,y), (cos(_a0_), sin(_a0_)))
            _side1_, _xy1_ = sideAndXY((x,y), (cos(_a1_), sin(_a1_)))
            if _side0_ == _side1_:
                _poly_definition_[s] = [(x,y), _xy0_, _xy1_] # same side
            elif (_side0_, _side1_) in _corner_between_:
                # The wedge wraps around one corner of the rectangle
                _poly_definition_[s] = [(x,y), _xy0_, _corner_between_[(_side0_, _side1_)], _xy1_]
            else:
                raise Exception(f'Sector {s} spans non-adjacent sides ({_side0_} -> {_side1_}) in ptUVVec() ... too few sectors?')
            _sector_area_[s] = Polygon(_poly_definition_[s]).area
        # From the paper ... weight the anchor the difference between the expected and actual density
        _scalar_ = vector_scalar
        u, v = 0.0, 0.0
        for s in range(sectors):
            # Positive when the sector holds a larger share of the weight than of the area
            _diff_ = (_sector_sum_[s]/weight_sum) - (_sector_area_[s]/area_total)
            u, v   = u + _scalar_ * _diff_ * cos(_sector_anchor_[s]), v + _scalar_ * _diff_ * sin(_sector_anchor_[s])
            _poly_coords_ = _poly_definition_[s]
            d      = f'M {_poly_coords_[0][0]} {_poly_coords_[0][1]} '
            for i in range(1, len(_poly_coords_)): d += f'L {_poly_coords_[i][0]} {_poly_coords_[i][1]} '
            d += 'Z'
            _color_ = rt.co_mgr.getColor(s)
            svg_sectors.append(f'<path d="{d}" stroke="{_color_}" fill="{_color_}" fill-opacity="0.3" stroke-width="0.002"/>')
        # Return the value (the drawn displacement is exaggerated 3x for visibility)
        svg_sectors.append(f'<line x1="{x}" y1="{y}" x2="{x+3*u}" y2="{y+3*v}" stroke="#ff0000" stroke-width="0.01" />')
        svg_sectors.append('</svg>')
        svgs_for_sectors.append(''.join(svg_sectors))
        return u,v

    # Iterations...
    for iters in range(iterations):
        svg = [f'<svg x="0" y="0" width="256" height="256" viewBox="{xmin} {ymin} {xmax-xmin} {ymax-ymin}" xmlns="http://www.w3.org/2000/svg">']
        # Background rectangle (a single x/y pair ... duplicate attributes are not well-formed XML)
        svg.append(f'<rect x="{xmin}" y="{ymin}" width="{xmax-xmin}" height="{ymax-ymin}" fill="#ffffff" />')
        xvals_next, yvals_next = [], []
        for j in range(len(xvals)):
            _x_, _y_ = xvals[j], yvals[j]
            uv = ptUVVec(_x_, _y_)
            svg.append(f'<line x1="{_x_}" y1="{_y_}" x2="{_x_+uv[0]}" y2="{_y_+uv[1]}" stroke="#a0a0a0" stroke-width="0.001" />')
            _color_ = colors[j] if colors is not None else '#000000'
            svg.append(f'<circle cx="{_x_}" cy="{_y_}" r="0.004" fill="{_color_}" />')
            xvals_next.append(_x_ + uv[0])
            yvals_next.append(_y_ + uv[1])
        svg.append('</svg>')
        svgs.append(''.join(svg))
        # Re-normalize so the next pass again works inside the same padded unit square
        xvals, yvals = normalizeCoordinates(xvals_next, yvals_next)

    # Return
    return xvals, yvals, svgs, svgs_for_sectors

# Run the transform on the point lists prepared in the earlier cells
x_new, y_new, svgs, svgs_for_sectors = xyUniformSampleDistributionSectorTransform(
    _xvals_,
    _yvals_,
    weights=_weights_,
    colors=_colors_,
    iterations=64,
    border_perc=0.1,
    vector_scalar=0.1,
)
# Last expression of the cell: display the per-iteration SVGs as a table
rt.table(svgs, per_row=8, spacer=10)

In [None]:
# Display only the first 12 sector-diagnostic SVGs (one is produced per point per iteration)
rt.table(svgs_for_sectors[:12], per_row=4, spacer=10)