In [None]:
import pandas as pd
import polars as pl
import numpy as np
from math import pi, sin, cos
import sys
sys.path.insert(1, '../rtsvg')
from rtsvg import *
# Load the three VAST 2013 MC3 netflow chunks, shorten the column names, and
# render a chord diagram of source IP -> destination IP for a 1000-row sample.
rt = RACETrack()
_base_ = '../../data/2013_vast_challenge/mc3_netflow/nf/'
# Fixed seed so the sample (and every cell derived from `cd`) is reproducible
# under Restart Kernel -> Run All.
_seed_ = 42

df_orig = (
    pl.concat([pl.read_csv(_base_ + f'nf-chunk{_chunk_}.csv') for _chunk_ in (1, 2, 3)])
      .rename({
          'TimeSeconds':               'secs',
          'parsedDate':                'timestamp',
          'dateTimeStr':               'timestamp_str',
          'ipLayerProtocol':           'pro_str',
          'ipLayerProtocolCode':       'pro',
          'firstSeenSrcIp':            'sip',
          'firstSeenDestIp':           'dip',
          'firstSeenSrcPort':          'spt',
          'firstSeenDestPort':         'dpt',
          'moreFragments':             'mfrag',
          'contFragments':             'cfrag',
          'durationSeconds':           'dur',
          'firstSeenSrcPayloadBytes':  'soct_pay',
          'firstSeenDestPayloadBytes': 'doct_pay',
          'firstSeenSrcTotalBytes':    'soct',
          'firstSeenDestTotalBytes':   'doct',
          'firstSeenSrcPacketCount':   'spkt',
          'firstSeenDestPacketCount':  'dpkt',
          'recordForceOut':            'out',
      })
)
# Let rtsvg convert the 'timestamp' column (presumably parses it into a datetime dtype -- confirm)
df_orig = rt.columnsAreTimestamps(df_orig, 'timestamp')

# Work on a small, seeded sample to keep the chord diagram readable
df = df_orig.sample(1000, seed=_seed_)
cd = rt.chordDiagram(df, [('sip','dip')], equal_size_nodes=True)
cd

In [None]:
import hdbscan

# Turn every (fm, to) edge of the chord diagram into a 2-D point made of the
# normalised mid-angles of its two arcs, then cluster those points with HDBSCAN.
# Requires `cd` from the chord-diagram cell above.
handled    = set()   # 'fm|||to' keys already processed (each pair appears under both of its nodes)
fmto_list  = []      # 'fm|||to' key of every edge, parallel to the lists below
angle_list = []      # (fm_span, to_span) raw arc spans as stored in cd.node_dir_arc
span_list  = []      # (fm_coord, to_coord) points handed to the clusterer
xs_list    = []      # fm_coord of every edge (scatter-plot x)
ys_list    = []      # to_coord of every edge (scatter-plot y)

for node, _by_fm_ in cd.node_dir_arc.items():
    for fm, _by_to_ in _by_fm_.items():
        for to, fm_span in _by_to_.items():
            key = str(fm) + '|||' + str(to)
            if key in handled:
                continue
            handled.add(key)
            # The matching arc lives under the *other* endpoint of the pair.
            # NOTE(review): when the pair is first reached from a node that is not `fm`,
            # `fm_span` is the arc stored under that visiting node and `to_span` the arc
            # stored under `fm`, so the orientation of the entry depends on iteration
            # order -- confirm this is intended.
            _other_  = to if fm == node else fm
            to_span  = cd.node_dir_arc[_other_][fm][to]
            # Mid-angle divided by 360, shifted by -0.5 (spans are assumed to be degrees)
            fm_coord = (fm_span[0]+fm_span[1])/720.0 - 0.5
            to_coord = (to_span[0]+to_span[1])/720.0 - 0.5
            fmto_list.append(key)
            span_list.append((fm_coord,to_coord))
            xs_list.append(fm_coord)
            ys_list.append(to_coord)
            angle_list.append((fm_span,to_span))

clusterer = hdbscan.HDBSCAN()
clusterer.fit(span_list)
# HDBSCAN labels noise points -1; that label is not a cluster and must not be counted
n_clusters = len({int(_l_) for _l_ in clusterer.labels_ if _l_ != -1})
print("n_clusters =", n_clusters)
rt.xy(pd.DataFrame({'x':xs_list,'y':ys_list,'c':clusterer.labels_}),x_field='x',y_field='y',color_by='c')

In [None]:
# Edge bundling, variant 1: noise edges are drawn as plain black cubic Beziers;
# every clustered edge is re-drawn through waypoints that are blended toward the
# average curve of its cluster.
# Requires `angle_list` and `clusterer` from the clustering cell above.

# Edge indices grouped by cluster label (the noise label -1 is left out)
cluster_indexes = {}
for i in range(len(angle_list)):
    _label_ = clusterer.labels_[i]
    if _label_ != -1:
        cluster_indexes.setdefault(_label_, []).append(i)

w,h,r = 512,512,200
cx,cy = w/2,h/2
ts    = [0.2, 0.4, 0.6, 0.8]   # curve parameters at which every edge is sampled
pull  = [1,   2,   2,   1]     # weight of the cluster average vs. the edge's own sample (weight 1)
#ts      = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
#pull    = [1,  1,  1,  2,  2,  2,  1,  1,  1]

# Cubic Bezier control polygon (start, control 0, control 1, end) of every edge,
# computed once and reused by all loops below.
edge_beziers = []
for fm_span, to_span in angle_list:
    fm_angle, to_angle = (fm_span[0]+fm_span[1])/2.0, (to_span[0]+to_span[1])/2.0
    fm_rad,   to_rad   = fm_angle*pi/180, to_angle*pi/180
    # Angular separation of the two mid-angles folded into [0,180] degrees
    angle_d = 180 - abs(abs(fm_angle - to_angle) - 180)
    # Control points move from 0.8*r toward the center as the separation grows
    _ratio_ = 0.8 - 0.8 * angle_d/180
    edge_beziers.append(((cx + r*cos(fm_rad),         cy + r*sin(fm_rad)),
                         (cx + r*_ratio_*cos(fm_rad), cy + r*_ratio_*sin(fm_rad)),
                         (cx + r*_ratio_*cos(to_rad), cy + r*_ratio_*sin(to_rad)),
                         (cx + r*cos(to_rad),         cy + r*sin(to_rad))))

svg_parts = [f'<svg x="0" y="0" width="{w}" height="{h}"><rect x="0" y="0" width="{w}" height="{h}" fill="#ffffff" />']

# Noise edges: un-bundled, black
for i in range(len(angle_list)):
    if clusterer.labels_[i] == -1:
        (x1,y1),(x_pull0,y_pull0),(x_pull1,y_pull1),(x2,y2) = edge_beziers[i]
        _path_ = f'M {x1} {y1} C {x_pull0} {y_pull0} {x_pull1} {y_pull1} {x2} {y2}'
        svg_parts.append(f'<path d="{_path_}" stroke="#000000" stroke-opacity="0.8" fill="none" />')

# Clustered edges: one color per cluster
for _label_, _members_ in cluster_indexes.items():
    _color_ = rt.co_mgr.getColor(_label_)

    # Sample every member curve once at each t
    samples = {}   # edge index -> [(x,y) at ts[0], (x,y) at ts[1], ...]
    for i in _members_:
        bc = rt.bezierCurve(*edge_beziers[i])
        _pts_ = []
        for t in ts:
            x,y = bc(t)
            _pts_.append((x,y))
        samples[i] = _pts_

    # Cluster-average position at each t
    xavg = [sum([samples[i][j][0] for i in _members_])/len(_members_) for j in range(len(ts))]
    yavg = [sum([samples[i][j][1] for i in _members_])/len(_members_) for j in range(len(ts))]

    for i in _members_:
        (x1,y1),_,_,(x2,y2) = edge_beziers[i]
        # Waypoints: the true start, each sample blended toward the average, the true end
        wx,wy = [x1],[y1]
        for (x,y),_weight_,xa,ya in zip(samples[i], pull, xavg, yavg):
            wx.append((x+_weight_*xa)/(1+_weight_))
            wy.append((y+_weight_*ya)/(1+_weight_))
        wx.append(x2)
        wy.append(y2)

        # Chain cubic segments through the interior waypoints, then a straight line to the end
        _d_ = f'M {wx[0]} {wy[0]} '
        for j in range(1,len(wx)-1):
            uv0 = rt.unitVector(((wx[j-1],wy[j-1]),(wx[j],  wy[j])))
            uv1 = rt.unitVector(((wx[j+1],wy[j+1]),(wx[j],  wy[j])))
            l   = 0.4*rt.segmentLength(((wx[j],wy[j]),(wx[j+1],wy[j+1])))
            _d_ += f' C {wx[j-1]+l*uv0[0]} {wy[j-1]+l*uv0[1]} {wx[j]+l*uv1[0]} {wy[j]+l*uv1[1]} {wx[j]} {wy[j]}'
        _d_ += f' L {wx[-1]} {wy[-1]}'
        svg_parts.append(f'<path d="{_d_}" fill="none" stroke="{_color_}" opacity="0.4" />')

svg_parts.append('</svg>')
svg = ''.join(svg_parts)
rt.displaySVG(svg)

In [None]:
# Edge bundling, variant 2: noise edges are drawn as plain black cubic Beziers;
# every cluster is drawn as one "trunk" polyline through its average curve, with
# thin "tendrils" connecting each member's endpoints to the ends of the trunk.
# Requires `angle_list`, `clusterer` and `cluster_indexes` from the cells above.
w,h,r,rp = 512,512,200,190      # rp: radius of the tendrils' first control point, just inside r
cx,cy    = w/2,h/2
ts       = [0.2,0.4,0.6,0.8]    # curve parameters at which every edge is sampled
#ts      = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]

# Per-edge endpoint angles (radians) and cubic Bezier control polygon, computed once
edge_rads, edge_beziers = [], []
for fm_span, to_span in angle_list:
    fm_angle, to_angle = (fm_span[0]+fm_span[1])/2.0, (to_span[0]+to_span[1])/2.0
    fm_rad,   to_rad   = fm_angle*pi/180, to_angle*pi/180
    # Angular separation of the two mid-angles folded into [0,180] degrees
    angle_d = 180 - abs(abs(fm_angle - to_angle) - 180)
    # Control points move from 0.8*r toward the center as the separation grows
    _ratio_ = 0.8 - 0.8 * angle_d/180
    edge_rads.append((fm_rad, to_rad))
    edge_beziers.append(((cx + r*cos(fm_rad),         cy + r*sin(fm_rad)),
                         (cx + r*_ratio_*cos(fm_rad), cy + r*_ratio_*sin(fm_rad)),
                         (cx + r*_ratio_*cos(to_rad), cy + r*_ratio_*sin(to_rad)),
                         (cx + r*cos(to_rad),         cy + r*sin(to_rad))))

svg_parts = [f'<svg x="0" y="0" width="{w}" height="{h}"><rect x="0" y="0" width="{w}" height="{h}" fill="#ffffff" />']

# Noise edges: un-bundled, black
for i in range(len(angle_list)):
    if clusterer.labels_[i] == -1:
        (x1,y1),(x_pull0,y_pull0),(x_pull1,y_pull1),(x2,y2) = edge_beziers[i]
        _path_ = f'M {x1} {y1} C {x_pull0} {y_pull0} {x_pull1} {y_pull1} {x2} {y2}'
        svg_parts.append(f'<path d="{_path_}" stroke="#000000" stroke-opacity="0.4" fill="none" />')

for _label_, _members_ in cluster_indexes.items():
    _color_ = rt.co_mgr.getColor(_label_)

    # Sample every member curve at each t and average per t
    _xs_at_, _ys_at_ = [[] for _ in ts], [[] for _ in ts]
    for i in _members_:
        bc = rt.bezierCurve(*edge_beziers[i])
        for j,t in enumerate(ts):
            x,y = bc(t)
            _xs_at_[j].append(x)
            _ys_at_[j].append(y)
    xavg_ls = [sum(_v_)/len(_v_) for _v_ in _xs_at_]
    yavg_ls = [sum(_v_)/len(_v_) for _v_ in _ys_at_]

    # Trunk: polyline through the averaged sample points
    p_trunk = f'M {xavg_ls[0]} {yavg_ls[0]}'
    for x,y in zip(xavg_ls[1:], yavg_ls[1:]):
        p_trunk += f' L {x} {y}'

    # Second control point of the tendrils: 4 units from each trunk end along the
    # unit vector rt.unitVector returns for the (center, trunk end) segment
    xy1_uv = rt.unitVector(((cx,cy),(xavg_ls[0],yavg_ls[0])))
    x1_edge, y1_edge = xavg_ls[0]+4*xy1_uv[0],yavg_ls[0]+4*xy1_uv[1]
    xy2_uv = rt.unitVector(((cx,cy),(xavg_ls[-1],yavg_ls[-1])))
    x2_edge, y2_edge = xavg_ls[-1]+4*xy2_uv[0],yavg_ls[-1]+4*xy2_uv[1]

    # Tendrils: from each member endpoint on the circle to the matching trunk end
    p_tendrils = ''
    for i in _members_:
        fm_rad, to_rad  = edge_rads[i]
        (x1,y1),_,_,(x2,y2) = edge_beziers[i]
        x1p,y1p,x2p,y2p = cx+rp*cos(fm_rad), cy+rp*sin(fm_rad), cx+rp*cos(to_rad), cy+rp*sin(to_rad)
        p_tendrils += f' M {x1} {y1} C {x1p} {y1p} {x1_edge} {y1_edge} {xavg_ls[0]} {yavg_ls[0]}'
        p_tendrils += f' M {x2} {y2} C {x2p} {y2p} {x2_edge} {y2_edge} {xavg_ls[-1]} {yavg_ls[-1]}'

    # Trunk thickness grows by 0.2 per bundled edge, capped at 4.0
    trunk_width = min(0.2*len(_members_), 4.0)
    svg_parts.append(f'<path d="{p_trunk}" stroke="{_color_}" stroke-width="{trunk_width}" fill="none" />')
    svg_parts.append(f'<path d="{p_tendrils}" stroke="{_color_}" stroke-width="0.6" fill="none" />')

svg_parts.append('</svg>')
svg = ''.join(svg_parts)
rt.displaySVG(svg)

In [None]:
# Histogram over the full netflow frame, binned by source IP ('sip') and counted by 'dip'
rt.histogram(
    df_orig,
    bin_by='sip',
    count_by='dip',
)

In [None]:
# Source IPs singled out for the remaining cells (they reuse `_ips_`)
_ips_ = ['172.10.0.4','172.20.0.4']
# Chord diagram of only the flows whose source is one of those IPs, without node labels
rt.chordDiagram(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    [('sip','dip')],
    draw_labels=False,
)

In [None]:
# One chord diagram per 'year_month_day' value of 'timestamp' for the selected source IPs.
# This variant sets x_axis_independent=True and leaves y_axis_independent at its default.
rt.smallMultiples(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=True,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip','dip')]},
)

In [None]:
# Same per-day chord-diagram small multiples, with both axis-independence flags off
rt.smallMultiples(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=False,
    y_axis_independent=False,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip','dip')]},
)

In [None]:
# Same per-day chord-diagram small multiples, with only y_axis_independent switched on
rt.smallMultiples(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=False,
    y_axis_independent=True,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip','dip')]},
)

In [None]:
# Same per-day chord-diagram small multiples, with both axis-independence flags on
rt.smallMultiples(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=True,
    y_axis_independent=True,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip','dip')]},
)

In [None]:

# Same per-day chord-diagram small multiples, with only x_axis_independent switched on
rt.smallMultiples(
    df_orig.filter(pl.col('sip').is_in(_ips_)),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=True,
    y_axis_independent=False,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip','dip')]},
)

In [None]:

# Per-day chord-diagram small multiples on a sample of the selected source IPs' flows,
# relating 'sip' to the ('dip','dpt') pair.
_df_ips_ = df_orig.filter(pl.col('sip').is_in(_ips_))
rt.smallMultiples(
    # Seeded so the figure is reproducible; the size is clamped because sampling more
    # rows than exist (without replacement) raises.
    _df_ips_.sample(min(1000, len(_df_ips_)), seed=42),
    category_by=rt.createTField('timestamp','year_month_day'),
    x_axis_independent=True,
    sm_type='chordDiagram',
    sm_params={'relationships':[('sip',('dip','dpt'))]},
)

In [None]:
# Build the unlabeled chord diagram for the selected source IPs and call renderSVG(True) on it
(
    rt.chordDiagram(
        df_orig.filter(pl.col('sip').is_in(_ips_)),
        [('sip','dip')],
        draw_labels=False,
    )
    .renderSVG(True)
)