In [None]:
import sys
from datetime import datetime, timedelta

import networkx as nx
import numpy as np
import pandas as pd
import polars as pl

# rtsvg is loaded from a sibling checkout, so the path tweak must precede its import
sys.path.insert(1, '../../rtsvg')
from rtsvg import *
rt = RACETrack()
_base_ = '../../../data/2013_vast_challenge/mc3_netflow/nf/'
# Read the three netflow chunks and stack them into a single frame
df_orig = pl.concat([pl.read_csv(_base_ + _chunk_) for _chunk_ in ('nf-chunk1.csv', 'nf-chunk2.csv', 'nf-chunk3.csv')])
# Shorten the VAST column names to the sip/dip/spt/dpt style used by the rest of the notebook
df_orig = df_orig.rename({'TimeSeconds':'secs',                  'parsedDate':'timestamp',                'dateTimeStr':'timestamp_str',
                          'ipLayerProtocol':'pro_str',           'ipLayerProtocolCode':'pro',             'firstSeenSrcIp':'sip',
                          'firstSeenDestIp':'dip',               'firstSeenSrcPort':'spt',                'firstSeenDestPort':'dpt',
                          'moreFragments':'mfrag',               'contFragments':'cfrag',                 'durationSeconds':'dur',
                          'firstSeenSrcPayloadBytes':'soct_pay', 'firstSeenDestPayloadBytes':'doct_pay',  'firstSeenSrcTotalBytes':'soct',
                          'firstSeenDestTotalBytes':'doct',      'firstSeenSrcPacketCount':'spkt',        'firstSeenDestPacketCount':'dpkt',
                          'recordForceOut':'out'})
# NOTE(review): presumably converts 'timestamp' to a datetime dtype (later cells call .dt.truncate on it) -- confirm
df_orig = rt.columnsAreTimestamps(df_orig, 'timestamp')
# Work on a fixed-seed sample so that Restart & Run All produces the same rows (and the same detections);
# the sample size is clamped because sampling without replacement cannot exceed the frame height
_sample_n_, _sample_seed_ = 10_000, 42
df      = df_orig.sample(min(_sample_n_, len(df_orig)), seed=_sample_seed_)
print(f'{len(df_orig)=} | {len(df)=}')

In [None]:
#
# detectDenialOfService() - detect simple denial of service
# - From Claude (v3 Sonnet) with minor modifications on port names
# - Doesn't appear to work correctly... believe the results were servers from the dataset
#
def detectDenialOfService(netflow_df, window="1m", packet_threshold=1000, byte_threshold=10_000_000):
    """Flag (source IP, time window) pairs that send an unusually large volume.

    Parameters
    ----------
    netflow_df : polars DataFrame with columns "sip", "timestamp" (temporal), "spkt" and "soct"
    window : truncation interval passed to ``dt.truncate`` (default one minute)
    packet_threshold : a group is flagged when its summed "spkt" exceeds this value
    byte_threshold : a group is flagged when its summed "soct" exceeds this value

    Returns
    -------
    polars DataFrame with columns "sip", "time_window", "packet_count", "total_bytes",
    containing only the groups where EITHER threshold is exceeded.
    """
    # Aggregate source-side packets / bytes per source IP and time bucket
    per_source = netflow_df.group_by([
        "sip",
        pl.col("timestamp").dt.truncate(window).alias("time_window")
    ]).agg(
        pl.col("spkt").sum().alias("packet_count"),
        pl.col("soct").sum().alias("total_bytes")
    )

    # Keep the buckets where either volume measure is over its threshold
    too_many_packets = pl.col("packet_count") > packet_threshold
    too_many_bytes   = pl.col("total_bytes")  > byte_threshold
    return per_source.filter(too_many_packets | too_many_bytes)

#
# detectDistributedDenialOfService() - detect simple distributed denial of service
# - From Claude (v3 Sonnet) with minor modifications on port names
# - Doesn't appear to work correctly... thresholds are too high
#
def detectDistributedDenialOfService(netflow_df, window="1m", packet_threshold=10_000,
                                     byte_threshold=100_000_000, src_ip_threshold=50):
    """Flag (destination IP, time window) pairs hit by many sources with a large volume.

    Parameters
    ----------
    netflow_df : polars DataFrame with columns "dip", "sip", "timestamp" (temporal), "spkt" and "soct"
    window : truncation interval passed to ``dt.truncate`` (default one minute)
    packet_threshold : minimum (exclusive) summed "spkt" per group
    byte_threshold : minimum (exclusive) summed "soct" per group
    src_ip_threshold : minimum (exclusive) number of distinct "sip" values per group

    Returns
    -------
    polars DataFrame with columns "dip", "time_window", "packet_count", "total_bytes",
    "unique_src_ips", containing only the groups where ALL three thresholds are exceeded.
    The defaults are the original hard-coded values; lower them when sampling the data.
    """
    # Aggregate traffic volume and source diversity per destination IP and time bucket
    per_destination = netflow_df.group_by([
        "dip",
        pl.col("timestamp").dt.truncate(window).alias("time_window")
    ]).agg(
        pl.col("spkt").sum().alias("packet_count"),
        pl.col("soct").sum().alias("total_bytes"),
        pl.col("sip").n_unique().alias("unique_src_ips")
    )

    # All three conditions must hold for a bucket to be reported
    return per_destination.filter(
        (pl.col("packet_count")   > packet_threshold) &
        (pl.col("total_bytes")    > byte_threshold) &
        (pl.col("unique_src_ips") > src_ip_threshold)
    )

#
# detectHorizontalPortScanning()
# - From Claude (v3 Sonnet) with minor modifications on port names
#
def detectHorizontalPortScanning(netflow_df, window="5m", dst_ip_threshold=5):
    """Flag (source IP, time window) pairs that contact many distinct destination IPs.

    Parameters
    ----------
    netflow_df : polars DataFrame with columns "sip", "dip" and "timestamp" (temporal)
    window : truncation interval passed to ``dt.truncate`` (default five minutes)
    dst_ip_threshold : a group is flagged when its number of distinct "dip" values exceeds this

    Returns
    -------
    polars DataFrame with columns "sip", "time_window", "unique_dst_ips".
    """
    # Count distinct destinations per source IP and time bucket
    fan_out = netflow_df.group_by([
        "sip",
        pl.col("timestamp").dt.truncate(window).alias("time_window")
    ]).agg(
        pl.col("dip").n_unique().alias("unique_dst_ips")
    )
    # Keep the buckets whose destination count is above the threshold (default 5)
    return fan_out.filter(pl.col("unique_dst_ips") > dst_ip_threshold)

#
# detectVerticalPortScanning()
# - From Claude (v3 Sonnet) with minor modifications on port names
#
def detectVerticalPortScanning(netflow_df, window="5m", port_threshold=10):
    """Flag (source IP, destination IP, time window) triples that touch many destination ports.

    Parameters
    ----------
    netflow_df : polars DataFrame with columns "sip", "dip", "dpt" and "timestamp" (temporal)
    window : truncation interval passed to ``dt.truncate`` (default five minutes)
    port_threshold : a group is flagged when its number of distinct "dpt" values exceeds this

    Returns
    -------
    polars DataFrame with columns "sip", "dip", "time_window", "unique_ports".
    """
    # Count distinct destination ports per source/destination pair and time bucket
    ports_touched = netflow_df.group_by([
        "sip",
        "dip",
        pl.col("timestamp").dt.truncate(window).alias("time_window")
    ]).agg(
        pl.col("dpt").n_unique().alias("unique_ports")
    )

    # Keep the buckets whose port count is above the threshold (default 10)
    return ports_touched.filter(pl.col("unique_ports") > port_threshold)

# Render one chord diagram per detected vertical / horizontal scan
def _activityAround(_df_, _sip_, _t_, _mins_=30):
    """Rows of _df_ whose source is _sip_ and whose timestamp lies within +/- _mins_ minutes of _t_."""
    _half_ = timedelta(minutes=_mins_)
    return _df_.filter(pl.col("sip") == _sip_).filter(pl.col("timestamp").is_between(_t_ - _half_, _t_ + _half_))

# scans_svgs           -- diagrams in display order
# entities_of_interest -- widget id -> set of entity names shown in that diagram
# widget_id_lu         -- widget id -> diagram object
scans_svgs, entities_of_interest, widget_id_lu = [], {}, {}

vscans      = detectVerticalPortScanning(df)
for _scan_ in vscans.iter_rows(named=True):
    sip, t = _scan_["sip"], _scan_["time_window"]
    df_sub = _activityAround(df, sip, t)
    # parent_lu groups every "dip|dpt" node under its destination IP, and the source under
    # its first three dotted components; parent_set collects the destination IPs seen
    parent_lu, parent_set = {}, set()
    sip_parent = '.'.join(sip.split('.')[:3])
    for _dip_, _dpt_ in zip(df_sub["dip"], df_sub["dpt"]):
        parent_lu[f'{_dip_}|{_dpt_}'] = _dip_
        parent_set.add(_dip_)
        parent_lu[sip] = sip_parent
    cd  = rt.chordDiagram(df_sub, [('sip', ('dip','dpt'))], parent_lu=parent_lu, draw_labels=True, label_style='circular', label_only=set([sip]), w=300, h=300, x_ins=32, y_ins=32, draw_border=False)
    wid = cd.widgetId()
    entities_of_interest[wid] = set([sip]) | parent_set
    widget_id_lu[wid] = cd
    scans_svgs.append(cd)

hscans      = detectHorizontalPortScanning(df)
for _scan_ in hscans.iter_rows(named=True):
    sip, t = _scan_["sip"], _scan_["time_window"]
    df_sub = _activityAround(df, sip, t)
    cd  = rt.chordDiagram(df_sub, [('sip', 'dip',)], draw_labels=True, label_style='radial', w=256, h=256, x_ins=48, y_ins=48,draw_border=False)
    wid = cd.widgetId()
    entities_of_interest[wid] = set(df_sub['sip'])
    widget_id_lu[wid] = cd
    scans_svgs.append(cd)

rt.table(scans_svgs, per_row=8)

In [None]:
# Union of every entity that appears in at least one chord diagram
all_entities_of_interest = set()
for wid in widget_id_lu:
    all_entities_of_interest |= entities_of_interest[wid]

# make the networkx graph for how to position each chord diagram:
# one (fm, to) row per entity shared between two diagrams.  A diagram always shares its
# own entities with itself, so each diagram that has entities also yields (wid, wid) rows.
# NOTE(review): those self rows presumably keep otherwise unconnected diagrams in the graph
# (pos[wid] is looked up for every widget in a later cell) -- confirm before removing them
fms, tos = [], []
for wid in widget_id_lu:
    for ent in entities_of_interest[wid]:
        for wid2 in widget_id_lu:
            if ent in entities_of_interest[wid2]:
                fms.append(wid)
                tos.append(wid2)
df_comp = pd.DataFrame({'fm':fms,'to':tos})
g_nx    = rt.createNetworkXGraph(df_comp, [('fm', 'to')])
# fixed seed so the layout (and everything positioned from it) is reproducible across runs
pos     = nx.spring_layout(g_nx, seed=42)
rt.linkNode(df_comp, [('fm','to')], link_shape='curve', link_color="#a0a0a0", pos=pos)

In [None]:
from math import floor
# findMiddlePointIndex() -- find the closest middle point balancing out x & y
def findMiddlePointIndex(pts):
    """Return the index (into pts) of a point that is roughly central in both x and y.

    The three points around the median x are taken as candidates, and the candidate
    with the median y among those three is returned.  Ties are broken by index.

    Parameters
    ----------
    pts : sequence of indexable points; element [0] is x and element [1] is y
          (any further elements are ignored)

    Returns
    -------
    int index into pts.  With one or two points there is no "middle", so index 0 is returned.

    Raises
    ------
    ValueError if pts is empty.
    """
    n = len(pts)
    if n == 0:
        raise ValueError('findMiddlePointIndex() requires at least one point')
    if n < 3:
        return 0
    # indices ordered by x (index as tie-breaker)
    by_x = sorted(range(n), key=lambda i: (pts[i][0], i))
    m    = n // 2
    # the point at the median x plus its neighbour on each side
    candidates = by_x[m-1:m+2]
    # of those three, pick the one with the median y
    candidates.sort(key=lambda i: (pts[i][1], i))
    return candidates[1]

# Crunch the circles: one circle per chord diagram, centred on its spring-layout position
circles, min_r, wid_to_circle_i = [], None, {}
for wid in widget_id_lu:
    r  = widget_id_lu[wid].r
    min_r = r if min_r is None else min(min_r,r)
    xy = pos[wid]
    circles.append((xy[0], xy[1], r, wid))
# the smallest diagram's diameter is handed to crunchCircles as its second argument
inter_circle_d = 2*min_r
placement = rt.crunchCircles(circles, inter_circle_d)

# at this point, just make the canvas big enough to hold everything:
# the bounding box covers every circle's full extent (centre +/- radius), including the first one
xmin = min(c[0]-c[2] for c in placement)
ymin = min(c[1]-c[2] for c in placement)
xmax = max(c[0]+c[2] for c in placement)
ymax = max(c[1]+c[2] for c in placement)
rmax = max(c[2]      for c in placement)
canvas_w, canvas_h = xmax - xmin, ymax-ymin
# NOTE(review): positions are scaled by (canvas-2*rmax)/canvas while radii are not, so the
# spacing produced by crunchCircles shrinks slightly on screen -- confirm this is acceptable
xT = lambda x: (x-xmin)/(xmax-xmin)*(canvas_w-2*rmax)+rmax
yT = lambda y: (canvas_h)-((y-ymin)/(ymax-ymin)*(canvas_h-2*rmax)+rmax) # inverted y axis (to match network x positioning)
wid_placement = {}  # widget id -> top-left corner of its diagram on the canvas
svg_canvas = [f'<svg width="{1024}" height="{1024}" x="0" y="0" viewBox="0 0 {canvas_w} {canvas_h}" xmlns="http://www.w3.org/2000/svg">']
circles_t  = [] # transformed circles
for wx, wy, r, wid in placement:
    sx,sy     = xT(wx), yT(wy)
    circles_t.append((sx, sy, r, wid))
    # index into circles_t (the list later passed to the router), independent of
    # whatever order crunchCircles returned the circles in
    wid_to_circle_i[wid] = len(circles_t)-1
    cd        = widget_id_lu[wid]
    cd_w,cd_h = cd.w, cd.h
    cd_svg    = cd._repr_svg_()
    # nest the diagram's own SVG so that its centre lands on (sx, sy)
    svg_canvas.append(f'<svg x="{sx-cd_w/2}" y="{sy-cd_h/2}" width="{cd_w}" height="{cd_h}">')
    wid_placement[wid] = (sx-cd_w/2,sy-cd_h/2)
    svg_canvas.append(cd_svg)
    svg_canvas.append('</svg>')

for _entity_ in list(all_entities_of_interest)[:3]: # truncate for testing
    # collect this entity's attachment point (canvas coordinates) in every diagram showing it
    pts = []
    for wid, _members_ in entities_of_interest.items():
        if _entity_ not in _members_:
            continue
        sx,sy = wid_placement[wid]
        ap    = widget_id_lu[wid].entityPositions(_entity_)[0].attachmentPointVecs()[0]
        pts.append((sx+ap[0],sy+ap[1],wid_to_circle_i[wid]))
    if len(pts) > 1:
        # entry point: the "middle" point when there are three or more, otherwise the first
        i = findMiddlePointIndex(pts) if len(pts) > 2 else 0
        entry_pt = pts[i]
        exit_pts = pts[:i] + pts[i+1:]
        # the routing itself is disabled here by parking it in a string literal
        _doesnt_work_ = '''    
        _paths_, _merge_info_ = rt.circularPathRouter(entry_pt, exit_pts, circles_t)
        for _path_ in _paths_:
            d = f'M {_path_[0][0]} {_path_[0][1]}'
            for i in range(1,len(_path_)):
                d += f' L {_path_[i][0]} {_path_[i][1]}'
            svg_canvas.append(f'<path d="{d}" stroke="{rt.co_mgr.getColor(_entity_)}" stroke-width="1.2" fill="none"/>')
        '''
svg_canvas.append('</svg>')
# rt.svgObject(''.join(svg_canvas))

In [None]:
svg_canvas = [f'<svg width="{1024}" height="{1024}" x="0" y="0" viewBox="0 0 {canvas_w} {canvas_h}" xmlns="http://www.w3.org/2000/svg">']

# Rebuild the transformed circles from placement so that this cell is idempotent
# (appending to the existing list would duplicate every circle on each run)
circles_t = [(xT(wx), yT(wy), r, wid) for wx, wy, r, wid in placement]

# outline of every circle first ...
for sx, sy, r, wid in circles_t:
    svg_canvas.append(f'<circle cx="{sx}" cy="{sy}" r="{r}" stroke="black" stroke-width="1.2" fill="none"/>')

# ... then each chord diagram nested so that its centre lands on the circle centre
for sx, sy, r, wid in circles_t:
    cd        = widget_id_lu[wid]
    cd_w,cd_h = cd.w, cd.h
    cd_svg    = cd._repr_svg_()
    svg_canvas.append(f'<svg x="{sx-cd_w/2}" y="{sy-cd_h/2}" width="{cd_w}" height="{cd_h}">')
    wid_placement[wid] = (sx-cd_w/2,sy-cd_h/2)
    svg_canvas.append(cd_svg)
    svg_canvas.append('</svg>')

# length of the straight stub drawn from an attachment point out to where routing starts
_stub_len_ = 20

for _entity_ in all_entities_of_interest:
    _entity_co_ = rt.co_mgr.getColor(_entity_)
    # pts_actual -- attachment point on the diagram; pts -- the same point pushed out by
    # _stub_len_ along (ap[2], ap[3]), which the code treats as a direction vector
    pts, pts_actual = [], []
    for wid, _members_ in entities_of_interest.items():
        if _entity_ not in _members_:
            continue
        sx,sy = wid_placement[wid]
        eps   = widget_id_lu[wid].entityPositions(_entity_)
        ap    = eps[0].attachmentPointVecs()[0]
        pts_actual.append((sx+ap[0],sy+ap[1],wid_to_circle_i[wid]))
        pts.append((sx+ap[0]+ap[2]*_stub_len_,
                    sy+ap[1]+ap[3]*_stub_len_,
                    wid_to_circle_i[wid]))
    # an entity shown in a single diagram has nothing to connect to
    if len(pts) < 2:
        continue

    # entry point: the "middle" point when there are three or more, otherwise the first
    _entry_i_       = findMiddlePointIndex(pts) if len(pts) > 2 else 0
    entry_pt        = pts[_entry_i_]
    entry_pt_actual = pts_actual[_entry_i_]
    exit_pts        = pts[:_entry_i_] + pts[_entry_i_+1:]
    exit_pts_actual = pts_actual[:_entry_i_] + pts_actual[_entry_i_+1:]

    # stubs: attachment point -> routing start, entry first and then every exit
    for pt_actual, pt in zip([entry_pt_actual] + exit_pts_actual, [entry_pt] + exit_pts):
        svg_canvas.append(f'<line x1="{pt_actual[0]}" y1="{pt_actual[1]}" x2="{pt[0]}" y2="{pt[1]}" stroke="{_entity_co_}" stroke-width="1.2"/>')

    # routed polylines between the entry and each exit
    _paths_, _merge_info_ = rt.circularPathRouter(entry_pt, exit_pts, circles_t)
    for _path_ in _paths_:
        if not _path_:
            continue
        d = 'M ' + ' L '.join(f'{_xy_[0]} {_xy_[1]}' for _xy_ in _path_)
        svg_canvas.append(f'<path d="{d}" stroke="{_entity_co_}" stroke-width="2.6" fill="none"/>')

svg_canvas.append('</svg>')
rt.svgObject(''.join(svg_canvas))

In [None]:
# Number of distinct entities collected across all chord diagrams
len(all_entities_of_interest)