In [None]:
from matplotlib import animation
from IPython.display import HTML
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPM
import PIL
import io
from matplotlib import pyplot as plt
import pandas as pd
import polars as pl
import numpy as np
import rtsvg
import time

%matplotlib inline

# RACETrack instance reused by the cells below
rt = rtsvg.RACETrack()

# Keyword arguments common to the example xy plots
params = dict(x_field='x', y_field='y', w=800, h=800)

# Four example xy plots over the same x values, keyed 0..3 in the order listed
_example_ys = [[10,20,25,11,12,13],
               [12,18,20,12,11,12],
               [11,17,21,11,10,11],
               [13,16,22,12, 8,12]]
xy = {_i: rt.xy(pd.DataFrame({'x': [1,2,3,4,5,6], 'y': _ys}), **params)
      for _i, _ys in enumerate(_example_ys)}

# Read the three netflow CSV chunks and stack them into a single Polars DataFrame,
# timing the whole load (including the timestamp handling below).
ts1 = time.time()
df = pl.concat([pl.read_csv('../../data/2013_vast_challenge/mc3_netflow/nf/nf-chunk1.csv'),
                pl.read_csv('../../data/2013_vast_challenge/mc3_netflow/nf/nf-chunk2.csv'),
                pl.read_csv('../../data/2013_vast_challenge/mc3_netflow/nf/nf-chunk3.csv')])
# NOTE(review): presumably converts 'parsedDate' into a timestamp dtype -- confirm against rtsvg
df = rt.columnsAreTimestamps(df, 'parsedDate')
ts2 = time.time()
# Use fixed-point formatting: a bare '0.2' spec means "two significant digits" and
# switches to scientific notation (e.g. '1.2e+01') once the load takes 10 s or more.
print(f'Loading Time ... {ts2 - ts1:0.2f} sec')

# Netflow columns that are removed before the analysis.
# Fields deliberately NOT listed here (they are kept and renamed afterwards):
#   parsedDate, ipLayerProtocol, firstSeenSrcIp, firstSeenDestIp,
#   firstSeenSrcPort, firstSeenDestPort, durationSeconds,
#   firstSeenSrcTotalBytes, firstSeenDestTotalBytes,
#   firstSeenSrcPacketCount, firstSeenDestPacketCount
unused_columns = [
    'TimeSeconds',
    'dateTimeStr',
    'ipLayerProtocolCode',
    'moreFragments',
    'contFragments',
    'firstSeenSrcPayloadBytes',
    'firstSeenDestPayloadBytes',
    'recordForceOut',
]
df = df.drop(unused_columns)

# Map the verbose netflow column names onto the short names used by the plots below
short_names = {
    'parsedDate':               'ts',
    'ipLayerProtocol':          'pro',
    'firstSeenSrcIp':           'sip',
    'firstSeenDestIp':          'dip',
    'firstSeenSrcPort':         'spt',
    'firstSeenDestPort':        'dpt',
    'durationSeconds':          'dur',
    'firstSeenSrcTotalBytes':   'soct',
    'firstSeenDestTotalBytes':  'doct',
    'firstSeenSrcPacketCount':  'spkt',
    'firstSeenDestPacketCount': 'dpkt',
}
df = df.rename(short_names)

# Number of distinct values appearing as either a source ('sip') or destination ('dip')
print('total nodes = ', len(set(df['sip']) | set(df['dip'])))
# Down-sample to at most 100K rows.  The fixed seed makes a Restart & Run All pick the
# same rows every time; the min() guard keeps sample() from failing when the frame
# holds fewer than 100K rows (sampling without replacement cannot exceed the row count).
df = df.sample(min(100_000, df.height), seed=0)
# Preview a few rows of the down-sampled frame
df.sample(3)

In [None]:
# Bin the time-sorted records into one DataFrame per hour
df = df.sort('ts')
str_keys       = []
str_to_df_list = {}
str_to_xy      = {}
for bin_key, bin_df in df.group_by_dynamic('ts', every='1h'):
    bin_label = str(bin_key[0])
    str_keys.append(bin_label)
    str_to_df_list[bin_label] = bin_df
    str_to_xy[bin_label]      = (0,0)

# Render one xy SVG (sip on x, dip on y, colored by dpt) for every hourly bin
w = h = 512
str_to_svg = rt.createSmallMultiples(
    df, str_to_df_list, str_to_xy,
    count_by=None,
    count_by_set=False,
    color_by='dpt',
    ts_field='ts',
    sm_type='xy',
    sm_params={'x_field':'sip', 'y_field':'dip'},
    x_axis_independent=False,
    y_axis_independent=False,
    parent_id='animation',
    sm_w=w,
    sm_h=h,
)

# SVGs in bin order (the result is looked up with 1-tuples of the bin label)
svgs = [str_to_svg[(bin_label,)] for bin_label in str_keys]
# Recipe for the animation
def svgToImage(_svg_):
    """Rasterize an SVG into a PIL image via an in-memory PNG.

    Parameters
    ----------
    _svg_ : str or object
        Either the SVG markup itself, or any object exposing ``_repr_svg_()``
        whose return value is used as the markup.

    Returns
    -------
    PIL.Image.Image
        Image opened from the PNG bytes rendered by reportlab.

    Raises
    ------
    ValueError
        If svglib could not turn the markup into a drawing.
    """
    # `import PIL` on its own does not load the Image submodule; import it explicitly
    # instead of relying on another library having loaded it as a side effect.
    from PIL import Image
    if not isinstance(_svg_, str): _svg_ = _svg_._repr_svg_()
    drawing = svg2rlg(io.StringIO(_svg_))
    # NOTE(review): svg2rlg is expected to return None when parsing fails -- fail loudly
    # here rather than with an opaque error inside reportlab.
    if drawing is None:
        raise ValueError('svgToImage: the SVG could not be parsed into a drawing')
    png_bytes = io.BytesIO()
    renderPM.drawToFile(drawing, png_bytes, 'PNG')
    png_bytes.seek(0)  # rewind so the PNG is read from its first byte
    return Image.open(png_bytes)
# Size the figure (in inches) so that it spans w x h pixels at the configured dpi
dpi = plt.rcParams['figure.dpi']
fig_width_inches, fig_height_inches = w / dpi, h / dpi
fig, ax = plt.subplots(figsize=(fig_width_inches, fig_height_inches))
# One single-artist frame per SVG, in the same order as `svgs`
ims = [[ax.imshow(svgToImage(svg), animated=True)] for svg in svgs]
ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True, repeat_delay=2000)
plt.axis('off')
# Video rendering is disabled in this cell:
#HTML(ani.to_html5_video())

In [None]:
#
# For 100K data points from the VAST 2013 dataset, the following code took 9m51s (a total of 591 seconds)
#
# Concatenate Polars DataFrames  =    1.67 seconds
# SVGs creation                  =   26.94 seconds
# SVG To Image Rendering         =  445.42 seconds
# Animation Creation             =   24.76 seconds
# HTML Video Encoding            =   92.21 seconds (estimated by subtracting the above from the total)
#
def sliceDataFrameByTimestamp(df, ts_field, every, slice_aggregation=1):
    """Bin ``df`` by time and build a trailing-window DataFrame for every bin.

    The frame is sorted by ``ts_field`` and grouped with
    ``df.group_by_dynamic(ts_field, every=every)``.  For each bin, the bin's own
    rows are concatenated with the rows of the bins yielded just before it, so
    that each entry covers at most ``slice_aggregation`` consecutive bins.

    Parameters
    ----------
    df : Polars DataFrame
        Records to slice; must contain ``ts_field``.
    ts_field : str
        Column used for sorting and for the dynamic grouping.
    every : str
        Bin width handed to ``group_by_dynamic`` (e.g. ``'1m'``).
    slice_aggregation : int, default 1
        Number of most recent bins merged into each entry; 1 keeps every bin
        on its own.  Must be at least 1.

    Returns
    -------
    (str_keys, str_to_df, str_to_xy)
        ``str_keys`` lists the bin labels (``str`` of the first group-key
        element) in iteration order, ``str_to_df`` maps each label to its
        concatenated window, and ``str_to_xy`` maps each label to ``(0,0)``.

    Raises
    ------
    ValueError
        If ``slice_aggregation`` is smaller than 1.

    Side effects
    ------------
    Prints the cumulative time spent in ``pl.concat``.
    """
    # Without this check a window size below 1 would empty the window and surface
    # as an unrelated concat failure further down.
    if slice_aggregation < 1:
        raise ValueError(f'slice_aggregation must be >= 1 (got {slice_aggregation})')
    str_keys, str_to_df, str_to_xy = [], {}, {}
    window = []
    concat_ts_sum = 0.0
    df = df.sort(ts_field)
    # NOTE(review): the window counts bins yielded by group_by_dynamic; if empty bins
    # are not yielded, a window can span more wall-clock time than
    # slice_aggregation * every -- confirm this is intended.
    for k, k_df in df.group_by_dynamic(ts_field, every=every):
        k_str = str(k[0])
        str_keys.append(k_str)
        # Keep only the newest `slice_aggregation` bins (a fresh list every iteration)
        window = (window + [k_df])[-slice_aggregation:]
        ts0 = time.time()
        str_to_df[k_str] = pl.concat(window)
        concat_ts_sum += time.time() - ts0
        str_to_xy[k_str] = (0,0)
    print(f'concat_ts_sum = {concat_ts_sum:.2f} seconds')
    return str_keys, str_to_df, str_to_xy
# One-minute bins, each merged with up to 59 of the bins preceding it
str_keys, str_to_df, str_to_xy = sliceDataFrameByTimestamp(df, 'ts', '1m', 60)

# Render one xy SVG per bin (ts on x, sip and dip on y, colored by dpt) and time it
w = h = 512
ts0 = time.time()
str_to_svg = rt.createSmallMultiples(
    df, str_to_df, str_to_xy,
    count_by=None,
    count_by_set=False,
    color_by='dpt',
    ts_field='ts',
    sm_type='xy',
    sm_params={'y_field':['sip','dip'], 'x_field':'ts'},
    x_axis_independent=True,
    y_axis_independent=False,
    parent_id='animation',
    sm_w=w,
    sm_h=h,
)
print(f'SVGs created in {time.time() - ts0:.2f} seconds')

# SVGs in bin order (the result is looked up with 1-tuples of the bin label)
svgs = [str_to_svg[(bin_label,)] for bin_label in str_keys]
# Size the figure (in inches) so that it spans w x h pixels at the configured dpi
dpi = plt.rcParams['figure.dpi']
fig_width_inches, fig_height_inches = w / dpi, h / dpi
fig, ax = plt.subplots(figsize=(fig_width_inches, fig_height_inches))

# Rasterize every SVG into a single-artist animation frame, timing the conversion
ts0 = time.time()
ims = [[ax.imshow(svgToImage(svg), animated=True)] for svg in svgs]
print(f'SVG To Image Creation took {time.time() - ts0:.2f} seconds')

# Assemble the frames into an animation, timing the construction
ts0 = time.time()
ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True, repeat_delay=2000)
print(f'Animation Creation took {time.time() - ts0:.2f} seconds')

plt.axis('off')
# Encode to an HTML5 video; kept as the last expression so the notebook displays it
HTML(ani.to_html5_video())