In [None]:
import pyarrow
import pandas
import logging
import ipaddress
import networkx as nx
import matplotlib.pyplot as plt

from pyvast import VAST

async def query(q):
    """Run a VAST Arrow export for the query expression ``q``.

    Uses the notebook-level ``vast`` client. The export's standard output is
    opened as an Arrow IPC stream and read to the end; standard error is
    collected but not inspected.
    """
    export_proc = await vast.export().arrow(q).exec()
    arrow_bytes, _unused_stderr = await export_proc.communicate()
    return pyarrow.ipc.open_stream(arrow_bytes).read_all()

def unpack_ip(buffer):
    """Convert a packed 128-bit IPv6 address buffer into an address object.

    Args:
        buffer: Big-endian bytes of the address. 16 bytes are expected; for
            any other length the decoded integer is handed to
            ``ipaddress.ip_address`` unchanged.

    Returns:
        ``ipaddress.IPv4Address`` for IPv4-mapped input (``::ffff:a.b.c.d``),
        otherwise ``ipaddress.IPv6Address`` for 16-byte input.
    """
    num = int.from_bytes(buffer, byteorder='big')
    # IPv4-mapped addresses carry 0xFFFF directly above the low 32 bits;
    # convert them back to regular IPv4.
    # https://tools.ietf.org/html/rfc4291#section-2.5.5.2
    if (num >> 32) == 0xFFFF:
        return ipaddress.IPv4Address(num & 0xFFFFFFFF)
    # Build the IPv6 address explicitly: ipaddress.ip_address() treats every
    # integer below 2**32 as IPv4, which would turn ::1 into 0.0.0.1.
    if len(buffer) == 16:
        return ipaddress.IPv6Address(num)
    return ipaddress.ip_address(num)
  
def conn_to_edgelist(df):
    """Aggregate connection records into a weighted host-to-host edge list.

    Rows of ``df`` are grouped by the ``id.orig_h`` / ``id.resp_h`` pair and
    the size of each group becomes the ``weight`` column. Both address
    columns are then converted with ``unpack_ip``.
    """
    endpoint_cols = ['id.orig_h', 'id.resp_h']
    pair_counts = df.groupby(endpoint_cols).size()
    edges = pair_counts.to_frame(name='weight').reset_index()
    # Convert after grouping so each distinct pair is unpacked only once.
    for col in endpoint_cols:
        edges[col] = edges[col].apply(unpack_ip)
    return edges

def draw(edgelist, with_labels=False):
    """Plot an edge list as a graph with edge widths scaled by weight.

    Args:
        edgelist: DataFrame with ``id.orig_h``, ``id.resp_h`` and ``weight``
            columns.
        with_labels: When true, node labels are drawn as well.

    An edge list without rows draws nothing instead of raising.
    """
    G = nx.from_pandas_edgelist(edgelist, source='id.orig_h', target='id.resp_h', edge_attr='weight')
    edge_weights = nx.get_edge_attributes(G, 'weight')
    if not edge_weights:
        # With no edges, zip(*items) yields nothing and the two-name unpack
        # below would raise ValueError. The graph is built from edges only,
        # so there is nothing to draw.
        return
    pos = nx.kamada_kawai_layout(G, weight=None)
    edges, weights = zip(*edge_weights.items())
    # Scale connection counts down so line widths stay readable.
    weights = [x / 100 for x in weights]
    nx.draw_networkx_nodes(G, pos=pos)
    nx.draw_networkx_edges(G, pos=pos, edgelist=edges, width=weights)
    if with_labels:
        nx.draw_networkx_labels(G, pos=pos)

# Let the root logger pass INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Emit at INFO: a DEBUG record would be dropped by the level set above.
logging.info("Logger active")

# Default figure size for every plot in this notebook.
plt.rcParams['figure.figsize'] = [15, 15]

In [None]:
# Create the client that query() uses; test_connection() presumably checks
# that the VAST node is reachable (confirm against pyvast).
vast = VAST()
await vast.test_connection()

In [None]:
# Select all events of type zeek.conn
# NOTE(review): if `~ /.../` is a regex match, the unescaped `.` matches any
# character, not only a literal dot; confirm this is intended.
data = await query("#type ~ /zeek.conn/")

In [None]:
# Dimensions of the query result.
data.shape

In [None]:
# Schema of the query result.
data.schema

In [None]:
# Convert the query result to pandas for the rest of the analysis.
# We're using pandas for simplicity, but there are
# also other options like cudf for higher performance.
# https://github.com/rapidsai/cudf
df = data.to_pandas()

In [None]:
# Preview the first rows of the converted data.
df.head()

In [None]:
# Summary statistics of the converted data.
df.describe()

In [None]:
# Count the connections per (id.orig_h, id.resp_h) pair.
edgelist = conn_to_edgelist(df)

In [None]:
# Plot the complete graph; node labels are off by default.
draw(edgelist)

In [None]:
# Keep only pairs with more than 8 connections and plot them with labels.
pruned = edgelist[edgelist['weight'] > 8]
draw(pruned, with_labels=True)

In [None]:
# Raise the threshold: only pairs with more than 16 connections.
pruned = edgelist[edgelist['weight'] > 16]
draw(pruned, with_labels=True)

In [None]:
# Raise the threshold again: only pairs with more than 24 connections.
pruned = edgelist[edgelist['weight'] > 24]
draw(pruned, with_labels=True)