# EVE Basic Graph Mining

## Imports

In [None]:
import pandas as pd
import numpy as np
import networkx as nx

In [None]:
import hvplot.networkx as hvnx
import holoviews as hv
from holoviews import opts

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import Output, Dropdown, SelectMultiple, HBox, VBox, Button, IntSlider, FloatRangeSlider, FloatSlider, Text, Combobox
from IPython.display import display

In [None]:
from surianalytics.connectors import RESTSciriusConnector
from surianalytics.datamining import min_max_scaling

## Data connection

In [None]:
# Shared connector instance; the widget handlers below issue their queries through `c`.
c = RESTSciriusConnector()

## Global variables

In [None]:
# Re-running this cell must not wipe state from an earlier run: reuse the
# existing objects when the names are already defined, otherwise start empty.
FIELDS = globals().get("FIELDS", [])
GRAPH = globals().get("GRAPH", None)

# Width and height handed to the plot options when the graph is drawn.
GRAPH_WIDTH = 1000
GRAPH_HEIGHT = 1000

## Define widgets

In [None]:
# Event type selector; starts with the single option "all".
DROPDOWN_EVENT_TYPES = Dropdown(
    description="Event type",
    options=["all"],
    value="all",
)

# Source and destination field pickers are configured identically:
# both start empty and offer the current FIELDS list as suggestions.
DROPDOWN_FIELDS_SRC, DROPDOWN_FIELDS_DEST = (
    Combobox(options=FIELDS, value=None) for _ in range(2)
)

# Aggregation size; used for both the source and the destination column
# when graph data is pulled.
SLIDER_SIZE_COL = IntSlider(
    description='Aggregation max size',
    min=100,
    max=5000,
    value=500,
    orientation='horizontal',
    readout=True,
)

# Lower/upper bound applied to the min-max scaled node degree when drawing.
SLIDER_THRESH_DEGREE = FloatRangeSlider(
    description="Degree: ",
    min=0,
    max=1,
    step=0.001,
    value=[0, 1],
    orientation='horizontal',
    continuous_update=False,
    readout=True,
    readout_format='.2f',
)

In [None]:
# Free-text query filter; its value is passed as `qfilter` when graph data is
# pulled. Defaults to the wildcard "*".
TEXT_QUERY = Text(
    value="*",
    description='Query filter:',
    continuous_update=True,
)

## Widget handlers

### Timestamp handlers

In [None]:
from datetime import datetime, timedelta

In [None]:
# Initial query window: the 60 minutes leading up to "now" (naive UTC datetimes).
TIME_END = datetime.utcnow()
TIME_BEGINNING = TIME_END - timedelta(minutes=60)

# Window length in minutes used when a fresh time range is generated.
SLIDER_TIME_MINUTES = IntSlider(
    description='Minutes',
    min=5,
    max=600,
    step=5,
    value=60,
    orientation='horizontal',
    readout=True,
)

In [None]:
# TimePicker is not available in ipywidgets 7.7,
# so plain text boxes holding ISO-formatted timestamps are used for now.
TEXT_TIME_BEGINNING = Text(
    value=TIME_BEGINNING.isoformat(),
    description="From: ",
    continuous_update=True,
)
TEXT_TIME_END = Text(
    value=TIME_END.isoformat(),
    description="To: ",
    continuous_update=True,
)

# Seed the connector with the initial window shown in the text boxes.
_ = c.set_query_timeframe(
    from_date=TEXT_TIME_BEGINNING.value,
    to_date=TEXT_TIME_END.value,
)

In [None]:
def handler_update_timeframe(args=None):
    """Apply the From/To text box values as the connector's query timeframe.

    Args:
        args: Value supplied by ``Button.on_click`` when used as a click
            callback; unused. Optional so the handler can also be called
            directly from other handlers without an argument.
    """
    c.set_query_timeframe(
        from_date=TEXT_TIME_BEGINNING.value,
        to_date=TEXT_TIME_END.value,
    )

In [None]:
# Clicking "Set time" sends the current From/To text box values to the connector.
BUTTON_UPDATE_TIME = Button(description="Set time")
BUTTON_UPDATE_TIME.on_click(handler_update_timeframe)

In [None]:
def handler_reset_timeframe(args):
    """Regenerate the From/To text boxes relative to now and apply them.

    The new window ends at the current UTC time and spans the number of
    minutes selected in SLIDER_TIME_MINUTES. After the text boxes are
    updated, the timeframe is pushed to the connector.

    Args:
        args: Value supplied by ``Button.on_click``; only forwarded to
            ``handler_update_timeframe``.
    """
    time_to = datetime.utcnow()
    time_from = time_to - timedelta(minutes=SLIDER_TIME_MINUTES.value)
    TEXT_TIME_END.value = time_to.isoformat()
    TEXT_TIME_BEGINNING.value = time_from.isoformat()
    # The update handler takes the click argument as a positional parameter;
    # calling it without one raised TypeError on every click.
    handler_update_timeframe(args)

In [None]:
# Clicking "Generate time" rebuilds the From/To values from the minutes slider.
BUTTON_RESET_TIME = Button(description="Generate time")
BUTTON_RESET_TIME.on_click(handler_reset_timeframe)

In [None]:
# NOTE: BOX_TIME is assigned again in the "Boxes" section further down, and that
# later layout is the one placed into BOX_FINAL; this flat row is superseded.
BOX_TIME = HBox([SLIDER_TIME_MINUTES, BUTTON_RESET_TIME, TEXT_TIME_BEGINNING, TEXT_TIME_END, BUTTON_UPDATE_TIME])

### Build unique event types

In [None]:
def handler_update_event_types(args):
    """Replace the event type dropdown options with those reported by the connector.

    Args:
        args: Value supplied by ``Button.on_click``; unused.
    """
    event_types = c.get_event_types()
    DROPDOWN_EVENT_TYPES.options = event_types

In [None]:
# Clicking "Refresh event types" reloads the event type dropdown from the connector.
BUTTON_REFRESH_EVENT_TYPES = Button(description="Refresh event types")
BUTTON_REFRESH_EVENT_TYPES.on_click(handler_update_event_types)

### Pull unique fields

In [None]:
# Field names that are never offered as graph source/destination columns.
FIELDS_FILTER = ["@timestamp", "timestamp"]


def handler_build_fields(event_type="all"):
    """Refresh the source/destination field suggestions for an event type.

    Queries the connector for the unique fields of ``event_type``, drops the
    names listed in FIELDS_FILTER, stores the result in the global FIELDS and
    assigns it as the options of both field comboboxes.

    Args:
        event_type: Event type to look fields up for. When driven by the
            ``interactive`` wiring below this is the dropdown's current value.
    """
    global FIELDS

    # Query with the argument itself (not the dropdown widget) so the lookup
    # and the diagnostic message below always refer to the same event type.
    fields = c.get_unique_fields(event_type=event_type)
    if len(fields) == 0:
        print("no fields from", event_type)

    FIELDS = [f for f in fields if f not in FIELDS_FILTER]
    DROPDOWN_FIELDS_SRC.options = FIELDS
    DROPDOWN_FIELDS_DEST.options = FIELDS


# Bind the handler to the event type dropdown; the returned container widget
# is intentionally not displayed.
_ = interactive(handler_build_fields, event_type=DROPDOWN_EVENT_TYPES)

### Pull graph data

In [None]:
# Output area for status messages of the data pull.
OUTPUT_GRAPH_DL = Output()


def handler_pull_graph_data(args):
    """Query the connector for graph data and store it in the global GRAPH.

    The source/destination fields, aggregation size, event type and query
    filter are read from the corresponding widgets. Progress messages are
    written to OUTPUT_GRAPH_DL, which is cleared first.

    Args:
        args: Value supplied by ``Button.on_click``; unused.
    """
    global GRAPH

    OUTPUT_GRAPH_DL.clear_output()
    with OUTPUT_GRAPH_DL:
        # Plain print: wrapping print() in display() additionally rendered
        # the None that print() returns.
        print("Calling scirius")
        GRAPH = c.get_eve_fields_graph_nx(
            col_src=DROPDOWN_FIELDS_SRC.value,
            col_dest=DROPDOWN_FIELDS_DEST.value,
            # the same aggregation size is used for both columns
            size_src=SLIDER_SIZE_COL.value,
            size_dest=SLIDER_SIZE_COL.value,
            event_type=DROPDOWN_EVENT_TYPES.value,
            qfilter=TEXT_QUERY.value,
        )
        # print instead of display so the message is not shown as a quoted repr
        print("call done, got {} nodes and {} edges".format(len(GRAPH.nodes()), len(GRAPH.edges())))

In [None]:
# Clicking "Pull data" runs the graph query and stores the result in GRAPH.
BUTTON_LOAD_DATA = Button(description="Pull data")
BUTTON_LOAD_DATA.on_click(handler_pull_graph_data)

### Draw network graph

In [None]:
# Output area the rendered graph and its summary lines are written to.
OUTPUT_GRAPH = Output()


def handler_draw_graph(args):
    """Render the previously pulled graph into OUTPUT_GRAPH.

    Works on a copy of the global GRAPH:

    1. the empty-string node (and its edges) is dropped,
    2. each edge gets a ``scaled_doc_count`` attribute: log2 of its
       ``doc_count``, min-max scaled,
    3. nodes whose min-max scaled degree lies outside the range selected in
       SLIDER_THRESH_DEGREE are removed,
    4. edges and the source/destination node sets are drawn on a spring
       layout and overlaid.

    Besides the plot, the total ``doc_count`` over the remaining edges and the
    number of connected components with more than one node are printed.

    Args:
        args: Value supplied by ``Button.on_click``; unused.
    """
    OUTPUT_GRAPH.clear_output()

    # GRAPH stays None until data has been pulled, so check that before
    # touching its edges. The message goes into the output widget so that it
    # shows up next to where the graph would be.
    if GRAPH is None or len(GRAPH.edges) == 0:
        with OUTPUT_GRAPH:
            print("Missing graph data, please run query first")
        return

    # use local graph object
    g = GRAPH.copy()

    # drop empty nodes (and connected edges)
    # means missing eve field, no connection can be made
    if "" in g:
        g.remove_node("")

    # log-scale the raw counts before min-max scaling them
    doc_counts = [attr["doc_count"] for (_, _, attr) in g.edges(data=True)]
    doc_counts = np.log2(doc_counts)
    doc_counts = min_max_scaling(pd.Series(doc_counts))

    # add scaled doc counts to edges to serve as weights
    for i, (_, _, attr) in enumerate(g.edges(data=True)):
        attr["scaled_doc_count"] = doc_counts[i]

    # discover node degree and scale the values
    degree = [g.degree(n) for n in g.nodes()]
    degree = min_max_scaling(pd.Series(degree))

    # collect nodes that do not match the filtering criteria first;
    # removing them while iterating over g.nodes() would mutate the view
    lower, upper = SLIDER_THRESH_DEGREE.value[0], SLIDER_THRESH_DEGREE.value[1]
    to_remove = [
        n for i, n in enumerate(g.nodes())
        if degree[i] > upper or degree[i] < lower
    ]
    for n in to_remove:
        g.remove_node(n)

    # calculate total number of docs for edges
    doc_count_total_edge = np.sum([attr["doc_count"] for (_, _, attr) in g.edges(data=True)])

    # NOTE(review): both lists hold positional indices from enumerate(), not
    # node keys - confirm this is what hvnx expects for `nodelist`.
    # locate source nodes
    n_src = [i for i, (_, a) in enumerate(g.nodes(data=True)) if a["kind"] == "source"]
    # locate destination nodes
    n_dst = [i for i, (_, a) in enumerate(g.nodes(data=True)) if a["kind"] == "destination"]

    # generate layout
    pos = nx.layout.spring_layout(g)

    # generate nodes per kind
    nodes_src = hvnx.draw_networkx_nodes(g, pos, nodelist=n_src, node_color='#A0CBE2').opts(width=GRAPH_WIDTH, height=GRAPH_HEIGHT)
    nodes_dst = hvnx.draw_networkx_nodes(g, pos, nodelist=n_dst, node_color="Orange").opts(width=GRAPH_WIDTH, height=GRAPH_HEIGHT)

    # use kwargs to make parameter handling easier
    edge_params = {
        "alpha": 1,
        "edge_color": 'scaled_doc_count',
        "edge_cmap": 'viridis',
        "edge_width": hv.dim('scaled_doc_count')*5,
    }

    # generate edges
    edges = hvnx.draw_networkx_edges(g, pos, **edge_params).opts(width=GRAPH_WIDTH, height=GRAPH_HEIGHT)

    # overlay nodes and edges
    res = edges * nodes_src * nodes_dst

    # only components with more than one node count as clusters
    component_sizes = [len(c) for c in sorted(nx.connected_components(g), key=len, reverse=True) if len(c) > 1]

    with OUTPUT_GRAPH:
        # plain print: display(print(...)) additionally rendered print's None
        print("Number of documents for edges: {}".format(doc_count_total_edge))
        print("Number of clusters: {}".format(len(component_sizes)))
        display(res)

In [None]:
# Clicking "Draw graph" renders the graph currently stored in GRAPH.
BUTTON_DRAW_GRAPH = Button(description="Draw graph")
BUTTON_DRAW_GRAPH.on_click(handler_draw_graph)

## Interactions

### Boxes

In [None]:
# Time controls: text inputs and their buttons stacked side by side,
# followed by the minutes slider.
BOX_TIME_INPUT = VBox([TEXT_TIME_BEGINNING, TEXT_TIME_END])
BOX_TIME_BTNS = VBox([BUTTON_UPDATE_TIME, BUTTON_RESET_TIME])
BOX_TIME = HBox([BOX_TIME_INPUT, BOX_TIME_BTNS, SLIDER_TIME_MINUTES])

# Column configuration: source field next to the degree filter,
# destination field on its own row below.
BOX_COL_SRC = HBox([DROPDOWN_FIELDS_SRC, SLIDER_THRESH_DEGREE])
BOX_COL_DEST = HBox([DROPDOWN_FIELDS_DEST])
BOX_COL_CONFIG = VBox([BOX_COL_SRC, BOX_COL_DEST])

# Action buttons in a single row.
BOX_BUTTONS = HBox([
    BUTTON_REFRESH_EVENT_TYPES,
    BUTTON_LOAD_DATA,
    BUTTON_DRAW_GRAPH,
])

# Full control panel, top to bottom.
BOX_FINAL = VBox([
    BOX_TIME,
    TEXT_QUERY,
    DROPDOWN_EVENT_TYPES,
    BOX_COL_CONFIG,
    BOX_BUTTONS,
])

### Display area

In [None]:
# Show the control panel first, then the data pull log, then the graph area.
display(BOX_FINAL, OUTPUT_GRAPH_DL, OUTPUT_GRAPH)