## Import needed modules

In [None]:
# Standard modules to use and manipulate dataframes
import numpy as np
import pandas as pd
# Graphistry API import 
import graphistry, pathlib, os, nest_asyncio
from datetime import datetime

In [None]:
#General
# Allow nested use of the already-running Jupyter event loop
# (see the "event loop is already running" reference at the end of the notebook).
nest_asyncio.apply()
# Working directory of the notebook (same value the %pwd magic returns).
location = os.getcwd()
#file location
# Join with a real path separator: plain concatenation produced "<cwd>../Data/".
# The empty last component keeps the trailing separator, because the CSV helpers
# build file paths with `csvlocation + filename`.
csvlocation = os.path.join(location, "..", "Data", "")


### Graphistry credentials

In [None]:
# (Re)load the dotenv IPython extension so that the %dotenv magic is available.
%reload_ext dotenv
# Use find_dotenv to locate the file
# NOTE(review): %dotenv is expected to load the variables from the .env file into
# the process environment that os.getenv reads in the next cell — confirm the
# .env file is discoverable from the notebook's working directory.
%dotenv

In [None]:
#For graphistry
# Two sets of environment variables are supported:
#   - plain names, used locally (selected when API1_KEY is set)
#   - TB_-prefixed names, used for the TreeBeard GitHub Action
api_username, api_pwd, api1_key = map(
    os.getenv,
    ('API3_USERNAME', 'API3_PWD', 'API1_KEY')
    if os.getenv('API1_KEY')
    else ('TB_API3_USERNAME', 'TB_API3_PWD', 'TB_API1_KEY'),
)

In [None]:
def write_csv_data(df_w, filename):
    """Save a dataframe as CSV inside the shared data directory.

    Args:
        df_w: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        filename: File name appended to the module-level ``csvlocation`` prefix.

    Returns:
        None. A confirmation line is printed once the file has been written.
    """
    target_path = csvlocation + filename
    df_w.to_csv(target_path)
    print(filename, " saved!")

In [None]:
def read_csv_data(filename):
    """Load a CSV file from the shared data directory.

    Args:
        filename: File name appended to the module-level ``csvlocation`` prefix.

    Returns:
        The dataframe produced by ``pd.read_csv`` for that file.
    """
    source_path = csvlocation + filename
    return pd.read_csv(source_path)

Grouping by anon is NOT good!
After merging the different dataframes, two rows can share the same anon value without describing the same data, so we need to group on the hash created in '2_Merge_Osquery_Data' instead.

In [None]:
#group by hash as anon was not unique after merging sources
def intersect(df):
    """Keep only hashes seen in more than one table and label their overlap.

    Args:
        df: DataFrame with ``hash``, ``Table`` and ``Table.Column`` columns.

    Returns:
        A new dataframe (index reset) containing only the rows whose hash
        occurs in at least two distinct tables, with an extra ``intersect``
        column: the comma-separated unique ``Table.Column`` values that share
        the row's hash.
    """
    # A hash that lives in a single table is not an intersection, so drop it.
    shared_rows = (
        df.groupby(by='hash')
        .filter(lambda rows: rows['Table'].nunique() > 1)
        .reset_index(drop=True)
    )

    # One label per hash: every distinct 'Table.Column' carrying that hash.
    columns_per_hash = shared_rows.groupby('hash')['Table.Column'].unique()
    label_by_hash = columns_per_hash.apply(', '.join).to_dict()

    shared_rows['intersect'] = shared_rows['hash'].map(label_by_hash)
    return shared_rows

## GRAPH Functions
Osquery colors to use in the graph:
- rgb(165, 150, 255); #A596FF
- rgb(0, 18, 95); #00125F (maybe as a ring around the node?)
- #00094c; possibly too black
- #4a9dff; light blue

In [None]:
def node_decorator(version, nodes):
    """Add display attributes (title, radius, color) to the hypergraph nodes.

    The ``nodes`` dataframe is modified in place and also returned.

    Args:
        version: Graphistry API version. Versions 1 and 3 get a ``node_color``
            column; any other value leaves the colors untouched.
        nodes: DataFrame with at least the ``type`` and ``nodeTitle`` columns.

    Returns:
        The same ``nodes`` dataframe with ``node_title``, ``radius`` and, for
        versions 1 and 3, ``node_color`` added.
    """
    node_types = nodes['type']

    # Substitute nodeTitle: 'Table.Column' nodes show only the column part.
    nodes['node_title'] = [
        title.split(".")[1] if node_type == 'Table.Column' else title
        for node_type, title in zip(node_types, nodes['nodeTitle'])
    ]
    #add fontawesome
    #nodes['node_icon'] = ["table" if node_type == 'Table' else "columns" if node_type == 'Table.Column' else "asterisk" for node_type in nodes['type']]
    # Setting radius per node type to create circle-like features.
    nodes['radius'] = [
        250 if node_type == 'Table' else 600 if node_type == 'Table.Column' else 1000
        for node_type in node_types
    ]

    # Compare by value: `is` tests object identity, which is not guaranteed for
    # equal integers (for example a numpy integer never `is` the literal 3).
    if version == 1:
        # Small integer color codes for API 1.
        nodes['node_color'] = [
            0 if node_type == 'Table' else 4 if node_type == 'Table.Column' else 5
            for node_type in node_types
        ]
    elif version == 3:
        # Hex color values for API 3 (presumably RGBA with a trailing alpha byte — TODO confirm).
        nodes['node_color'] = [
            0xA596FF00 if node_type == 'Table' else 0x00125F00 if node_type == 'Table.Column' else 0x4a9dff00
            for node_type in node_types
        ]
        # Force a 64-bit integer column: 0xA596FF00 does not fit in a signed
        # 32-bit integer (the original note says API 3 errors without a cast).
        nodes['node_color'] = nodes['node_color'].astype(np.int64)
    return nodes

def edge_decorator(version, edges):
    """Hook for edge styling; currently hands the edges back untouched.

    Args:
        version: Graphistry API version (unused for now).
        edges: Edge table of the graph.

    Returns:
        The ``edges`` object that was passed in.
    """
    # Disabled experiments: sorting by ['src', 'dst'] and casting 'hash' to str.
    decorated_edges = edges
    return decorated_edges

In [None]:
def node_decorator_Y(version, nodes):
    """Add display attributes (title, radius, color) to nodes for the Y-locked layout.

    The ``nodes`` dataframe is modified in place and also returned.

    Args:
        version: Graphistry API version. Versions 1 and 3 get a ``node_color``
            column; any other value leaves the colors untouched.
        nodes: DataFrame with at least the ``type`` and ``nodeTitle`` columns.

    Returns:
        The same ``nodes`` dataframe with ``node_title``, ``radius`` and, for
        versions 1 and 3, ``node_color`` added.
    """
    node_types = nodes['type']

    # Substitute nodeTitle: 'Table.Column' nodes show only the column part.
    nodes['node_title'] = [
        title.split(".")[1] if node_type == 'Table.Column' else title
        for node_type, title in zip(node_types, nodes['nodeTitle'])
    ]
    #add fontawesome
    #nodes['node_icon'] = ["table" if node_type == 'Table' else "columns" if node_type == 'Table.Column' else "asterisk" for node_type in nodes['type']]
    # Per-type position value stored in 'radius'.
    nodes['radius'] = [
        250 if node_type == 'Table' else 600 if node_type == 'Table.Column' else 1000
        for node_type in node_types
    ]

    # Compare by value: `is` tests object identity, which is not guaranteed for
    # equal integers (for example a numpy integer never `is` the literal 3).
    if version == 1:
        # Small integer color codes for API 1.
        nodes['node_color'] = [
            0 if node_type == 'Table' else 4 if node_type == 'Table.Column' else 5
            for node_type in node_types
        ]
    elif version == 3:
        # Hex color values for API 3 (presumably RGBA with a trailing alpha byte — TODO confirm).
        nodes['node_color'] = [
            0xA596FF00 if node_type == 'Table' else 0x00125F00 if node_type == 'Table.Column' else 0x4a9dff00
            for node_type in node_types
        ]
        # Force a 64-bit integer column: 0xA596FF00 does not fit in a signed
        # 32-bit integer (the original note says API 3 errors without a cast).
        nodes['node_color'] = nodes['node_color'].astype(np.int64)
    return nodes

def edge_decorator_Y(version, edges):
    """Edge styling hook for the Y-locked layout; currently a pass-through.

    Args:
        version: Graphistry API version (unused for now).
        edges: Edge table of the graph.

    Returns:
        The ``edges`` object that was passed in.
    """
    # Disabled experiments: sorting by ['src', 'dst'] and casting 'hash' to str.
    decorated_edges = edges
    return decorated_edges

### Get JWT Graphistry Token

In [None]:
def generate_graphistry_token(api_username, api_pwd):
    """Return a Graphistry JWT token, logging in again once the cached one is an hour old.

    The token and the moment it was obtained are cached in the module-level
    globals ``jwt_token`` and ``jwt_token_time`` so repeated calls reuse it.

    Args:
        api_username: Account name for hub.graphistry.com.
        api_pwd: Password for that account.

    Returns:
        The cached token when it is younger than one hour, otherwise a token
        from a fresh login.
    """
    global jwt_token_time
    global jwt_token

    max_token_age_seconds = 3600

    if 'jwt_token' in globals() and 'jwt_token_time' in globals():
        # total_seconds() covers the whole elapsed time; timedelta.seconds is only
        # the within-day remainder, so a token older than 24 hours could look fresh.
        token_age = (datetime.now() - jwt_token_time).total_seconds()
        if 0 <= token_age < max_token_age_seconds:
            return jwt_token

    uploader = graphistry.ArrowUploader(server_base_path='https://hub.graphistry.com')
    fresh_token = uploader.login(username=api_username, password=api_pwd).token

    # Update the cache only after a successful login, so a failed login cannot
    # leave a timestamp behind without a matching token.
    jwt_token = fresh_token
    jwt_token_time = datetime.now()
    return jwt_token

In [None]:
def graphistry_graph(version, df):
    """Build the Table / Table.Column / intersect hypergraph and plot it with the R axis locked.

    Args:
        version: Graphistry API version. 1 registers with a fixed key, 3
            registers with a JWT token; any other value skips registration.
        df: DataFrame with ``Table``, ``Table.Column`` and ``intersect`` columns.

    Returns:
        The result of ``g.plot(render=False)``, i.e. the plot is not rendered inline.
    """
    # Compare by value: `is` tests object identity, which is not guaranteed for
    # equal integers and raises a SyntaxWarning on recent Python versions.
    if version == 1:
        # NOTE(review): the key is hard-coded; the api1_key read from the
        # environment is not used anywhere in the visible notebook — confirm.
        graphistry.register(api=version, key='test')
    elif version == 3:
        api_token = generate_graphistry_token(api_username, api_pwd)
        graphistry.register(api=version, protocol="https", server='hub.graphistry.com', token=api_token)

    # Directed hypergraph restricted to Table -> Table.Column -> intersect edges.
    hypergraph = graphistry.hypergraph(
        df,
        ['Table', 'Table.Column', 'intersect'],
        direct=True,
        drop_edge_attrs=True,
        opts={
            "EDGES": {
                "Table": ["Table.Column"],
                "Table.Column": ["intersect"]
            }
        })
    g = hypergraph['graph']

    #add color and other stuff
    g = g.nodes(node_decorator(version, g._nodes)).edges(edge_decorator(version, g._edges))
    #add url params settings
    g = g.settings(url_params={'play':'10000','showArrows':'false','lockedR':'true','bg':'%23FFFFFF','linLog':'true','edgeCurvature':0.05,'edgeOpacity':0.2})
    #bind options
    g = g.bind(point_title='node_title', point_color='node_color', point_label='intersect', point_x='radius', point_y=0)

    #without render
    return g.plot(render=False)

In [None]:
def graphistry_graph_Y(version, df):
    """Build the Table / Table.Column / intersect hypergraph and plot it with the Y axis locked.

    Args:
        version: Graphistry API version. 1 registers with a fixed key, 3
            registers with a JWT token; any other value skips registration.
        df: DataFrame with ``Table``, ``Table.Column`` and ``intersect`` columns.

    Returns:
        The result of ``g.plot(render=False)``, i.e. the plot is not rendered inline.
    """
    # Compare by value: `is` tests object identity, which is not guaranteed for
    # equal integers and raises a SyntaxWarning on recent Python versions.
    if version == 1:
        # NOTE(review): the key is hard-coded; the api1_key read from the
        # environment is not used anywhere in the visible notebook — confirm.
        graphistry.register(api=version, key='test')
    elif version == 3:
        api_token = generate_graphistry_token(api_username, api_pwd)
        graphistry.register(api=version, protocol="https", server='hub.graphistry.com', token=api_token)

    # Directed hypergraph restricted to Table -> Table.Column -> intersect edges.
    hypergraph = graphistry.hypergraph(
        df,
        ['Table', 'Table.Column', 'intersect'],
        direct=True,
        drop_edge_attrs=True,
        opts={
            "EDGES": {
                "Table": ["Table.Column"],
                "Table.Column": ["intersect"]
            }
        })
    g = hypergraph['graph']

    #add color and other stuff
    # Use the decorators written for this layout (currently identical to the
    # non-Y versions, so the output is the same).
    g = g.nodes(node_decorator_Y(version, g._nodes)).edges(edge_decorator_Y(version, g._edges))
    #add url params settings
    g = g.settings(url_params={'play':'10000','showArrows':'false','lockedY':'true','bg':'%23FFFFFF','edgeCurvature':0,'edgeOpacity':0.2})
    #bind options
    g = g.bind(point_title='node_title', point_color='node_color', point_label='intersect', point_y='radius')

    #without render
    return g.plot(render=False)

#### Names of layout options to use in url_params  
gravity: 0.10471285480508996  
linLog: false  
lockedX: false  
lockedY: false  
lockedR: true  
scalingRatio: 0.10471285480508996  
edgeInfluence: 0  
dissuadeHubs: true  
strongGravity: false  
precisionVsSpeed: -0.1  

References:
- https://stackoverflow.com/questions/843277/how-do-i-check-if-a-variable-exists
- https://blog.softhints.com/python-3-subtrack-time/
- https://markhneedham.com/blog/2019/05/10/jupyter-runtimeerror-this-event-loop-is-already-running/