### Mapping Network Topology with Netflow Data in CAS

In [1]:
import os
import pandas as pd
import swat
from swat.cas import datamsghandlers as dmh

In [2]:
# Tell the CAS client which CA certificate list to use; the client picks
# this up from the CAS_CLIENT_SSL_CA_LIST environment variable.
os.environ.update({"CAS_CLIENT_SSL_CA_LIST": "/home/ds/cascert.pem"})

# Open the CAS connection that every later cell reaches through `conn`.
cas_host, cas_port = "d-crd-datasci01big.dev.cyber.sas.com", 5570
conn = swat.CAS(cas_host, cas_port)

In [3]:
# Start from a clean slate: remove any existing 'LANL_NF' caslib first.
# quiet=True is passed so a missing caslib is (presumably) not reported as an error.
conn.dropcaslib(caslib='LANL_NF', quiet=True)

# Re-create the caslib on top of the LANL netflow directory.
# NOTE(review): session=False presumably makes the caslib global rather than
# session-scoped -- confirm this is intended on a shared server.
lanl_caslib_opts = dict(
    name='LANL_NF',
    path='/home/datasets/LANL/NF/',
    description="LANL Netflow",
    session=False,
)
conn.addcaslib(**lanl_caslib_opts)

# Make 'LANL_NF' the active caslib for the cells that follow.
conn.setsessopt(caslib='LANL_NF')

NOTE: Cloud Analytic Services removed the caslib 'LANL_NF'.
NOTE: 'LANL_NF' is now the active caslib.
NOTE: Cloud Analytic Services added the caslib 'LANL_NF'.
NOTE: 'LANL_NF' is now the active caslib.


In [None]:
# Read the netflow CSV (day 02, hour 13 per the file name) on the client with pandas.
netflow_csv = "/home/ds/datasets/NF/netflow_day-02_hr13.csv"
dfNetflow = pd.read_csv(netflow_csv)

# Wrap the frame in a SWAT data message handler; its `args.addtable` supplies
# the extra keyword arguments passed to addtable alongside table and caslib.
dmhNetflow = dmh.PandasDataFrame(dfNetflow)
out1 = conn.addtable(
    table='nf_day_02_hr13',
    caslib='LANL_NF',
    **dmhNetflow.args.addtable
)

In [None]:
# FedSQL statement that collapses the raw flows into one row per
# (SrcDevice, DstDevice) pair together with the number of rows for that pair.
# The trailing space is part of the original statement text and is kept as-is.
pairsCode = (
    'CREATE TABLE nf_day_02_hr13_pairs AS '
    'SELECT SrcDevice, DstDevice, COUNT(*) '
    'FROM nf_day_02_hr13 '
    'GROUP BY SrcDevice, DstDevice '
)
print(pairsCode)

In [None]:
# Drop a previous in-memory copy of the pairs table so that the
# CREATE TABLE statement can be executed again.
#
# tableexists returns a dict-like result object, not a bare boolean, and a
# non-empty result is truthy even when it says the table is absent. The
# 'exists' field (0 when the table is not found) must be tested explicitly,
# otherwise droptable is always called, even for a table that is not there.
if conn.table.tableexists(name='nf_day_02_hr13_pairs')['exists']:
    conn.table.droptable(name='nf_day_02_hr13_pairs')

In [None]:
# Load the fedsql action set so that conn.fedsql actions can be called.
conn.loadactionset("fedsql")

In [None]:
# Execute the CREATE TABLE ... AS SELECT statement held in pairsCode.
conn.fedsql.execdirect(pairsCode)

In [None]:
# Persist both in-memory tables as .sashdat files in the LANL_NF caslib so
# that later sessions can load them without redoing the CSV upload and the
# aggregation.
#
# replace=True lets a re-run overwrite files left by an earlier run. Without
# it the save cannot replace an existing file, and the later loadtable would
# pick up the stale copy from disk.
conn.save(table='nf_day_02_hr13', name='nf_day_02_hr13.sashdat', caslib='LANL_NF', replace=True)
conn.save(table='nf_day_02_hr13_pairs', name='nf_day_02_hr13_pairs.sashdat', caslib='LANL_NF', replace=True)

In [4]:
# Load the hypergroup action set used by the graph-building cell.
conn.loadactionset('hypergroup')

# Load the saved src/dst pair table from the LANL_NF caslib back into memory.
conn.loadtable('nf_day_02_hr13_pairs.sashdat', caslib='LANL_NF')

NOTE: Added action set 'hypergroup'.
NOTE: Cloud Analytic Services made the file nf_day_02_hr13_pairs.sashdat available as table NF_DAY_02_HR13_PAIRS in caslib LANL_NF.


In [None]:
# Build the graph from the src/dst device pairs.
#
# The edge and vertex results must go to differently named output tables.
# Previously both were written to 'edges' with replace=True, so one result
# overwrote the other and only a single output table survived.
# NOTE(review): 'vertices' is the name chosen here -- any later cell that
# reads the vertex output must use this table name.
conn.hyperGroup.hyperGroup(
    createOut = "NEVER",  # suppress the output table that is usually produced; it is not needed here
    allGraphs = True,     # process all graphs even if disconnected
    inputs    = ["SrcDevice", "DstDevice"],  # the source and target columns that define an edge
    table     = 'nf_day_02_hr13_pairs',      # the input table
    edges     = {'name': 'edges', 'replace': True},     # result table containing edge attributes
    vertices  = {'name': 'vertices', 'replace': True}   # result table containing vertex attributes
)