# SteelScript NetProfiler and NetShark Analysis Integration

## Imports and Setup

In [None]:
import sys
import csv
import datetime

import pandas

In [None]:
import steelscript
from steelscript.common.service import UserAuth
from steelscript.common.datautils import Formatter
from steelscript.common import timeutils

In [None]:
from steelscript.netprofiler.core.netprofiler import NetProfiler
from steelscript.netprofiler.core.filters import TimeFilter, TrafficFilter
from steelscript.netprofiler.core.report import TrafficOverallTimeSeriesReport, TrafficSummaryReport

In [None]:
from steelscript.netshark.core.netshark import NetShark
from steelscript.netshark.core.types import Key, Value
from steelscript.netshark.core.filters import NetSharkFilter
from steelscript.netshark.core.filters import TimeFilter as NSTimeFilter

In [None]:
# Appliance hostnames (the upper-case values look like placeholders to be
# replaced before running -- confirm).
netprofiler_host = "NETPROFILER.HOSTNAME.COM"
netshark_host = "NETSHARK.HOSTNAME.COM"

# One set of credentials; the resulting auth object is handed to both the
# NetProfiler and the NetShark connections below.
username, password = "USERNAME", "PASSWORD"
auth = UserAuth(username, password)

## Initialize NetProfiler and NetShark Objects

In [None]:
# Handle to the NetProfiler appliance; used below for column/groupby lookups
# and for running reports.
p = NetProfiler(netprofiler_host, auth=auth)

In [None]:
# Handle to the NetShark appliance; used below to list capture jobs and
# create views.
s = NetShark(netshark_host, auth=auth)

## Define Report Criteria

### Time filters, Columns, and Groupbys

In [None]:
# Reporting window reused by the NetProfiler reports and the NetShark views
# later in this notebook.
timefilter = TimeFilter.parse_range('last 1 hour')

# Parenthesized single-argument print behaves the same on Python 2 and 3.
print('Start: %s' % timefilter.start)
print('End: %s' % timefilter.end)

In [None]:
# Show the filter's own string representation (print() form works on
# Python 2 and 3).
print(timefilter)

In [None]:
# Columns for the per-host-group report: the identifying key columns first,
# followed by the traffic metrics.
key_cols = p.columns.key
value_cols = p.columns.value

columns = [
    key_cols.group_name,
    key_cols.group_id,
    value_cols.in_avg_bytes,
    value_cols.in_avg_pkts,
    value_cols.out_avg_bytes,
    value_cols.out_avg_pkts,
    value_cols.response_time,
]

# Aggregate the report rows by host group.
groupby = p.groupbys.host_group


## Create NetProfiler Report and Retrieve Data

In [None]:
# Run a traffic summary report using the columns, groupby and time window
# defined above, with no traffic expression applied.
report = TrafficSummaryReport(p)
try:
    report.run(columns=columns,
               groupby=groupby,
               centricity='int',
               resolution='1m',
               timefilter=timefilter,
               trafficexpr=None)
    data = report.get_data()
finally:
    # Always call delete(), even when the run or the data retrieval fails,
    # so a failed attempt does not skip the cleanup step.
    report.delete()

In [None]:
# Preview the first two rows of the report data.
data[:2]

## Format Data

### Simple table formatting

In [None]:
# Use each column's key as its table header.
headers = [c.key for c in columns]
# print() with a single argument works on both Python 2 and 3.
print(headers)

In [None]:
# Print the report rows as a table under the column-key headers.
Formatter.print_table(data, headers=headers)

### Formatting using pandas data analysis library

In [None]:
# Load the report rows into a DataFrame, labelling the columns with the
# report's column keys, then display it.
df = pandas.DataFrame(data, columns=headers)
df

### Find row with the highest response time

In [None]:
# Index label of the row with the largest response_time value.
# NOTE(review): the application report further down replaces '' with 0
# before sorting; confirm whether response_time can also contain '' here,
# which could affect which row idxmax() picks.
rowidx = df['response_time'].idxmax()
rowidx

In [None]:
# Show the full row with the highest response time. idxmax() returned an
# index label, so use label-based .loc (.ix is deprecated and no longer
# exists in current pandas).
df.loc[rowidx]

In [None]:
# Name of the host group with the highest response time (label-based .loc
# replaces the deprecated .ix indexer).
df.loc[rowidx, 'group_name']

### Find application using the most resources at that hostgroup

In [None]:
# Columns for the per-application report. This rebinds `columns`, so the
# host-group column list defined earlier is no longer available afterwards.
columns = [
    p.columns.key.app_name,
    p.columns.value.network_rtt,
    p.columns.value.in_avg_pkts,
    p.columns.value.out_avg_bytes,
    p.columns.value.out_avg_pkts,
]
groupby = p.groupbys.application

# Restrict the report to the ByLocation host group found above. The group
# name is read with label-based .loc, because .ix is deprecated and no longer
# exists in current pandas.
filterexpr = TrafficFilter('hostgroup ByLocation:%s' % df.loc[rowidx, 'group_name'])

In [None]:
# Run a per-application summary report, sorted by network round-trip time
# and limited by the host-group traffic expression.
report = TrafficSummaryReport(p)
try:
    report.run(columns=columns,
               sort_col=p.columns.value.network_rtt,
               groupby=groupby,
               centricity='int',
               resolution='1m',
               timefilter=timefilter,
               trafficexpr=filterexpr)
    app_data = report.get_data()
finally:
    # Always call delete(), even when the run or the data retrieval fails,
    # so a failed attempt does not skip the cleanup step.
    report.delete()

In [None]:
# Build the application table, substituting 0 for empty-string cells so the
# sort below compares numbers only.
app_df = pandas.DataFrame(app_data, columns=[c.key for c in columns]).replace('', 0)

# Highest network RTT first. DataFrame.sort() was removed from pandas;
# sort_values() is its replacement.
app_df.sort_values('network_rtt', inplace=True, ascending=False)
app_df.head()

## Query NetShark for Microbursts of Hostgroup IP Addresses

### Extract list of IPs from hostgroup definition

In [None]:
from steelscript.netprofiler.core.hostgroup import HostGroupType, HostGroup

In [None]:
# Look up the 'ByLocation' host group type on the NetProfiler.
hgtype = HostGroupType.find_by_name(p, 'ByLocation')

In [None]:
# Show the type's name and its groups (print() form works on Python 2 and 3).
print(hgtype.name)
hgtype.groups

In [None]:
# Re-display the row with the highest response time (label-based .loc
# replaces the deprecated .ix indexer).
df.loc[rowidx]

In [None]:
# Host group name of the slowest row, read in one label-based lookup instead
# of the deprecated .ix indexer followed by chained indexing.
location = df.loc[rowidx, 'group_name']

# Groups are looked up by name on the host group type.
hostgroup = hgtype.groups[location]

# print() with a single formatted string works on both Python 2 and 3.
print('Hostgroup name: %s\nHostgroup CIDRs: %s' % (hostgroup.name, hostgroup.get()))

### Apply Hostgroup CIDRs to NetShark filter

In [None]:
# Display the capture jobs reported by the NetShark.
s.get_capture_jobs()

In [None]:
# Use the first capture job returned as the packet source for the views
# below. The [0] lookup fails if no jobs are returned.
job = s.get_capture_jobs()[0]

<font color='blue'>We use a different CIDR block here because our demo NetProfiler and NetShark are on different networks. In the actual script, this value is carried over from the previous hostgroup definition.</font>

In [None]:
# Key columns: source and destination address/port of each conversation.
ns_columns = [
    Key(col)
    for col in (
        s.columns.ip.src,
        s.columns.tcp.src_port,
        s.columns.ip.dst,
        s.columns.tcp.dst_port,
    )
]

# Single value column, kept last: the 1 ms max-microburst column in bits.
ns_columns.append(Value(s.columns.generic.max_microburst_1ms.bits))

In [None]:
# Address block to match in the NetShark data.
cidrs = '172.0.0.0/8'

# Same time window as the NetProfiler reports, followed by the address
# filter.
ns_filters = [NSTimeFilter(timefilter.start, timefilter.end)]
nsfilter = NetSharkFilter('ip.address="%s"' % cidrs)
ns_filters.append(nsfilter)

### Retrieve All Microbursts over the same time period

In [None]:
# Create a view on the capture job with the columns and filters above, and
# read its data in aggregated form. The view is used as a context manager,
# so it is released when the block exits.
# NOTE(review): sync=True presumably waits for the view to finish
# processing before returning -- confirm against the NetShark API docs.
with s.create_view(job, ns_columns, ns_filters, sync=True) as view:
    d = view.get_data(aggregated=True)

In [None]:
# Display the aggregated view data.
d

### Find hostpair with biggest burst

In [None]:
# Rows of the first (index 0) entry in the aggregated output.
vals = d[0]['vals']

In [None]:
# Pick the row whose fifth field is largest; index 4 corresponds to the
# microburst value column, the last of the five entries in ns_columns.
hostpair = max(vals, key=lambda row: row[4])
hostpair

### Create new NetShark Timeseries view for biggest burst hostpair

In [None]:
# Match only the conversation with the biggest burst: the first four fields
# of hostpair fill in the source/destination address and port.
hostpair_expr = 'ip.src="{0}" & tcp.src_port="{1}" & ip.dst="{2}" & tcp.dst_port="{3}"'
nsfilter = NetSharkFilter(hostpair_expr.format(*hostpair))

# Same time window as before, now paired with the host-pair filter.
ns_filters = [NSTimeFilter(timefilter.start, timefilter.end), nsfilter]

In [None]:
# Re-run the view with the host-pair filter, this time requesting
# non-aggregated data with a one-second delta.
# NOTE(review): delta presumably sets the width of each time bucket --
# confirm against the NetShark API docs.
with s.create_view(job, ns_columns, ns_filters, sync=True) as view:
    dtime = view.get_data(aggregated=False,
                          delta=datetime.timedelta(seconds=1))

In [None]:
# Display the non-aggregated view data.
dtime

### Transform into simple table and plot results

In [None]:
headers = ['time', 'packets', '1ms_uburst']

# One tuple per sample: its 't' and 'p' fields plus the last value of the
# sample's first 'vals' row.
timeseries = [
    (sample['t'], sample['p'], sample['vals'][0][-1])
    for sample in dtime
]

# Index the table by time and preview the first ten rows.
tdf = pandas.DataFrame(timeseries, columns=headers).set_index('time')
tdf[:10]

In [None]:
%pylab inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Plot every column of the time series with default settings.
tdf.plot()

In [None]:
# Plot each series with its own plot() call, using a wide, short figure size.
tdf.plot(y=['packets'], figsize=(12,3))
tdf.plot(y=['1ms_uburst'], figsize=(12,3))

In [None]:
# Plot packets first, then the microburst series against a secondary y-axis.
tdf.packets.plot()
tdf['1ms_uburst'].plot(secondary_y=True, figsize=(12,6))

In [None]:
# Plot with subplots=True, so each column gets its own subplot.
tdf.plot(subplots=True, figsize=(12,8))