In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan, bulk
from itertools import islice
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.axes as ax
import plotly as py
import plotly.graph_objects as go
import seaborn as sns
import plotly.express as px
%matplotlib inline

import helpers as hp

# Leave both values as None to load them from creds.key
# (first line: user name, second line: password).
user, passwd = None, None
if user is None and passwd is None:
    with open("creds.key") as creds_file:
        user = creds_file.readline().strip()
        passwd = creds_file.readline().strip()

credentials = (user, passwd)
es = Elasticsearch(
    ['atlas-kibana.mwt2.org:9200'],
    timeout=240,
    http_auth=credentials,
)

# Report whether the cluster answers a ping
if es.ping():
    print("Connection Successful")
else:
    print("Connection Unsuccessful")


Connection Successful


In [2]:
def getAvgPacketLossbyHost(es, fld, group, fromDate, toDate):
    """Return the average packet loss per host and time bucket.

    Queries the ``ps_packetloss`` index for documents whose ``timestamp`` lies
    between ``fromDate`` and ``toDate`` (both inclusive), groups them by the
    value of ``fld`` and, inside each group, by a date histogram with interval
    ``group``; the ``packet_loss`` field is averaged per histogram bucket.

    Each grouping key is passed to ``hp.ResolveHost``. Keys that resolve
    contribute one record per histogram bucket; keys that do not resolve are
    reported once in the ``unknown`` list and contribute no records.

    Parameters
    ----------
    es
        Elasticsearch client; used for the search and handed to
        ``hp.ResolveHost``.
    fld : str
        Field of the terms aggregation (e.g. ``'src_host'``).
    group : str
        ``calendar_interval`` of the date histogram (e.g. ``'day'``).
    fromDate, toDate
        Lower and upper bound of the ``timestamp`` range filter.

    Returns
    -------
    dict
        ``{'resolved': records, 'unknown': unknown}`` where ``records`` is a
        list of ``{'host', 'period', 'avg_loss'}`` dicts (``period`` is the
        histogram bucket key) and ``unknown`` is a list of the ``'unknown'``
        entries returned by ``hp.ResolveHost`` for unresolved keys.
    """
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gte": fromDate,
                                "lte": toDate
                            }
                        }
                    }
                ]
            }
        },
        "aggs": {
            "host": {
                "terms": {
                    "field": fld,
                    "size": 9999
                },
                "aggs": {
                    "period": {
                        "date_histogram": {
                            "field": "timestamp",
                            "calendar_interval": group
                        },
                        "aggs": {
                            "avg_loss": {
                                "avg": {
                                    "field": "packet_loss"
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    # Name the index explicitly so the call does not depend on the positional
    # parameter order of the installed client version.
    data = es.search(index="ps_packetloss", body=query)

    result = []
    unknown = []
    seen_unknown = set()  # first element of every entry already reported

    for host in data['aggregations']['host']['buckets']:
        resolved = hp.ResolveHost(es, host['key'])
        name = resolved['resolved']

        if not name:
            # Report the unresolved key once and skip its buckets; reusing the
            # name of the previously resolved host would attribute this
            # host's loss to the wrong host.
            entry = resolved['unknown']
            if entry and len(entry[0]) != 0 and entry[0] not in seen_unknown:
                seen_unknown.add(entry[0])
                unknown.append(entry)
            continue

        for period in host['period']['buckets']:
            result.append({
                'host': name,
                'period': period['key'],
                'avg_loss': period['avg_loss']['value'],
            })

    return {'resolved': result, 'unknown': unknown}

In [3]:
# From 31-12-2017 to 31-12-2019: get a list of sites and their avg packet loss as a src_site and as a dest_site
# ssite = getAvgPacketLossbySiteOverAYear(es, 'src_site', 'day', '1514757601000', '1577829599000')
# dsite = getAvgPacketLossbySiteOverAYear(es, 'dest_site', 'day', '1514757601000', '1577829599000')


# From 01-12-2019 to 22-01-2020: get a list of hosts and their avg packet loss as a src_host and as a dest_host.
# ps_packetloss has data since mid December 2019 only.
# The range bounds are named once so both queries are guaranteed to cover the same window.
FROM_DATE = '1575151349000'
TO_DATE = '1579687349000'
ssite = getAvgPacketLossbyHost(es, 'src_host', 'day', FROM_DATE, TO_DATE)
dsite = getAvgPacketLossbyHost(es, 'dest_host', 'day', FROM_DATE, TO_DATE)

IP 2001:4118:900:cc00::112 was found in ps_meta: perfsonar02-iep-grid.saske.sk


In [4]:
# Show the first ten 'unknown' entries collected by the src_host query
ssite['unknown'][:10]

[['perfsonar-latency.grid.surfsara.nl', 'Host not part of configuration'],
 ['lcg-lat.sfu.computecanada.ca', 'Host not part of configuration'],
 ['ps.truba.gov.tr', 'Host not part of configuration'],
 ['ps-londhx1.ja.net', 'Host not part of configuration'],
 ['lhcone-wash-opt1.es.net', 'Host not part of configuration'],
 ['perfsonar-fra-1.exoscale.ch', 'Host not part of configuration'],
 ['perfsonar40-otc.hnsc.otc-service.com', 'Host not part of configuration'],
 ['psmp-gn-owd-01.gen.ch.geant.net', 'Host not part of configuration'],
 ['psmp-gn-owd-01.fra.de.geant.net', 'Host not part of configuration'],
 ['psmp-gn-owd-01.ams.nl.geant.net', 'Host not part of configuration']]

In [5]:
# Show the 'unknown' entries collected by the dest_host query
dsite['unknown']

[]

In [6]:
# One DataFrame per direction, built from the 'resolved' records of each query
sdf, ddf = (pd.DataFrame(side['resolved']) for side in (ssite, dsite))
# sdf

In [7]:
# Normalise both frames, add a type column and stack them into one DataFrame

def _mean_loss_per_host_period(frame, direction):
    """Return one row per (host, period) with the mean avg_loss, tagged with `direction`.

    `period` is parsed as a millisecond timestamp. The same host can show up
    more than once for a period (the original note attributes this to
    unresolved hosts), so duplicates are averaged. Only `avg_loss` is
    aggregated, which keeps the cell re-runnable after the string `type`
    column has been added. The input frame is not modified.
    """
    parsed = frame.assign(period=pd.to_datetime(frame['period'], unit='ms'))
    averaged = parsed.groupby(['host', 'period'], as_index=False)['avg_loss'].mean()
    return averaged.assign(type=direction)


sdf = _mean_loss_per_host_period(sdf, 'src')
ddf = _mean_loss_per_host_period(ddf, 'dest')

cdf = pd.concat([sdf, ddf], sort=True)

# cdf.head()

In [8]:
# Host counts overall and per direction
print(
    "Total unique: ", len(cdf['host'].unique()),
    " Sources: ", len(sdf['host'].unique()),
    " Destinations: ", len(ddf['host'].unique()),
)
# Hosts that appear in one direction only
print("Missing sources: ", sdf.loc[~sdf['host'].isin(ddf['host']), 'host'].unique())
print("Missing destinations: ", ddf.loc[~ddf['host'].isin(sdf['host']), 'host'].unique())

Total unique:  204  Sources:  161  Destinations:  199
Missing sources:  ['lhcone-newy-opt1.es.net' 'nanperfs02.in2p3.fr'
 'perfsonar-fra-1.exoscale.ch' 'ps.ncp.edu.pk' 'psonar3.fnal.gov']
Missing destinations:  ['111.68.106.222.pern.pk' 'grid251.kfki.hu' 'haleakala-tp.ps.uhnet.net'
 'ifa-mko10g-tp.ps.uhnet.net' 'ifahilo-tp.ps.uhnet.net' 'iut2-net3.iu.edu'
 'iut2-net5.iu.edu' 'lat-niagara.computecanada.ca' 'lcgperf.shef.ac.uk'
 'maunalani-dl.ps.uhnet.net' 'maunaloa-ps.ps.uhnet.net'
 'net-perf.rcs.le.ac.uk' 'osg.chic.nrp.internet2.edu'
 'osg.kans.nrp.internet2.edu' 'osg.newy32aoa.nrp.internet2.edu'
 'owamp-ps.singaren.net.sg' 'panstarrsitc-tp.ps.uhnet.net'
 'perflat.ciemat.es' 'perfsonar-100g.cscs.ch'
 'perfsonar-b5-data.soton.ac.uk' 'perfsonar-b5-mgt.soton.ac.uk'
 'perfsonar-ext.soton.ac.uk' 'perfsonar.ornl.gov'
 'perfsonar01.jc.rl.ac.uk' 'perfsonar2.recas.ba.infn.it'
 'perfsonar2.roma1.infn.it' 'ps-hpc-management.net.uconn.edu'
 'ps-slough-1g.ja.net' 'ps0002.m45.ihep.su' 'ps01.ncg.ingr

In [9]:
# Build the plots. If a host shows a spike in average loss both as a source and as a destination,
# one can conclude that the problem was with that specific host.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

g = sns.FacetGrid(cdf, row="host", hue="type", sharex=False,
                  sharey=False, height=3, aspect=5)
# No plt.figure() call here: the grid already owns its figure, and opening
# another one only leaves an empty figure next to the plots.
g.map(plt.plot, "period", "avg_loss", alpha=.7)
g.add_legend()
g.savefig("output.png")

In [89]:
# Build the same plots, this time with matplotlib subplots (two hosts per row)

hosts = cdf['host'].unique()
# Round up so an odd number of hosts still gets a slot for the last one;
# round() rounds halves to even (round(2.5) == 2) and could come up one row short.
rows = max(1, (len(hosts) + 1) // 2)

# squeeze=False keeps axs two-dimensional even when there is only one row
fig, axs = plt.subplots(rows, 2, figsize=(30, 240), sharex=False, sharey=False, squeeze=False)
# fig, axs = plt.subplots(4, 2, figsize=(36, 24), sharex=False, sharey=False, )

# for i, v in enumerate(hosts[:8]):
for i, v in enumerate(hosts):
    # Fill the grid row by row: hosts 0 and 1 go to row 0, hosts 2 and 3 to row 1, ...
    axis = axs[i // 2, i % 2]
    host_rows = cdf[cdf['host'] == v]
    src = host_rows[host_rows['type'] == 'src']
    dest = host_rows[host_rows['type'] == 'dest']
    axis.plot(src['period'], src['avg_loss'])
    axis.plot(dest['period'], dest['avg_loss'])
    axis.set_title(v, fontsize=16)
    axis.set_ylabel('Loss')
    axis.set_xlabel('Period')
    axis.legend(['src', 'dest'], loc='upper right')

# Hide the slots that received no host (the last one when the count is odd)
for spare in axs.flat[len(hosts):]:
    spare.set_visible(False)

fig.set_facecolor('w')
fig.tight_layout()
# fig.savefig("output.png")

In [81]:
# Separate copies of the source and destination frames, with `period`
# passed through pd.to_datetime using millisecond units
df1 = sdf.assign(period=pd.to_datetime(sdf['period'], unit='ms'))
df2 = ddf.assign(period=pd.to_datetime(ddf['period'], unit='ms'))

In [88]:
# Join source and destination rows on host and period
mdf = df1.merge(df2, on=['host', 'period'])
# Row-wise mean of the source-side (_x) and destination-side (_y) loss
mdf['mean'] = mdf.loc[:, ['avg_loss_x', 'avg_loss_y']].mean(axis=1)

In [86]:
# Build an interactive bubble chart to see the trend as well as the hosts with the most loss.
# Missing means are drawn with size 0.
fig = px.scatter(
    mdf,
    x="period",
    y="host",
    size=mdf["mean"].fillna(0),
    color="host",
    hover_name="host",
)

fig.update_layout(
    title='Avg Packet Loss from 01-12-2019 to 22-01-2020',
    xaxis=dict(title='Period'),
    yaxis=dict(title='Hosts'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

py.offline.plot(fig)

'temp-plot.html'