# Script to plot the CPU usage of a White Rabbit Switch

To understand the influence of the SNMP queries on the WR Switch performance, i.e. its average CPU usage, several tests have been performed. 
* Influence of the number of SNMP queries 
* Influence of different firmware versions

In [None]:
import os,sys
import pandas as pd
import numpy as np

In [None]:
%matplotlib inline
import matplotlib
from matplotlib import pyplot as plt
#plt.rcParams["figure.dpi"] = 300

# Influence of the number of SNMP queries (H.E.S.S.-like setup)

Three different telegraf configurations have been used on three different WR switches, mimicking a H.E.S.S.-like setup where in the end 6 WR nodes (5 cameras + 1 central trigger) are connected to the switch (currently running FW v5.0.1). 

* Setup 1 - All currently monitored OIDs are queried (36 in total - only 6 ports are considered)
* Setup 2 - All (identified to be most likely) useful OIDs are queried (113 in total - all 18 ports are considered)
* Setup 3 - All fields and tables are queried. 

In [None]:
### Get the data from the influxDB (telegraf queries) and store to file
# from influxdb import InfluxDBClient, DataFrameClient

# host = "localhost"
# port = 8086
# user = "root"
# password = "root"
# dbname = "telegraf_hess"

# client = DataFrameClient(host, port, user, password, dbname)

# datadict = client.query("SELECT * from snmp")
# data = datadict["snmp"]

# data.to_csv("/home/prokoph/CTA/ArrayClockSystem/WRS/MonitoringWRSS/hess_cpu_test.v0.csv")

## Using data from empty WR switches

In [None]:
# Load the SNMP records of the run with empty WR switches.  The first CSV
# column is used as the index and parsed as dates.
snmp = pd.read_csv(
    "/home/prokoph/CTA/ArrayClock_Work/WRS/MonitoringWRSS/hess_cpu_test.v0.csv",
    index_col=0,
    parse_dates=True,
)
# Quick sanity check: (rows, columns) of the loaded table.
print(snmp.shape)
# Last expression of the cell: the notebook displays the final three records.
snmp.tail(3)

In [None]:
# Boolean row masks, one per switch, keyed on the SNMP agent address.
# A vectorised comparison replaces the per-element lambda: it is evaluated in
# one pass and always yields a boolean-dtype mask.
wrs1 = snmp['agent_host'] == '192.168.4.31'
wrs2 = snmp['agent_host'] == '192.168.4.32'
wrs3 = snmp['agent_host'] == '192.168.4.33'

# For each switch: restrict to the 2020-08-08 .. 2020-08-09 window, then keep
# only rows whose 15-minute CPU load average is finite (removing NaN makes the
# plots look nicer).
cpu1 = snmp[wrs1].loc['2020-08-08':'2020-08-09']
cpu1 = cpu1[np.isfinite(cpu1['wrsCPULoadAvg15min'])]
cpu2 = snmp[wrs2].loc['2020-08-08':'2020-08-09']
cpu2 = cpu2[np.isfinite(cpu2['wrsCPULoadAvg15min'])]
cpu3 = snmp[wrs3].loc['2020-08-08':'2020-08-09']
cpu3 = cpu3[np.isfinite(cpu3['wrsCPULoadAvg15min'])]

### Plot as time series

In [None]:
# Overlay the 15-minute CPU load average of the three switches on one axis.
# The first call creates the axes; the others are drawn onto it explicitly.
ax = cpu1['wrsCPULoadAvg15min'].plot(figsize=(15, 8), label='Setup 1')
ax = cpu2['wrsCPULoadAvg15min'].plot(ax=ax, figsize=(15, 8), label='Setup 2')
ax = cpu3['wrsCPULoadAvg15min'].plot(ax=ax, figsize=(15, 8), label='Setup 3')

ax.set_title('Average CPU load of a WR Switch (v5.0, no WR nodes connected)')
ax.set_ylabel('CPU load [%]')
ax.legend()

# Number of samples with a CPU load above 50 ...
t1 = (cpu1['wrsCPULoadAvg15min'] > 50).sum()
t2 = (cpu2['wrsCPULoadAvg15min'] > 50).sum()
t3 = (cpu3['wrsCPULoadAvg15min'] > 50).sum()
# ... printed as a percentage of all (non-NaN) samples of the same switch.
print(t1 * 100 / cpu1['wrsCPULoadAvg15min'].count())
print(t2 * 100 / cpu2['wrsCPULoadAvg15min'].count())
print(t3 * 100 / cpu3['wrsCPULoadAvg15min'].count())

### Plot as histogram 

In [None]:
# Keep the selection of the empty switches under dedicated names so it can be
# used again after cpu1/cpu2 are rebound to another data set.
wrs1_empty = cpu1
wrs2_empty = cpu2
wrs3_empty = cpu3

# Mean and standard deviation of each CPU load distribution, quoted in the
# legend.  The labels are raw strings so that the mathtext "\pm" is not read
# as an (invalid) Python escape sequence.
m1_empty = wrs1_empty['wrsCPULoadAvg15min'].mean()
s1_empty = wrs1_empty['wrsCPULoadAvg15min'].std()
label1_empty = r'Setup 1 (mean = %.1f%% $\pm$ %.1f%%)' % (m1_empty, s1_empty)
m2_empty = wrs2_empty['wrsCPULoadAvg15min'].mean()
s2_empty = wrs2_empty['wrsCPULoadAvg15min'].std()
label2_empty = r'Setup 2 (mean = %.1f%% $\pm$ %.1f%%)' % (m2_empty, s2_empty)
m3_empty = wrs3_empty['wrsCPULoadAvg15min'].mean()
s3_empty = wrs3_empty['wrsCPULoadAvg15min'].std()
label3_empty = r'Setup 3 (mean = %.1f%% $\pm$ %.1f%%)' % (m3_empty, s3_empty)

# --- Figure 1: filled, semi-transparent histograms of all three setups ---
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)

# histogram binning: 50 bins over 0..100
mybins = 50
myrange = (0, 100)

wrs1_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax, alpha=0.4, label=label1_empty)
wrs2_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax, alpha=0.4, label=label2_empty)
wrs3_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax, alpha=0.3, label=label3_empty)

plt.title('WR Switch (v5.0, no WR nodes connected)')
plt.xlabel('15min average CPU load [%]')
plt.legend()

# --- Figure 2: outline (step) histograms of setups 1 and 2 only ---
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
# histogram binning: 80 bins over 0..80
mybins = 80
myrange = (0, 80)

wrs1_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax, linewidth=3, histtype='step', label=label1_empty)
wrs2_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax, linewidth=3, histtype='step', label=label2_empty)

plt.title('WR Switch (v5.0, no WR nodes connected)')
plt.xlabel('15min average CPU load [%]')
plt.legend()

## Using data from WR switches with 6 nodes connected

In [None]:
# Replace `snmp` with the records of the run with six WR nodes connected.
# The first CSV column is used as the index and parsed as dates.
snmp = pd.read_csv(
    "/home/prokoph/CTA/ArrayClock_Work/WRS/MonitoringWRSS/hess_cpu_test.v1.csv",
    index_col=0,
    parse_dates=True,
)

In [None]:
# Boolean row masks for the two switches of this run, keyed on the SNMP agent
# address.  A vectorised comparison replaces the per-element lambda: it is
# evaluated in one pass and always yields a boolean-dtype mask.
wrs1 = snmp['agent_host'] == '192.168.4.31'
wrs2 = snmp['agent_host'] == '192.168.4.32'

# For each switch: restrict to the 2020-08-10 .. 2020-08-12 window, then keep
# only rows whose 15-minute CPU load average is finite (removing NaN makes the
# plots look nicer).
cpu1 = snmp[wrs1].loc['2020-08-10':'2020-08-12']
cpu1 = cpu1[np.isfinite(cpu1['wrsCPULoadAvg15min'])]
cpu2 = snmp[wrs2].loc['2020-08-10':'2020-08-12']
cpu2 = cpu2[np.isfinite(cpu2['wrsCPULoadAvg15min'])]

In [None]:
# Overlay the 15-minute CPU load average of both switches on one axis.
# The first call creates the axes; the second is drawn onto it explicitly.
ax = cpu1['wrsCPULoadAvg15min'].plot(figsize=(15, 8), label='Setup 1')
ax = cpu2['wrsCPULoadAvg15min'].plot(ax=ax, figsize=(15, 8), label='Setup 2')

ax.set_title('Average CPU load of a WR Switch (v5.0, six WR nodes connected)')
ax.set_ylabel('CPU load [%]')
ax.legend()

# Number of samples with a CPU load above 50 ...
t1 = (cpu1['wrsCPULoadAvg15min'] > 50).sum()
t2 = (cpu2['wrsCPULoadAvg15min'] > 50).sum()
# ... printed as a percentage of all (non-NaN) samples of the same switch.
print(t1 * 100 / cpu1['wrsCPULoadAvg15min'].count())
print(t2 * 100 / cpu2['wrsCPULoadAvg15min'].count())

In [None]:
# Mean and standard deviation of each CPU load distribution, quoted in the
# legend.  The labels are raw strings so that the mathtext "\pm" is not read
# as an (invalid) Python escape sequence.
m1 = cpu1['wrsCPULoadAvg15min'].mean()
s1 = cpu1['wrsCPULoadAvg15min'].std()
label1 = r'Setup 1 (mean = %.1f%% $\pm$ %.1f%%)' % (m1, s1)
m2 = cpu2['wrsCPULoadAvg15min'].mean()
s2 = cpu2['wrsCPULoadAvg15min'].std()
label2 = r'Setup 2 (mean = %.1f%% $\pm$ %.1f%%)' % (m2, s2)

# Filled, semi-transparent histograms of both setups on a single axis.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)

# histogram binning: 50 bins over 0..100
mybins = 50
myrange = (0, 100)

cpu1.hist(column='wrsCPULoadAvg15min',
          bins=mybins, range=myrange, ax=ax, alpha=0.4, label=label1)
cpu2.hist(column='wrsCPULoadAvg15min',
          bins=mybins, range=myrange, ax=ax, alpha=0.4, label=label2)

plt.title('WR Switch (v5.0, six WR nodes connected)')
plt.xlabel('15min average CPU load [%]')
plt.legend()

# fig = plt.figure(figsize=(10,6))
# ax = fig.add_subplot(1, 1, 1)
# # histogram binning
# mybins = 80
# myrange=(0,80)

# cpu1.hist(column='wrsCPULoadAvg15min',
#             bins=mybins, range=myrange, ax=ax, linewidth=3, histtype='step', label=label1)
# cpu2.hist(column='wrsCPULoadAvg15min',
#              bins=mybins, range=myrange, ax=ax, linewidth=3, histtype='step', label=label2)

# plt.title('WR Switch (v5.0, six WR nodes connected)')
# plt.xlabel('15min average CPU load [%]')
# plt.legend()

### Comparison plots

In [None]:
# Side-by-side comparison of the CPU load distributions without and with WR
# nodes connected: Setup 1 in the left panel, Setup 2 in the right panel.
# The statistics and frames of the empty switches (m*_empty, s*_empty,
# wrs*_empty) and of the six-node run (m*, s*, cpu*) come from earlier cells.
fig = plt.figure(figsize=(16, 6))

# --- left panel: Setup 1 ---
ax1 = fig.add_subplot(1, 2, 1)
mybins = 40
myrange = (0, 80)

# Raw strings keep the mathtext "\pm" from being read as an (invalid) Python
# escape sequence.
l1 = r'Switch w/o nodes (mean = %.1f%% $\pm$ %.1f%%)' % (m1_empty, s1_empty)
l2 = r'Switch w/ 6 nodes (mean = %.1f%% $\pm$ %.1f%%)' % (m1, s1)

wrs1_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax1, alpha=0.4, label=l1)
cpu1.hist(column='wrsCPULoadAvg15min',
          bins=mybins, range=myrange, ax=ax1, alpha=0.4, label=l2)

# Decorate the panel through its own axes object rather than through
# pyplot's implicit "current axes".
ax1.set_title('Setup 1')
ax1.set_xlabel('15min average CPU load [%]')
ax1.legend()

# --- right panel: Setup 2 ---
ax2 = fig.add_subplot(1, 2, 2)
mybins = 40
myrange = (0, 80)

l1 = r'Switch w/o nodes (mean = %.1f%% $\pm$ %.1f%%)' % (m2_empty, s2_empty)
l2 = r'Switch w/ 6 nodes (mean = %.1f%% $\pm$ %.1f%%)' % (m2, s2)

wrs2_empty.hist(column='wrsCPULoadAvg15min',
                bins=mybins, range=myrange, ax=ax2, alpha=0.4, label=l1)
cpu2.hist(column='wrsCPULoadAvg15min',
          bins=mybins, range=myrange, ax=ax2, alpha=0.4, label=l2)

ax2.set_title('Setup 2')
ax2.set_xlabel('15min average CPU load [%]')
ax2.legend()