In [1]:
# Enables figures loading outside of browser: with no argument, %matplotlib
# selects a GUI backend (the cell output reports which one was chosen).
# If not run, figures will load inline.
%matplotlib

Using matplotlib backend: TkAgg


In [3]:
import os
import math
import pandas as pd
import numpy as np
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import matplotlib.ticker
import datetime
import collections

# Some matplotlib features are version dependent.
# Compare numeric components, not raw strings: as strings '10.0.0' sorts before
# '2.1.2', so a plain string comparison would wrongly reject newer releases.
# Non-numeric components (e.g. '0rc1') are ignored.
_mpl_version = tuple(int(part) for part in matplotlib.__version__.split('.')[:3] if part.isdigit())
assert _mpl_version >= (2, 1, 2), 'matplotlib >= 2.1.2 is required'

# Depends on: pip install --upgrade google-cloud-bigquery
import query

In [4]:
def unlog(x, pos):
    """Format a log10 tick position as the value it represents.

    Args:
        x: tick location, interpreted as an exponent of 10.
        pos: tick index supplied by matplotlib's formatter protocol (unused).

    Returns:
        10**x rendered with one decimal place when it has a fractional part,
        otherwise rendered as a plain integer.
    """
    value = math.pow(10, x)
    fractional, integral = math.modf(value)
    return '%.1f' % value if fractional > 0 else '%d' % integral

# Tick formatter that renders log10 tick positions through unlog().
logFormatter = matplotlib.ticker.FuncFormatter(unlog)

In [5]:
# Daily per-host switch uplink totals. For each host and day the query sums
# discards, unicast packets and octets, and computes pct_discards as the number
# of samples with a non-zero discard count divided by 8640
# (presumably the number of 10-second samples in a day -- TODO confirm).
disco_daily_sql = """
#standardSQL
SELECT
  name AS hostname,
  FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_TRUNC(sts, DAY)) AS day,
  UNIX_SECONDS(TIMESTAMP_TRUNC(sts, DAY)) AS ts,
  SUM(IF(metric = 'switch.discards.uplink.tx', value, 0)) AS total_discards,
  SUM(IF(metric = 'switch.unicast.uplink.tx', value, 0)) AS total_packets,
  SUM(IF(metric = 'switch.octets.uplink.tx', value, 0)) AS total_bytes,
  COUNTIF(metric = 'switch.discards.uplink.tx' AND value > 0) / 8640 AS pct_discards

FROM (
  SELECT
    metric,
    REGEXP_EXTRACT(hostname, r'(mlab[1-4].[a-z]{3}[0-9]{2}).*') AS name,
    sample.timestamp AS sts,
    sample.value AS value
  FROM
    `mlab-sandbox.base_tables.switch*`,
    UNNEST(sample) AS sample
  WHERE
       metric LIKE 'switch.discards.uplink.tx'
    OR metric LIKE 'switch.unicast.uplink.tx'
    OR metric LIKE 'switch.octets.uplink.tx'
  GROUP BY
    hostname, metric, sts, value
)
WHERE
  name IS NOT NULL
GROUP BY
  hostname, day, ts
ORDER BY
  hostname, day, ts
"""

# Run the query synchronously and load the rows into a DataFrame.
result = query.sync_query(disco_daily_sql)
df_disco = pd.DataFrame(result)

In [6]:
# DISCO RATES: DAILY PERCENTILES (50th / 90th / 98th / 99th / max)
#
# Per host and day, percentiles of the switch.octets.uplink.tx sample values.
#
# APPROX_QUANTILES(value, N) returns N + 1 boundaries (minimum first, maximum
# last). With N = 100, OFFSET(p) is therefore the p-th percentile. The earlier
# form, APPROX_QUANTILES(value, 101)[ORDINAL(p)], selected boundary p - 1 of
# 101 intervals, i.e. roughly the 88th percentile for the "90th" column.

result = query.sync_query("""
#standardSQL
SELECT
  name AS hostname,
  FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_TRUNC(sts, DAY)) AS day,
  UNIX_SECONDS(TIMESTAMP_TRUNC(sts, DAY)) AS ts,

  APPROX_QUANTILES(value, 100)[OFFSET(50)] as bytes_50th,
  APPROX_QUANTILES(value, 100)[OFFSET(90)] as bytes_90th,
  APPROX_QUANTILES(value, 100)[OFFSET(98)] as bytes_98th,
  APPROX_QUANTILES(value, 100)[OFFSET(99)] as bytes_99th,
  MAX(value) as bytes_max

FROM (
  SELECT
    metric,
    REGEXP_EXTRACT(hostname, r'(mlab[1-4].[a-z]{3}[0-9]{2}).*') AS name,
    sample.timestamp AS sts,
    sample.value AS value
  FROM
    `mlab-sandbox.base_tables.switch*`,
    UNNEST(sample) AS sample
  WHERE
    metric LIKE 'switch.octets.uplink.tx'
  GROUP BY
    hostname, metric, sts, value
)
WHERE
  name IS NOT NULL
GROUP BY
  hostname, day, ts
ORDER BY
  hostname, day, ts
""")

df_disco_max = pd.DataFrame(result)

In [38]:
# Daily sidestream row counts per mlab1 host. The inner query groups by host,
# day and the connection 4-tuple (local/remote ip and port), keeping only rows
# where sliceFromIP(local_ip) = 1; the outer query counts those groups per
# host and day.
ss_count_sql = """#standardSQL                                                                    
CREATE TEMPORARY FUNCTION sliceFromIP(ipaddr STRING)
    AS ( MOD(CAST(REGEXP_EXTRACT(ipaddr, r'[:.]([0-9]+)$') AS INT64), 64) - 10 );


SELECT
   hostname, ts, count(*) as count
FROM (
    SELECT
        REGEXP_EXTRACT(test_id, r"\d\d\d\d/\d\d/\d\d/(mlab[1-4].[a-z]{3}[0-9]{2})") AS hostname,
        UNIX_SECONDS(TIMESTAMP_TRUNC(log_time, DAY)) AS ts                            
    FROM
         `mlab-sandbox.private.sidestream*`
    WHERE
      REGEXP_CONTAINS(test_id, r"mlab1.(dfw|lga|iad|lax|atl|nuq|yyz)[0-9]{2}.*")     
      AND sliceFromIP(web100_log_entry.connection_spec.local_ip) = 1
      --AND web100_log_entry.snap.HCThruOctetsAcked >= 1000000 -- 819200                          
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) >= 9000000                             
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) < 600000000                            
      --AND (web100_log_entry.snap.State = 1 OR                                       
      --  (web100_log_entry.snap.State >= 5 AND                                       
      --  web100_log_entry.snap.State <= 11))

    GROUP BY
      hostname, ts, web100_log_entry.connection_spec.remote_ip, web100_log_entry.connection_spec.remote_port, web100_log_entry.connection_spec.local_port, web100_log_entry.connection_spec.local_ip
)

GROUP BY
  hostname, ts
ORDER BY
  hostname, ts
    """

# Run the query synchronously and load the rows into a DataFrame.
result = query.sync_query(ss_count_sql)
df_ss_count = pd.DataFrame(result)



In [112]:
# Daily sidestream throughput per site and slice index. The inner query takes
# MAX(HCThruOctetsAcked) per connection (site, start timestamp, index and the
# local/remote ip and port); the outer query sums those per site, index and
# day and divides by 86400 (seconds per day) to give bytes_per_sec.
ss_bytes_sql = """#standardSQL   
CREATE TEMPORARY FUNCTION
  sliceFromIP(ipaddr STRING) AS ( MOD(MOD(CAST(REGEXP_EXTRACT(ipaddr, r'[:.]([0-9]+)$') AS INT64) - 10, 64), 13) );
SELECT
  site,
  index,
  UNIX_SECONDS(TIMESTAMP_TRUNC(TIMESTAMP_MICROS(StartTimeStamp), DAY)) AS ts,
  SUM(bytes) / 86400 AS bytes_per_sec
FROM (
  SELECT
    REGEXP_EXTRACT(test_id, r"\d\d\d\d/\d\d/\d\d/mlab[1-4].([a-z]{3}[0-9]{2})") AS site,
    sliceFromIP(web100_log_entry.connection_spec.local_ip) AS index,
    MAX(web100_log_entry.snap.HCThruOctetsAcked) AS bytes,
    web100_log_entry.snap.StartTimeStamp AS StartTimeStamp
  FROM
    -- `mlab-sandbox.private.sidestream*`
    `mlab-oti.private.sidestream*`
  WHERE
    REGEXP_CONTAINS(test_id, r"mlab[1-4].(dfw|lga|iad|lax|atl|nuq|yyz)[0-9]{2}.*")
          --AND sliceFromIP(web100_log_entry.connection_spec.local_ip) = 1
      --AND web100_log_entry.snap.HCThruOctetsAcked >= 1000000 -- 819200                          
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) >= 9000000                             
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) < 600000000                            
      --AND (web100_log_entry.snap.State = 1 OR                                       
      --  (web100_log_entry.snap.State >= 5 AND                                       
      --  web100_log_entry.snap.State <= 11))
  GROUP BY
    site,
    web100_log_entry.snap.StartTimeStamp,
    index,
    web100_log_entry.connection_spec.remote_ip,
    web100_log_entry.connection_spec.remote_port,
    web100_log_entry.connection_spec.local_port,
    web100_log_entry.connection_spec.local_ip )
GROUP BY
  site,
  index,
  ts
    """

# Run the query synchronously and load the rows into a DataFrame.
result = query.sync_query(ss_bytes_sql)
df_ss_bytes = pd.DataFrame(result)



In [95]:
# Daily sidestream throughput per site, across all slices. The inner query takes
# MAX(HCThruOctetsAcked) per connection; the outer query sums those per site
# and day and divides by 86400 (seconds per day) to give bytes_per_sec.
ss_total_bytes_sql = """#standardSQL   
SELECT
  site,
  UNIX_SECONDS(TIMESTAMP_TRUNC(TIMESTAMP_MICROS(StartTimeStamp), DAY)) AS ts,
  SUM(bytes) / 86400 AS bytes_per_sec
FROM (
  SELECT
    REGEXP_EXTRACT(test_id, r"\d\d\d\d/\d\d/\d\d/mlab[1-4].([a-z]{3}[0-9]{2})") AS site,
    MAX(web100_log_entry.snap.HCThruOctetsAcked) AS bytes,
    web100_log_entry.snap.StartTimeStamp AS StartTimeStamp
  FROM
    `mlab-sandbox.private.sidestream*`
  WHERE
    REGEXP_CONTAINS(test_id, r"mlab[1-4].(dfw|lga|iad|lax|atl|nuq|yyz)[0-9]{2}.*")
          --AND sliceFromIP(web100_log_entry.connection_spec.local_ip) = 1
      --AND web100_log_entry.snap.HCThruOctetsAcked >= 1000000 -- 819200                          
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) >= 9000000                             
      --AND (web100_log_entry.snap.SndLimTimeRwin +                                   
      --  web100_log_entry.snap.SndLimTimeCwnd +                                      
      --  web100_log_entry.snap.SndLimTimeSnd) < 600000000                            
      --AND (web100_log_entry.snap.State = 1 OR                                       
      --  (web100_log_entry.snap.State >= 5 AND                                       
      --  web100_log_entry.snap.State <= 11))
  GROUP BY
    site,
    web100_log_entry.snap.StartTimeStamp,
    web100_log_entry.connection_spec.remote_ip,
    web100_log_entry.connection_spec.remote_port,
    web100_log_entry.connection_spec.local_port,
    web100_log_entry.connection_spec.local_ip )
GROUP BY
  site,
  ts
    """

# Run the query synchronously and load the rows into a DataFrame.
result = query.sync_query(ss_total_bytes_sql)
df_ss_total_bytes = pd.DataFrame(result)



# Discards over time

In [9]:
# Grid of metros, one panel per metro, laid out as in the figure. Each panel
# plots daily switch discard totals for every mlab1 host in that metro.
sites = [
    ['dfw', 'lga', 'iad'],
    ['sea', 'atl', 'den'],
    ['mia', 'nuq', 'ord'],
]

# Sorted once up front: iterating a bare set gives an arbitrary order, which
# made line colours and legend order vary between runs. Later plotting cells
# already use sorted(set(...)).
hostnames = sorted(set(df_disco['hostname']))

# squeeze=False keeps `axes` two-dimensional for any grid size.
fig, axes = plt.subplots(nrows=len(sites), ncols=len(sites[0]), figsize=(12, 10), squeeze=False)
for i, site_row in enumerate(sites):
    for j, site in enumerate(site_row):
        ax = axes[i, j]
        # Only the left column keeps y tick labels, only the bottom row x tick labels.
        if j != 0:
            ax.set_yticklabels([])
        if i != len(sites)-1:
            ax.set_xticklabels([])
        for h in hostnames:
            if ('mlab1.' + site) in h:
                # Keep only days whose totals fall inside the plotted y-range.
                ds = df_disco[(df_disco['hostname'] == h) &
                              (df_disco['total_discards'] > 100) &
                              (df_disco['total_discards'] < 1000000)]
                ax.plot_date(dates.epoch2num(ds['ts']), ds['total_discards'], ls='-', ms=0, label=h[6:11])

        ax.set_title(site)
        ax.set_ylim(100, 1000000)
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=4, fontsize='x-small')
        ax.semilogy()

fig.subplots_adjust(hspace=0.3, wspace=0.4)
fig.suptitle('Discards over time')

Text(0.5,0.98,'Discards over time')

# Avg Daily Rate over time

In [10]:
# Grid of metros, one panel per metro, laid out as in the figure. Each panel
# plots the daily average uplink rate for every mlab1 host in that metro.
sites = [
    ['dfw', 'lga', 'iad'],
    ['sea', 'atl', 'den'],
    ['mia', 'nuq', 'ord'],
]

# Sorted once up front: iterating a bare set gives an arbitrary order, which
# made line colours and legend order vary between runs. Later plotting cells
# already use sorted(set(...)).
hostnames = sorted(set(df_disco['hostname']))

# squeeze=False keeps `axes` two-dimensional for any grid size.
fig, axes = plt.subplots(nrows=len(sites), ncols=len(sites[0]), figsize=(12, 10), squeeze=False)
for i, site_row in enumerate(sites):
    for j, site in enumerate(site_row):
        ax = axes[i, j]
        # Only the left column keeps y tick labels, only the bottom row x tick labels.
        if j != 0:
            ax.set_yticklabels([])
        if i != len(sites)-1:
            ax.set_xticklabels([])
        for h in hostnames:
            if ('mlab1.' + site) in h:
                # No discard-range filter here (unlike the discards figure).
                ds = df_disco[df_disco['hostname'] == h]
                # Daily total divided by 1e6 and by 86400 seconds per day.
                daily_rate = ds['total_bytes'] / 1000000 / 86400
                ax.plot_date(dates.epoch2num(ds['ts']), daily_rate, ls='-', ms=0, label=h[6:11])

        ax.set_title(site)
        ax.set_ylim(1, 1000)
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=3)
        ax.semilogy()

fig.subplots_adjust(hspace=0.3, wspace=0.4)
fig.suptitle('Daily Avg Rate over time')

Text(0.5,0.98,'Daily Avg Rate over time')

# 90th Percentile Over time

In [12]:
# An earlier 3x3 layout, kept for reference only (it was immediately replaced):
#   ['dfw', 'lga', 'iad'],
#   ['lax', 'atl', 'den'],
#   ['sea', 'nuq', 'ord'], # MIA is low utilization.
sites = [
    ['dfw', 'lga', 'iad'],
    ['lax', 'atl',  'nuq'], #  'ord', # MIA is low utilization. 'den', 'sea' low enough.
]

cols = len(sites[0])
fig = plt.figure(figsize=(4 * cols, 6))
axes = [
    [None] * cols,
    [None] * cols,
    #[None] * cols,
]

for r, siter in enumerate(sites):
    for c, site in enumerate(siter):
        for x, rate in enumerate(['90th']):
            ax = plt.subplot2grid((2, cols), (r, c))
            axes[r][c] = ax
            # Left column carries the y label; other columns hide y tick labels.
            if c == 0:
                ax.set_ylabel('Mbps')
            else:
                ax.set_yticklabels([])

            # Only the bottom row (r == 1) keeps x tick labels.
            if r != 1:
                ax.set_xticklabels([])

            prefix = 'mlab1.' + site
            ds_sites = df_disco_max[ df_disco_max['hostname'].str.contains(prefix) ]
            # ds_sites is already restricted to this metro, so its hostnames
            # are exactly the hosts to draw.
            for h in sorted(set(ds_sites['hostname'])):
                ds = ds_sites[ ds_sites['hostname'].str.contains(h) ]
                # NOTE(review): * 8 / 10000000 presumably converts bytes per
                # 10-second sample to Mbps -- confirm the sample interval.
                scaled = ds['bytes_' + rate] * 8 / 10000000
                ax.plot_date(dates.epoch2num(ds['ts']), scaled, ls='-', ms=0, label=h[6:11] + '-' +  rate)

            ax.set_title(site)
            ax.set_ylim(100, 1000)
            ax.tick_params(axis='x', labelrotation=90)
            ax.grid(color='#dddddd')
            ax.legend(loc=2, fontsize='x-small', ncol=2)

fig.suptitle('Daily Percentile Rates')
#fig.tight_layout()
#fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.show()

## Sidestream (SS) connection counts

In [39]:
# [ 'lga', nuq'], #  'ord', # MIA is low utilization. 'den', 'sea' low enough.

# A single row of metros; each metro gets one column with two stacked panels.
sites = [
    ['dfw', 'iad', 'lax', 'atl', 'lga'],
    #['dfw', 'iad', 'lax', 'atl'],
]

cols = len(sites[0])
fig = plt.figure(figsize=(4 * cols, 6))
axes = [
    [None] * cols,
    [None] * cols,
]

# Percentile column of df_disco_max drawn in the top row.
rate = '98th'

for siter in sites:
    # Bottom row (r = 1): daily counts from df_ss_count per mlab1 host.
    r = 1
    for c, site in enumerate(siter):
        ax = plt.subplot2grid((2, cols), (r, c))
        axes[r][c] = ax
        if c == 0:
            ax.set_ylabel('Connection Counts')
        else:
            ax.set_yticklabels([])

        prefix = 'mlab1.' + site
        ds_sites = df_ss_count[ df_ss_count['hostname'].str.contains(prefix) ]
        for h in sorted(set(ds_sites['hostname'])):
            #if 'lga02' in h:
            #    continue
            ds = ds_sites[ ds_sites['hostname'].str.contains(h) ]
            ax.plot_date(dates.epoch2num(ds['ts']), ds['count'], ls='-', ms=0, label=h[6:11])

        ax.set_title(site)
        ax.set_ylim(0, 150000)
        # 1498867200 .. 1533081600 is 2017-07-01 .. 2018-08-01 UTC.
        ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=2)

    # Top row (r = 0): switch traffic percentile per mlab1 host; x tick labels
    # are always hidden here.
    r = 0
    for c, site in enumerate(siter):
        ax = plt.subplot2grid((2, cols), (r, c))
        axes[r][c] = ax
        if c == 0:
            ax.set_ylabel('Mbps')
        else:
            ax.set_yticklabels([])
        ax.set_xticklabels([])

        prefix = 'mlab1.' + site
        ds_sites = df_disco_max[ df_disco_max['hostname'].str.contains(prefix) ]
        for h in sorted(set(ds_sites['hostname'])):
            ds = ds_sites[ ds_sites['hostname'].str.contains(h) ]
            # NOTE(review): * 8 / 10000000 presumably converts bytes per
            # 10-second sample to Mbps -- confirm the sample interval.
            scaled = ds['bytes_' + rate] * 8 / 10000000
            ax.plot_date(dates.epoch2num(ds['ts']), scaled, ls='-', ms=0, label=h[6:11] + '-' +  rate)

        ax.set_title(site)
        ax.set_ylim(100, 1000)
        ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=2)

fig.suptitle('Daily 98th Percentile Switch Traffic & TCP Connection Counts Per Metro')
#fig.tight_layout()
#fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.show()

In [96]:
# [ 'lga', nuq'], #  'ord', # MIA is low utilization. 'den', 'sea' low enough.

# A single row of metros; each metro gets one column with two stacked panels:
# switch traffic percentile on top, sidestream daily average rate below.
sites = [
    ['dfw', 'iad', 'lax', 'atl', 'lga'],
    #['dfw', 'iad', 'lax', 'atl'],
]

cols = len(sites[0])
fig = plt.figure(figsize=(4 * cols, 6))
axes = [
    [None] * cols,
    [None] * cols,
]

# Percentile column of df_disco_max drawn in the top row.
rate = '98th'

for siter in sites:
    # Bottom row (r = 1): daily average rate from df_ss_total_bytes per site.
    r = 1
    for c, site in enumerate(siter):
        ax = plt.subplot2grid((2, cols), (r, c))
        axes[r][c] = ax
        if c == 0:
            # Fixed typo: the label previously read 'Dailiy Avg Mbps'.
            ax.set_ylabel('Daily Avg Mbps')
        else:
            ax.set_yticklabels([])

        prefix = site
        ds_sites = df_ss_total_bytes[ df_ss_total_bytes['site'].str.contains(prefix) ]
        for h in sorted(set(ds_sites['site'])):
            #if 'lga02' in h:
            #    continue
            ds = ds_sites[ ds_sites['site'].str.contains(h) ]
            # Rows are not ordered by the query; sort so the line is drawn in time order.
            ds = ds.sort_values(by=['ts'])
            # bytes/sec * 8 / 1e6 -> Mbps.
            ax.plot_date(dates.epoch2num(ds['ts']), 8 * ds['bytes_per_sec'] / 1000000, ls='-', ms=0, label=h)

        ax.set_title(site)
        #ax.set_ylim(0, 150000)
        # 1498867200 .. 1533081600 is 2017-07-01 .. 2018-08-01 UTC.
        ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=2)

    # Top row (r = 0): switch traffic percentile per mlab1 host; x tick labels
    # are always hidden here.
    r = 0
    for c, site in enumerate(siter):
        ax = plt.subplot2grid((2, cols), (r, c))
        axes[r][c] = ax
        if c == 0:
            ax.set_ylabel('Mbps')
        else:
            ax.set_yticklabels([])
        ax.set_xticklabels([])

        prefix = 'mlab1.' + site
        ds_sites = df_disco_max[ df_disco_max['hostname'].str.contains(prefix) ]
        for h in sorted(set(ds_sites['hostname'])):
            ds = ds_sites[ ds_sites['hostname'].str.contains(h) ]
            # NOTE(review): * 8 / 10000000 presumably converts bytes per
            # 10-second sample to Mbps -- confirm the sample interval.
            scaled = ds['bytes_' + rate] * 8 / 10000000
            ax.plot_date(dates.epoch2num(ds['ts']), scaled, ls='-', ms=0, label=h[6:11] + '-' +  rate)

        ax.set_title(site)
        ax.set_ylim(100, 1000)
        ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=2)

# The bottom row shows daily average Mbps, not connection counts; the previous
# title was copied from the connection-count figure.
fig.suptitle('Daily 98th Percentile Switch Traffic & Daily Avg TCP Download Per Metro')
#fig.tight_layout()
#fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.show()

In [89]:
# Sanity check: list the distinct hostnames in df_disco_max matching 'mlab1.dfw'.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(set(df_disco_max[df_disco_max['hostname'].str.contains('mlab1.dfw')]['hostname']))

set([u'mlab1.dfw06', u'mlab1.dfw05', u'mlab1.dfw04', u'mlab1.dfw03', u'mlab1.dfw02', u'mlab1.dfw01'])


In [113]:
# [ 'lga', nuq'], #  'ord', # MIA is low utilization. 'den', 'sea' low enough.

# Slice index -> metros plotted for that slice. The figure has one row per
# slice below a top row of switch traffic.
sites = {
    1: ['dfw', 'iad', 'lax', 'atl', 'lga', 'yyz'],
    7: ['dfw', 'iad', 'lax', 'atl', 'lga', 'yyz'],
    9: ['dfw', 'iad', 'lax', 'atl', 'lga', 'yyz'],
}

cols = len(sites[1])
rows = len(sites) + 1  # one row per slice plus the switch-traffic row
fig = plt.figure(figsize=(4 * cols, 6))
axes = [[None] * cols for unused_row in range(rows)]

# Top row (r = 0): 98th percentile switch traffic per mlab1 host. Drawn once
# here; previously it was rebuilt on every iteration of the slice loop. The
# label suffix is the literal '98th' rather than a `rate` variable, which this
# cell never defined (it only worked when left over from an earlier cell).
r = 0
for c, site in enumerate(sites[1]):
    ax = plt.subplot2grid((rows, cols), (r, c))
    axes[r][c] = ax
    if c == 0:
        ax.set_ylabel('Mbps')
    else:
        ax.set_yticklabels([])
    ax.set_xticklabels([])

    prefix = 'mlab1.' + site
    ds_sites = df_disco_max[ df_disco_max['hostname'].str.contains(prefix) ]
    for h in sorted(set(ds_sites['hostname'])):
        ds = ds_sites[ ds_sites['hostname'].str.contains(h) ]
        # NOTE(review): * 8 / 10000000 presumably converts bytes per
        # 10-second sample to Mbps -- confirm the sample interval.
        scaled = ds['bytes_98th'] * 8 / 10000000
        ax.plot_date(dates.epoch2num(ds['ts']), scaled, ls='-', ms=0, label=h[6:11] + '-98th')

    ax.set_title(site)
    ax.set_ylim(100, 1000)
    # 2017-07-01 .. 2018-08-01 UTC; note this is wider than the slice rows below.
    ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
    #ax.set_xlim(dates.epoch2num(1527811200), dates.epoch2num(1533081600))
    ax.tick_params(axis='x', labelrotation=90)
    ax.grid(color='#dddddd')
    ax.legend(loc=2, fontsize='x-small', ncol=2)

# Rows 1..N: one per slice, in ascending slice order. sorted(items()) is
# deterministic and works on Python 2 and 3; dict.iteritems() is Python 2 only
# and yields keys in arbitrary order there.
for r, (slice_index, siter) in enumerate(sorted(sites.items()), 1):
    for c, site in enumerate(siter):
        ax = plt.subplot2grid((rows, cols), (r, c))
        axes[r][c] = ax
        if c == 0:
            ax.set_ylabel('Daily Average Mbps: slice(%d)' % slice_index)
        else:
            ax.set_yticklabels([])

        # Only the last row keeps x tick labels.
        if r != rows - 1:
            ax.set_xticklabels([])

        prefix = site
        ds_sites = df_ss_bytes[ (df_ss_bytes['index'] == slice_index) ]
        ds_sites = ds_sites[ ds_sites['site'].str.contains(prefix) ]

        for h in sorted(set(ds_sites['site'])):
            #if 'lga02' in h:
            #    continue
            ds = ds_sites[ ds_sites['site'].str.contains(h) ]
            # Rows are not ordered by the query; sort so the line is drawn in time order.
            ds = ds.sort_values(by=['ts'])
            # bytes/sec * 8 / 1e6 -> Mbps.
            ax.plot_date(dates.epoch2num(ds['ts']), 8 * ds['bytes_per_sec'] / 1000000, ls='-', ms=0, label=h)

        ax.set_title(site)
        ax.set_ylim(0, 250)
        #ax.set_xlim(dates.epoch2num(1498867200), dates.epoch2num(1533081600))
        # 2018-06-01 .. 2018-08-01 UTC.
        ax.set_xlim(dates.epoch2num(1527811200), dates.epoch2num(1533081600))
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=2, fontsize='x-small', ncol=2)

fig.suptitle('Daily 98th Percentile Switch Traffic & Daily Avg TCP Download Per Metro')
#fig.tight_layout()
#fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.show()

# Percent of Timebins with Discards 

In [6]:
# One panel per host showing df_disco['pct_discards'] over time.
title = 'Daily percentage of timebins with any discards'
sites = [
    ['mlab1.dfw02', 'mlab1.dfw03', 'mlab1.dfw04'],
]

fig, axes = plt.subplots(nrows=1, ncols=len(sites[0]))
for hosts in sites:
    # `axes` is one-dimensional (single row), so pair each host with its panel.
    for ax, host in zip(axes, hosts):
        host_rows = df_disco[ df_disco['hostname'] == host ]
        day_nums = dates.epoch2num(host_rows['ts'])
        ax.plot_date(day_nums, host_rows['pct_discards'], ls='-', ms=0, label=host)

        ax.set_title(host)
        ax.set_ylim(-0.01, .4)
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=4, fontsize='x-small')

fig.subplots_adjust(hspace=0.3, wspace=0.4)
fig.suptitle(title)

Text(0.5,0.98,u'Daily percentage of timebins with any discards')

# Total Packets

In [58]:
# One panel per host showing df_disco['total_packets'] over time.
# The title previously read 'Daily percentage of timebins with any discards',
# copied from the preceding figure, although this cell plots total packets.
title = 'Daily total packets'
sites = [
    ['mlab1.dfw02', 'mlab1.dfw03', 'mlab1.dfw04'],
]

fig, axes = plt.subplots(nrows=1, ncols=len(sites[0]))
for i, hosts in enumerate(sites):
    for j, host in enumerate(hosts):
        # `axes` is one-dimensional (single row), so index by column only.
        ax = axes[j]

        ds = df_disco[ df_disco['hostname'] == host ]
        ax.plot_date(dates.epoch2num(ds['ts']), ds['total_packets'], ls='-', ms=0, label=host)

        ax.set_title(host)
        #ax.set_ylim(-0.01, .4)
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=4, fontsize='x-small')


fig.subplots_adjust(hspace=0.3, wspace=0.4)
fig.suptitle(title)

Text(0.5,0.98,u'Daily percentage of timebins with any discards')

# Total Packet Discard Ratios (Switch Loss Rate)

In [6]:
# Grid of metros, one panel per metro, laid out as in the figure. Each panel
# plots the daily ratio total_discards / total_packets for every mlab1 host.
sites = [
    ['dfw', 'lga', 'iad'],
    ['sea', 'atl', 'den'],
    ['mia', 'nuq', 'ord'],
]

# Sorted once up front: iterating a bare set gives an arbitrary order, which
# made line colours and legend order vary between runs. Other plotting cells
# already use sorted(set(...)).
hostnames = sorted(set(df_disco['hostname']))

# squeeze=False keeps `axes` two-dimensional for any grid size.
fig, axes = plt.subplots(nrows=len(sites), ncols=len(sites[0]), figsize=(12, 10), squeeze=False)
for i, site_row in enumerate(sites):
    for j, site in enumerate(site_row):
        ax = axes[i, j]
        ax.set_title(site)
        # Only the left column keeps y tick labels and carries the y label;
        # only the bottom row keeps x tick labels.
        if j != 0:
            ax.set_yticklabels([])
        if i != len(sites)-1:
            ax.set_xticklabels([])
        if j == 0:
            ax.set_ylabel('Daily Loss Ratio')

        for h in hostnames:
            if 'mlab1.' + site in h:
                # Same discard-count window as the 'Discards over time' figure.
                ds = df_disco[ (df_disco['hostname'] == h) &
                               (df_disco['total_discards'] > 100) &
                               (df_disco['total_discards'] < 1000000) ]
                ratio = ds['total_discards'] / ds['total_packets']
                ax.plot_date(dates.epoch2num(ds['ts']), ratio, ls='-', ms=0, label=h[:11])
        ax.set_ylim(10**-6, 10**-3)
        ax.tick_params(axis='x', labelrotation=90)
        ax.grid(color='#dddddd')
        ax.legend(loc=4, fontsize='x-small')
        ax.semilogy()

fig.subplots_adjust(hspace=0.3, wspace=0.4)
fig.suptitle('Switch Packet Loss Rate')

Text(0.5,0.98,u'Switch Packet Loss Rate')