In [None]:
import pandas as pd
import sqlite3 as sq
import pyasn
from IPy import IP
import requests

#### ================================================================================

# Load measurements

In [None]:
full_df = pd.read_parquet('measurements.parquet')

In [None]:
def create_table_column(row):
    """Build the 'resolver_protocol' label for one measurement row.

    Rows whose 'resolver_name' equals 'Probe Resolver' are labelled
    'Probe Do<proto>'; every other resolver is labelled 'Public Do<proto>',
    where <proto> is the row's 'proto' value.
    """
    is_probe_resolver = row['resolver_name'] == 'Probe Resolver'
    prefix = 'Probe Do' if is_probe_resolver else 'Public Do'
    return prefix + row['proto']

# Label every measurement as Probe/Public Do<proto> (row-wise, one label per row)
full_df['resolver_protocol'] = full_df.apply(create_table_column, axis=1)

In [None]:
filter_list = pd.read_csv('public-resolvers-ipv4s.csv', header=None)

In [None]:
# `filter_list` is a DataFrame (read with header=None). Passing it to isin()
# directly compares against its column labels, not its cell values, so the
# check would always be False. Flatten the cell values into one array first.
public_resolver_ips = filter_list.to_numpy().ravel()
tmp = full_df[full_df['resolver_name'] == 'Probe Resolver']['dst_address'].isin(public_resolver_ips)

In [None]:
tmp.unique()
# False => no Probe resolvers with destinations from filter list

In [None]:
full_df['resolver_name'].unique()

In [None]:
full_df.columns

In [None]:
full_df['edns_udp_size'].unique()

In [None]:
def _edns_size_bucket(size):
    """Keep the listed EDNS(0) sizes (and the 'none' placeholder); map everything else to 'other'."""
    if size in ['512', '1232', '4096', 'none']:
        return size
    return 'other'

# Missing sizes become 'none' before bucketing
full_df['edns0_buff_size'] = full_df['edns_udp_size'].fillna('none').map(_edns_size_bucket)

In [None]:
# A measurement is 'ok' when no error message was recorded, otherwise 'failed'.
# isna() treats None, NaN and pd.NA alike, so the flag does not depend on how
# the parquet reader represents missing strings (an `is None` test would mark
# NaN / pd.NA rows as 'failed').
full_df['failure'] = full_df['err_msg'].isna().map({True: 'ok', False: 'failed'})

### Remove IPv6 target addresses

In [None]:
full_df['dst_address'].str.contains(':').sum()
# IPv6 target addresses

In [None]:
# Keep only measurements whose target address contains no ':' (i.e. not IPv6).
# A boolean mask is used instead of drop(<index labels>) so that duplicate index
# labels cannot remove additional rows. ':' is matched literally, and rows with
# a missing address are kept rather than raising on a NaN mask.
is_ipv6_target = full_df['dst_address'].str.contains(':', regex=False, na=False)
full_df = full_df[~is_ipv6_target]

In [None]:
full_df['dst_address'].str.contains(':').sum()
# Should be 0

#### ================================================================================

# Dataset Overview Table: Samples, Failure Rates, EDNS(0) Buffer Sizes - part of Table 3

### Samples

In [None]:
full_df['resolver_protocol'].value_counts()

In [None]:
full_df['resolver_protocol'].value_counts().sum()

### Failures

In [None]:
pd.crosstab(full_df['failure'], full_df['resolver_protocol'], margins=True)

In [None]:
pd.crosstab(full_df['failure'], full_df['resolver_protocol'], margins=True, normalize='columns'
           ).style.format('{:,.2%}')

### EDNS(0) Buffer Sizes

In [None]:
ok_df = full_df[full_df['failure'] == 'ok']

In [None]:
pd.crosstab(ok_df['edns0_buff_size'], ok_df['resolver_protocol'], margins=True)

In [None]:
pd.crosstab(ok_df['edns0_buff_size'], ok_df['resolver_protocol'], margins=True, normalize='columns'
           ).style.format('{:,.2%}')

In [None]:
# DoUDP: recommended buffer size of 1232 B fulfilled

len(ok_df[(ok_df['proto'] == 'UDP') & (ok_df['edns0_buff_size'] == '1232')]) / len(ok_df[ok_df['proto'] == 'UDP']) 

In [None]:
ok_df[ok_df['resolver_protocol'] == 'Public DoUDP']['edns_udp_size'].astype('float').max()

In [None]:
tmp = ok_df[ok_df['resolver_protocol'] == 'Probe DoUDP'
           ]['edns_udp_size'].astype('float').value_counts(normalize=True).sort_index(ascending=False)

In [None]:
tmp[tmp.index > 4096].sum() * 100

In [None]:
pd.crosstab(ok_df['edns0_buff_size'], ok_df['resolver_name'], margins=True)

In [None]:
pd.crosstab(ok_df['edns0_buff_size'], ok_df['resolver_name'], margins=True, normalize='columns'
           ).style.format('{:,.2%}')

#### ================================================================================

# Response Time and Failure Rates analysis

## Create sub dataframe

In [None]:
# Columns needed for the response-time / failure-rate analysis.
# .copy() makes sub_df an independent frame: later cells add and overwrite
# columns on it ('asn', 'resolver_name'), which on a plain slice of full_df
# triggers SettingWithCopyWarning.
sub_df = full_df[['msm_id', 'probe_id', 'country_code', 'continent_code',
             'resolver_name', 'dst_address', 'proto', 'result_rt', 'public_src_ip']].copy()

In [None]:
sub_df

In [None]:
sub_df['public_src_ip'].isna().sum()
# Should be 0

### Resolve ASes

In [None]:
def getASN(ip, db=None):
    """Look up the origin AS of an IP address (best effort).

    Parameters
    ----------
    ip : str
        Address to look up.
    db : object, optional
        Any object exposing ``lookup(ip)``. Defaults to the notebook-level
        ``asndb`` database, which must be loaded before the first call.

    Returns
    -------
    tuple
        Whatever ``lookup`` returns (callers take element ``[0]`` as the ASN;
        presumably ``(asn, prefix)`` for pyasn, verify against its docs), or
        ``(None, None)`` when the lookup raises.
    """
    try:
        return (asndb if db is None else db).lookup(ip)
    except Exception:
        # Return a real tuple, not the string '(None, None)': callers index the
        # result with [0], which on the string would produce the character '('.
        return (None, None)
    
def lookupASName(asn, timeout=10):
    """Fetch the AS name for an AS number from the CAIDA AS Rank REST API.

    Parameters
    ----------
    asn : int, float or str
        AS number. Whole-number floats (e.g. ``3320.0``, which appear when the
        ASN column contains NaN) are converted to ``int`` before building the URL.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    str or None
        The ``asnName`` field of the response, or ``None`` when the response
        carries no record for this ASN.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts or a non-2xx HTTP status.
    """
    if isinstance(asn, float) and asn.is_integer():
        asn = int(asn)
    response = requests.get("http://api.asrank.caida.org/v2/restful/asns/" + str(asn),
                            timeout=timeout)
    response.raise_for_status()
    # NOTE(review): the API is assumed to answer unknown ASNs with a null
    # "asn" record; confirm against the API documentation.
    asn_record = response.json()["data"]["asn"]
    if not asn_record:
        return None
    return asn_record["asnName"]

In [None]:
# Load the IP-to-AS database, then tag each measurement with the AS number
# (first element of the lookup result) of the probe's public source address.
asndb = pyasn.pyasn('pyasn.dat')
sub_df['asn'] = sub_df['public_src_ip'].map(lambda src_ip: getASN(src_ip)[0])

In [None]:
sub_df

In [None]:
sub_df['resolver_name'].unique()

In [None]:
# Shorten the long public-resolver names; names not listed here stay unchanged
short_resolver_names = {
    'Cloudflare DNS': 'Cloudflare',
    'Comodo Secure DNS': 'Comodo',
    'Google Public DNS': 'Google',
    'Neustar UltraDNS': 'Neustar',
    'Yandex.DNS': 'Yandex',
}
sub_df['resolver_name'] = sub_df['resolver_name'].replace(short_resolver_names)

### Distribution of Probes by Continent / Country / ASN

In [None]:
probe_list_with_asn = sub_df.drop_duplicates('probe_id')[['probe_id', 'country_code', 'continent_code',
                                                          'public_src_ip', 'asn']]

In [None]:
probe_list_with_asn

In [None]:
probe_list_with_asn['continent_code'].value_counts()

In [None]:
probe_list_with_asn['continent_code'].value_counts(normalize=True)

In [None]:
# Probes per continent: absolute count next to the share formatted as " (xx.xx%)"
continent_counts = probe_list_with_asn['continent_code'].value_counts()
continent_shares = probe_list_with_asn['continent_code'].value_counts(normalize=True)
probe_dist_by_continent = pd.concat([continent_counts, continent_shares], axis=1).reset_index()
probe_dist_by_continent.columns = ['Continent', 'abs', 'rel']
probe_dist_by_continent['rel'] = probe_dist_by_continent['rel'].map(
    lambda share: " ({:.2%})".format(share))
probe_dist_by_continent

In [None]:
probe_dist_by_continent['Number of Probes'] = probe_dist_by_continent['abs'].astype(str) + probe_dist_by_continent['rel']

In [None]:
print(probe_dist_by_continent[['Continent', 'Number of Probes']].to_latex(index=False))

In [None]:
probe_list_with_asn['country_code'].nunique()

In [None]:
probe_list_with_asn['country_code'].value_counts()

In [None]:
probe_list_with_asn['country_code'].value_counts(normalize=True)

In [None]:
probe_list_with_asn['asn'].nunique()

In [None]:
probe_list_with_asn['asn'].value_counts()

In [None]:
probe_list_with_asn['asn'].value_counts(normalize=True)

In [None]:
pd.concat([probe_list_with_asn['asn'].value_counts().head(10),
           probe_list_with_asn['asn'].value_counts(normalize=True).head(10)], axis=1)

In [None]:
pd.concat([probe_list_with_asn['asn'].value_counts().head(10),
           probe_list_with_asn['asn'].value_counts(normalize=True).head(10)], axis=1).sum()

In [None]:
pd.concat([probe_list_with_asn['asn'].value_counts().tail(-10),
           probe_list_with_asn['asn'].value_counts(normalize=True).tail(-10)], axis=1)

In [None]:
pd.concat([probe_list_with_asn['asn'].value_counts().tail(-10),
           probe_list_with_asn['asn'].value_counts(normalize=True).tail(-10)], axis=1).sum()

In [None]:
top_n = 10

probe_dist_by_asn = pd.concat([probe_list_with_asn['asn'].value_counts().head(top_n),
                               probe_list_with_asn['asn'].value_counts(normalize=True).head(top_n)], axis=1)
probe_dist_by_asn = probe_dist_by_asn.reset_index() 
probe_dist_by_asn.columns = ['ASN', 'abs', 'rel']
probe_dist_by_asn['rel'] = probe_dist_by_asn['rel'].map(" ({:.2%})".format)
probe_dist_by_asn['Number of Probes'] = probe_dist_by_asn['abs'].astype(str) + probe_dist_by_asn['rel']
probe_dist_by_asn['ASN'] = 'AS' + probe_dist_by_asn['ASN'].astype(str)

In [None]:
print(probe_dist_by_asn[['ASN', 'Number of Probes']].to_latex(index=False))

### Split DoTCP and DoUDP results

In [None]:
udp = sub_df[sub_df['proto'] == 'UDP']
tcp = sub_df[sub_df['proto'] == 'TCP']

In [None]:
udp

In [None]:
tcp

#### ================================================================================

# Response Times

## Data Prep

In [None]:
udp_rt = udp.dropna(subset=['result_rt'])

In [None]:
tcp_rt = tcp.dropna(subset=['result_rt'])

In [None]:
# Median response time per (probe, resolver, target address, continent),
# computed separately for DoUDP and DoTCP
rt_group_keys = ['probe_id', 'resolver_name', 'dst_address', 'continent_code']

rt_pr_udp = udp_rt.groupby(rt_group_keys, as_index=False)['result_rt'].median()
rt_pr_tcp = tcp_rt.groupby(rt_group_keys, as_index=False)['result_rt'].median()

In [None]:
rt_pr_udp

In [None]:
rt_pr_tcp

In [None]:
# Inner join on the grouping keys: only combinations with a median for both
# protocols survive, which removes probe resolvers that do not respond over DoTCP
rt_merged = pd.merge(
    rt_pr_udp,
    rt_pr_tcp,
    how='inner',
    on=['probe_id', 'resolver_name', 'dst_address', 'continent_code'],
    suffixes=['_udp', '_tcp'],
)

In [None]:
rt_merged

In [None]:
from ipaddress import ip_address
rt_merged['dst_address'].map(lambda x: ip_address(x).is_private).value_counts()

### Imports for Plotting

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import SymLogNorm
from cycler import cycler

In [None]:
rt_merged['diff_rt'] = rt_merged['result_rt_tcp'] - rt_merged['result_rt_udp']

In [None]:
rt_merged

In [None]:
diff_grps = rt_merged.groupby('resolver_name', as_index=False)

## Plot Regional Heatmaps - Continent part of Figure 2 in the paper

In [None]:
# "Total" column of the DoUDP heatmap: median over all continents per resolver
udp_total = (
    rt_merged.groupby('resolver_name', as_index=False)['result_rt_udp'].median()
    .assign(continent_code='Total')
    [['resolver_name', 'continent_code', 'result_rt_udp']]
)

In [None]:
# Aggregate "Public Resolver" row for the "Total" column, placed before the per-resolver rows.
# NOTE(review): rt_merged still contains the 'Probe Resolver' rows, so this
# aggregate includes them — confirm that this is intended.
udp_public_resolver = [{
    'resolver_name': 'Public Resolver',
    'continent_code': 'Total',
    'result_rt_udp': rt_merged['result_rt_udp'].median(),
}]
udp_total = pd.concat([pd.DataFrame(udp_public_resolver), udp_total], ignore_index=True)

In [None]:
udp_total

In [None]:
# "Total" column of the DoTCP heatmap: median over all continents per resolver
tcp_total = (
    rt_merged.groupby('resolver_name', as_index=False)['result_rt_tcp'].median()
    .assign(continent_code='Total')
    [['resolver_name', 'continent_code', 'result_rt_tcp']]
)

In [None]:
# Aggregate "Public Resolver" row for the "Total" column, placed before the per-resolver rows.
# NOTE(review): rt_merged still contains the 'Probe Resolver' rows, so this
# aggregate includes them — confirm that this is intended.
tcp_public_resolver = [{
    'resolver_name': 'Public Resolver',
    'continent_code': 'Total',
    'result_rt_tcp': rt_merged['result_rt_tcp'].median(),
}]
tcp_total = pd.concat([pd.DataFrame(tcp_public_resolver), tcp_total], ignore_index=True)

In [None]:
tcp_total

In [None]:
# DoUDP medians per resolver and continent, followed by the "Total" rows.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
udp_heatmap = pd.concat([
    rt_merged.groupby(['resolver_name', 'continent_code'], as_index=False)['result_rt_udp'].median(),
    udp_total,
])
# Per-continent aggregate row over all resolvers in rt_merged.
# NOTE(review): this includes the 'Probe Resolver' rows — confirm intended.
udp_heatmap_public_resolver = rt_merged.groupby(['continent_code'], as_index=False
                               )['result_rt_udp'].median()
udp_heatmap_public_resolver['resolver_name'] = 'Public Resolver'
udp_heatmap = pd.concat([udp_heatmap_public_resolver, udp_heatmap], ignore_index=True)
# Pivot to resolver x continent; each cell holds exactly one value, so the
# 'median' aggregation just passes it through
udp_heatmap = pd.crosstab(udp_heatmap['resolver_name'], udp_heatmap['continent_code'],
                          values=udp_heatmap['result_rt_udp'], aggfunc='median'
                         )

In [None]:
# DoTCP medians per resolver and continent, followed by the "Total" rows.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
tcp_heatmap = pd.concat([
    rt_merged.groupby(['resolver_name', 'continent_code'], as_index=False)['result_rt_tcp'].median(),
    tcp_total,
])
# Per-continent aggregate row over all resolvers in rt_merged.
# NOTE(review): this includes the 'Probe Resolver' rows — confirm intended.
tcp_heatmap_public_resolver = rt_merged.groupby(['continent_code'], as_index=False
                               )['result_rt_tcp'].median()
tcp_heatmap_public_resolver['resolver_name'] = 'Public Resolver'
tcp_heatmap = pd.concat([tcp_heatmap_public_resolver, tcp_heatmap], ignore_index=True)
# Pivot to resolver x continent; each cell holds exactly one value, so the
# 'median' aggregation just passes it through
tcp_heatmap = pd.crosstab(tcp_heatmap['resolver_name'], tcp_heatmap['continent_code'],
                          values=tcp_heatmap['result_rt_tcp'], aggfunc='median'
                         )

In [None]:
# move Probe Resolver to the bottom and Public Resolver to the top
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
#  .loc[[label]] keeps the selected row as a one-row DataFrame)
udp_heatmap = pd.concat([udp_heatmap[udp_heatmap.index != 'Probe Resolver'],
                         udp_heatmap.loc[['Probe Resolver']]])
udp_heatmap_public_resolver = udp_heatmap[udp_heatmap.index == 'Public Resolver']
udp_heatmap_tmp = udp_heatmap[udp_heatmap.index != 'Public Resolver']
udp_heatmap = pd.concat([udp_heatmap_public_resolver, udp_heatmap_tmp])

tcp_heatmap = pd.concat([tcp_heatmap[tcp_heatmap.index != 'Probe Resolver'],
                         tcp_heatmap.loc[['Probe Resolver']]])
tcp_heatmap_public_resolver = tcp_heatmap[tcp_heatmap.index == 'Public Resolver']
tcp_heatmap_tmp = tcp_heatmap[tcp_heatmap.index != 'Public Resolver']
tcp_heatmap = pd.concat([tcp_heatmap_public_resolver, tcp_heatmap_tmp])

In [None]:
udp_heatmap

In [None]:
tcp_heatmap

In [None]:
((tcp_heatmap['Total'] / udp_heatmap['Total']) -1 ) * 100  # relative increase from DoUDP to DoTCP

In [None]:
diff_heatmap = tcp_heatmap - udp_heatmap

In [None]:
diff_heatmap

In [None]:
# Two stacked heatmaps sharing the x axis (continents):
#   top    - median DoTCP response time per resolver
#   bottom - difference of the DoTCP and DoUDP heatmaps (diff_heatmap)
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(6, 6.5), gridspec_kw = {'wspace':0, 'hspace':0.15}, sharex=True)
# Top panel: colour scale fixed to 0..600 so larger values saturate
ax1 = sns.heatmap(tcp_heatmap,
            annot=True,
            cmap='RdYlGn_r', #'RdYlBu_r',  # "RdYlGn_r",
            cbar_kws={'label' : 'Response Time [ms]'},
            ax=ax1,
            fmt='.1f',
            vmin=0,
            vmax=600,
#             xticklabels=False
           )
# Drop the automatic axis titles taken from the index / column names
ax1.set_xlabel('')
ax1.set_ylabel('')

# Bottom panel: diverging colour map with symmetric limits, so 0 sits at the centre colour
ax2 = sns.heatmap(diff_heatmap,
            annot=True,
            cmap='RdBu_r', #"RdYlGn_r",
            cbar_kws={'label' : 'Response Time Difference [ms]'},
            ax=ax2,
            fmt='.1f',
            vmin=-200,
            vmax=200,
           )
ax2.set_xlabel('')
ax2.set_ylabel('')

# NOTE(review): fig.show() may only emit a warning on non-GUI (inline) backends — confirm it is needed
fig.show()
# Write the figure to the working directory
fig.savefig('diff-rt-heatmap-CC-merged.pdf', bbox_inches='tight')

## Plot ASN Heatmaps - ASN part of Figure 2 in the paper

### Data Prep

In [None]:
# Median response time per (probe, resolver, target address, AS),
# computed separately for DoUDP and DoTCP
rt_asn_group_keys = ['probe_id', 'resolver_name', 'dst_address', 'asn']

rt_pr_asn_udp = udp_rt.groupby(rt_asn_group_keys, as_index=False)['result_rt'].median()
rt_pr_asn_tcp = tcp_rt.groupby(rt_asn_group_keys, as_index=False)['result_rt'].median()

In [None]:
rt_pr_asn_udp

In [None]:
rt_pr_asn_tcp

In [None]:
# Inner join on the grouping keys: only combinations with a median for both
# protocols survive, which removes probe resolvers that do not respond over DoTCP
rt_asn_merged = pd.merge(
    rt_pr_asn_udp,
    rt_pr_asn_tcp,
    how='inner',
    on=['probe_id', 'resolver_name', 'dst_address', 'asn'],
    suffixes=['_udp', '_tcp'],
)

In [None]:
rt_asn_merged

### Get AS with the Most Probes

In [None]:
probe_ids = rt_asn_merged.drop_duplicates(subset=['probe_id'])
probe_ids = probe_ids[['probe_id', 'asn']]
probe_ids

In [None]:
# Number of probes per AS, restricted to the ten ASes with the most probes,
# with the AS name resolved through the AS Rank API (one HTTP request per row)
probes_per_asn = probe_ids.groupby('asn', as_index = False).size()
probes_per_asn_top_10 = probes_per_asn.nlargest(10, 'size')
probes_per_asn_top_10['asn_name'] = probes_per_asn_top_10.apply(lambda row : lookupASName(row['asn']), axis = 1)

# Manual display-name override for the row with index label 32.
# NOTE(review): the label depends on the row order of `probes_per_asn` and
# therefore on the dataset; selecting the row by its AS number would be more
# robust. The guard prevents .at[] from silently appending a new, otherwise
# empty row when the label is not part of the top 10.
manual_name_row = 32
if manual_name_row in probes_per_asn_top_10.index:
    probes_per_asn_top_10.at[manual_name_row, 'asn_name'] = 'Orange S.A.'
else:
    print("Row %d is not among the top-10 ASes; 'Orange S.A.' override skipped" % manual_name_row)
probes_per_asn_top_10

### Filter RTs to Top 10 AS

In [None]:
rt_asn_merged_top_10 = rt_asn_merged[rt_asn_merged['asn'].isin(probes_per_asn_top_10['asn'])]
rt_asn_merged_top_10

In [None]:
rt_asn_merged_top_10 = rt_asn_merged_top_10.merge(probes_per_asn_top_10, how='left', left_on='asn', right_on='asn')
rt_asn_merged_top_10[(rt_asn_merged_top_10['asn_name'] == 'Orange S.A.') & (rt_asn_merged_top_10['resolver_name'] == 'OpenNIC')]

# Orange S.A. on openNIC is empty, which is expected

In [None]:
# DoUDP medians per resolver and AS name
udp_rt_by_resolver_and_asn = rt_asn_merged_top_10.groupby(
    ['resolver_name', 'asn_name'], as_index=False)['result_rt_udp'].median()
# Per-AS aggregate row over all resolvers in the merged frame.
# NOTE(review): this includes the 'Probe Resolver' rows — confirm intended.
udp_asn_heatmap_public_resolver = (
    rt_asn_merged_top_10.groupby(['asn_name'], as_index=False)['result_rt_udp'].median()
    .assign(resolver_name='Public Resolver')
)
udp_asn_heatmap = pd.concat([udp_asn_heatmap_public_resolver, udp_rt_by_resolver_and_asn],
                            ignore_index=True)
# Pivot to resolver x AS name
udp_asn_heatmap = pd.crosstab(index=udp_asn_heatmap['resolver_name'],
                              columns=udp_asn_heatmap['asn_name'],
                              values=udp_asn_heatmap['result_rt_udp'],
                              aggfunc='median')

In [None]:
# DoTCP medians per resolver and AS name
tcp_rt_by_resolver_and_asn = rt_asn_merged_top_10.groupby(
    ['resolver_name', 'asn_name'], as_index=False)['result_rt_tcp'].median()
# Per-AS aggregate row over all resolvers in the merged frame.
# NOTE(review): this includes the 'Probe Resolver' rows — confirm intended.
tcp_asn_heatmap_public_resolver = (
    rt_asn_merged_top_10.groupby(['asn_name'], as_index=False)['result_rt_tcp'].median()
    .assign(resolver_name='Public Resolver')
)
tcp_asn_heatmap = pd.concat([tcp_asn_heatmap_public_resolver, tcp_rt_by_resolver_and_asn],
                            ignore_index=True)
# Pivot to resolver x AS name
tcp_asn_heatmap = pd.crosstab(index=tcp_asn_heatmap['resolver_name'],
                              columns=tcp_asn_heatmap['asn_name'],
                              values=tcp_asn_heatmap['result_rt_tcp'],
                              aggfunc='median')

In [None]:
# move Probe Resolver to the bottom and Public Resolver to the top
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
#  .loc[[label]] keeps the selected row as a one-row DataFrame)
udp_asn_heatmap = pd.concat([udp_asn_heatmap[udp_asn_heatmap.index != 'Probe Resolver'],
                             udp_asn_heatmap.loc[['Probe Resolver']]])
udp_asn_heatmap_public_resolver = udp_asn_heatmap[udp_asn_heatmap.index == 'Public Resolver']
udp_asn_heatmap_tmp = udp_asn_heatmap[udp_asn_heatmap.index != 'Public Resolver']
udp_asn_heatmap = pd.concat([udp_asn_heatmap_public_resolver, udp_asn_heatmap_tmp])

tcp_asn_heatmap = pd.concat([tcp_asn_heatmap[tcp_asn_heatmap.index != 'Probe Resolver'],
                             tcp_asn_heatmap.loc[['Probe Resolver']]])
tcp_asn_heatmap_public_resolver = tcp_asn_heatmap[tcp_asn_heatmap.index == 'Public Resolver']
tcp_asn_heatmap_tmp = tcp_asn_heatmap[tcp_asn_heatmap.index != 'Public Resolver']
tcp_asn_heatmap = pd.concat([tcp_asn_heatmap_public_resolver, tcp_asn_heatmap_tmp])

In [None]:
udp_asn_heatmap

In [None]:
tcp_asn_heatmap

In [None]:
diff_asn_heatmap = tcp_asn_heatmap - udp_asn_heatmap

In [None]:
diff_asn_heatmap

In [None]:
probes_per_asn_top_10['asn_name'].unique()

In [None]:
# Two stacked heatmaps sharing the x axis (AS names):
#   top    - median DoTCP response time per resolver
#   bottom - difference of the DoTCP and DoUDP heatmaps (diff_asn_heatmap)
# Columns are selected, and thereby ordered, by the AS names in probes_per_asn_top_10.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8, 6.5), gridspec_kw = {'wspace':0, 'hspace':0.15}, sharex=True)
# Top panel: colour scale fixed to 0..300 so larger values saturate
ax1 = sns.heatmap(tcp_asn_heatmap[probes_per_asn_top_10['asn_name'].unique()],
            annot=True,
            cmap='RdYlGn_r', #'RdYlBu_r',  # "RdYlGn_r",
            cbar_kws={'label' : 'Response Time [ms]'},
            ax=ax1,
            fmt='.1f',
            vmin=0,
            vmax=300,
#             xticklabels=False
           )
# Drop the automatic axis titles taken from the index / column names
ax1.set_xlabel('')
ax1.set_ylabel('')

# Bottom panel: diverging colour map with symmetric limits, so 0 sits at the centre colour
ax2 = sns.heatmap(diff_asn_heatmap[probes_per_asn_top_10['asn_name'].unique()],
            annot=True,
            cmap='RdBu_r', #"RdYlGn_r",
            cbar_kws={'label' : 'Response Time Difference [ms]'},
            ax=ax2,
            fmt='.1f',
            vmin=-200,
            vmax=200,
           )
ax2.set_xlabel('')
ax2.set_ylabel('')

# NOTE(review): fig.show() may only emit a warning on non-GUI (inline) backends — confirm it is needed
fig.show()
# Write the figure to the working directory
fig.savefig('diff-rt-heatmap-ASN-merged.pdf', bbox_inches='tight')

### Distribution of Probes by Continent / Country / ASN

In [None]:
probe_list_with_asn = sub_df.drop_duplicates('probe_id')[['probe_id', 'country_code', 'continent_code',
                                                          'public_src_ip', 'asn']]

In [None]:
probe_list_with_asn

In [None]:
probe_list_with_asn['continent_code'].value_counts()

In [None]:
probe_list_with_asn['continent_code'].value_counts(normalize=True)

In [None]:
probe_dist_by_continent = pd.concat([probe_list_with_asn['continent_code'].value_counts(),
                                     probe_list_with_asn['continent_code'].value_counts(normalize=True)], axis=1)
probe_dist_by_continent = probe_dist_by_continent.reset_index() 
probe_dist_by_continent.columns = ['Continent', 'abs', 'rel']
probe_dist_by_continent['rel'] = probe_dist_by_continent['rel'].map(" ({:.2%})".format)

In [None]:
probe_dist_by_continent['Number of Probes'] = probe_dist_by_continent['abs'].astype(str) + probe_dist_by_continent['rel']

In [None]:
print(probe_dist_by_continent[['Continent', 'Number of Probes']].to_latex(index=False))

In [None]:
probe_list_with_asn['country_code'].nunique()

In [None]:
probe_list_with_asn['country_code'].value_counts()

In [None]:
probe_list_with_asn['country_code'].value_counts(normalize=True)

In [None]:
probe_list_with_asn['asn'].nunique()

In [None]:
probe_list_with_asn['asn'].value_counts()

In [None]:
probe_list_with_asn['asn'].value_counts(normalize=True)

In [None]:
top_n = 10

In [None]:
pd.concat([probe_list_with_asn['asn'].value_counts().head(top_n),
           probe_list_with_asn['asn'].value_counts(normalize=True).head(top_n)], axis=1)

In [None]:
probe_list_with_asn['asn'].value_counts(normalize=True).head(top_n).sum()

In [None]:
# Top-N ASes by number of probes: absolute count, share formatted as " (xx.xx%)",
# and the AS name resolved through the AS Rank API (one HTTP request per row)
asn_counts = probe_list_with_asn['asn'].value_counts().head(top_n)
asn_shares = probe_list_with_asn['asn'].value_counts(normalize=True).head(top_n)

probe_dist_by_asn = pd.concat([asn_counts, asn_shares], axis=1).reset_index()
probe_dist_by_asn.columns = ['ASN', 'abs', 'rel']
probe_dist_by_asn['rel'] = probe_dist_by_asn['rel'].map(lambda share: " ({:.2%})".format(share))
probe_dist_by_asn['Number of Probes'] = probe_dist_by_asn['abs'].astype(str) + probe_dist_by_asn['rel']
# Resolve the names while 'ASN' still holds the bare number, then add the 'AS' prefix
probe_dist_by_asn['AS'] = probe_dist_by_asn['ASN'].map(lambda asn: lookupASName(asn))
probe_dist_by_asn['ASN'] = 'AS' + probe_dist_by_asn['ASN'].astype(str)

In [None]:
print(probe_dist_by_asn[['ASN', 'AS', 'Number of Probes']].to_latex(index=False))

#### ================================================================================

# Failure Rates

### Overall Failure Rates

In [None]:
udp_prbres = udp[udp['resolver_name'] == 'Probe Resolver']

In [None]:
len(udp_prbres[udp_prbres['result_rt'].isna()]) / len(udp_prbres)

In [None]:
udp_pubres = udp[udp['resolver_name'] != 'Probe Resolver']

In [None]:
len(udp_pubres[udp_pubres['result_rt'].isna()]) / len(udp_pubres)

In [None]:
tcp_prbres = tcp[tcp['resolver_name'] == 'Probe Resolver']

In [None]:
len(tcp_prbres[tcp_prbres['result_rt'].isna()]) / len(tcp_prbres)

In [None]:
tcp_pubres = tcp[tcp['resolver_name'] != 'Probe Resolver']

In [None]:
len(tcp_pubres[tcp_pubres['result_rt'].isna()]) / len(tcp_pubres)

## Plot Regional Heatmaps - continent part of Figure 1 in the paper

### Calculated Failure Rates for UDP Heatmap

#### Per Continent ("Total" column)

In [None]:
udp_pubres

In [None]:
# Per public resolver: number of measurements and number of failed ones
# (a measurement counts as failed when it has no response time)
tmp_1 = udp_pubres.groupby('resolver_name').size().reset_index(name='num_msm')
tmp_2 = udp_pubres[udp_pubres['result_rt'].isna()
                  ].groupby('resolver_name').size().reset_index(name='num_failures')
# Aggregate row over all public resolvers
tmp_3 = [{'resolver_name': 'Public Resolver',
          'num_msm': len(udp_pubres),
          'num_failures': len(udp_pubres[udp_pubres['result_rt'].isna()])}]

# Left join on the measurement counts so that resolvers without a single
# failure are kept (with 0 failures) instead of being dropped
udp_frates_all_conts = tmp_1.merge(tmp_2, on=['resolver_name'], how='left')
udp_frates_all_conts['num_failures'] = udp_frates_all_conts['num_failures'].fillna(0)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
udp_frates_all_conts = pd.concat([udp_frates_all_conts, pd.DataFrame(tmp_3)])

udp_frates_all_conts['f_rate'] = udp_frates_all_conts['num_failures'] / udp_frates_all_conts['num_msm']
udp_frates_all_conts['continent_code'] = 'Total'

In [None]:
udp_frates_all_conts

#### Per Resolver and Continent

In [None]:
# Number of DoUDP measurements per public resolver and continent ...
udp_num_msm_by_cont = udp_pubres.groupby(['resolver_name' , 'continent_code']
                                        ).size().reset_index().rename(columns={0 : 'num_msm'})
# ... and per continent over all public resolvers (aggregate "Public Resolver" row)
udp_num_msm_by_cont_public_resolver = udp_pubres.groupby(['continent_code']
                                        ).size().reset_index().rename(columns={0 : 'num_msm'})
udp_num_msm_by_cont_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
udp_num_msm_by_cont = pd.concat([udp_num_msm_by_cont, udp_num_msm_by_cont_public_resolver])
udp_num_msm_by_cont

In [None]:
# Number of failed DoUDP measurements (no response time) per public resolver and continent ...
udp_num_fail_by_cont = udp_pubres[udp_pubres['result_rt'].isna()
                                 ].groupby(['resolver_name' , 'continent_code']
                                          ).size().reset_index().rename(columns={0 : 'num_failures'})
# ... and per continent over all public resolvers (aggregate "Public Resolver" row)
udp_num_fail_by_cont_public_resolver = udp_pubres[udp_pubres['result_rt'].isna()
                                 ].groupby(['continent_code']
                                          ).size().reset_index().rename(columns={0 : 'num_failures'})
udp_num_fail_by_cont_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
udp_num_fail_by_cont = pd.concat([udp_num_fail_by_cont, udp_num_fail_by_cont_public_resolver])
udp_num_fail_by_cont

In [None]:
# Right join on the measurement counts keeps every measured resolver/continent pair
udp_fail_rates_by_cont = udp_num_fail_by_cont.merge(udp_num_msm_by_cont,
                                                    on=['resolver_name', 'continent_code'], how='right')
# Pairs that were measured but never failed have no row in the failure counts;
# their failure count is 0 (not missing), so the rate becomes 0.0 instead of NaN
udp_fail_rates_by_cont['num_failures'] = udp_fail_rates_by_cont['num_failures'].fillna(0)
udp_fail_rates_by_cont['f_rate'] = udp_fail_rates_by_cont['num_failures'] / udp_fail_rates_by_cont['num_msm']

In [None]:
udp_fail_rates_by_cont

In [None]:
udp_fail_rates_by_cont[udp_fail_rates_by_cont['resolver_name'] != 'Public Resolver']['num_failures'].sum()

In [None]:
udp_fail_rates_by_cont[udp_fail_rates_by_cont['resolver_name'] != 'Public Resolver']['num_msm'].sum()

In [None]:
udp_fail_rates_by_cont['num_failures'].sum()/udp_fail_rates_by_cont['num_msm'].sum()

#### Probe Resolver Failure Rates

In [None]:
# Failed (no response time) and total DoUDP measurements of the probe resolvers per continent
tmp_1 = udp_prbres[udp_prbres['result_rt'].isna()
                  ].groupby(['resolver_name' , 'continent_code']
                           ).size().reset_index().rename(columns={0 : 'num_failures'})

tmp_2 = udp_prbres.groupby(['resolver_name' , 'continent_code']
                          ).size().reset_index().rename(columns={0 : 'num_msm'})

# Right join keeps every measured continent; continents without any failure
# get 0 failures so that their rate is 0.0 instead of NaN
udp_fail_rates_by_cont_prb = tmp_1.merge(tmp_2, on=['resolver_name', 'continent_code'], how='right')
udp_fail_rates_by_cont_prb['num_failures'] = udp_fail_rates_by_cont_prb['num_failures'].fillna(0)

# manually adding "total column" over all continents for probe resolvers
# (values follow the column order: resolver_name, continent_code, num_failures, num_msm)
udp_fail_rates_by_cont_prb.loc[len(udp_fail_rates_by_cont_prb)
                              ] = ['Probe Resolver', 'Total',
                                   udp_fail_rates_by_cont_prb['num_failures'].sum(),
                                   udp_fail_rates_by_cont_prb['num_msm'].sum()]

udp_fail_rates_by_cont_prb['f_rate'] = udp_fail_rates_by_cont_prb['num_failures'] / udp_fail_rates_by_cont_prb['num_msm']

In [None]:
udp_fail_rates_by_cont_prb

#### Putting everything together

In [None]:
udp_fail_rates_by_cont_merged = pd.concat([udp_fail_rates_by_cont,
                                           udp_frates_all_conts,
                                           udp_fail_rates_by_cont_prb])

In [None]:
udp_fail_rates_by_cont_merged

In [None]:
# Pivot the DoUDP failure rates to resolver x continent, rounded to three decimals
udp_crosstab_fail_rates_by_cont = pd.crosstab(
    index=udp_fail_rates_by_cont_merged['resolver_name'],
    columns=udp_fail_rates_by_cont_merged['continent_code'],
    values=udp_fail_rates_by_cont_merged['f_rate'],
    aggfunc='median',
)
udp_crosstab_fail_rates_by_cont = udp_crosstab_fail_rates_by_cont.round(3)

# Fixed row order for the heatmap: aggregate first, probe resolvers last.
# reindex() drops resolvers that are not listed and adds NaN rows for listed
# resolvers that are absent from the data.
udp_heatmap_row_order = ['Public Resolver', 'CleanBrowsing', 'Cloudflare',
                         'Comodo', 'Google', 'Neustar',
                         'OpenDNS', 'OpenNIC', 'Quad9',
                         'UncensoredDNS', 'Yandex',
                         'Probe Resolver']
udp_crosstab_fail_rates_by_cont = udp_crosstab_fail_rates_by_cont.reindex(udp_heatmap_row_order)

In [None]:
udp_crosstab_fail_rates_by_cont

### Calculated Failure Rates for TCP Heatmap

#### Per Continent ("Total" column)

In [None]:
# Per public resolver: number of measurements and number of failed ones
# (a measurement counts as failed when it has no response time)
tmp_1 = tcp_pubres.groupby('resolver_name').size().reset_index(name='num_msm')
tmp_2 = tcp_pubres[tcp_pubres['result_rt'].isna()
                  ].groupby('resolver_name').size().reset_index(name='num_failures')
# Aggregate row over all public resolvers
tmp_3 = [{'resolver_name': 'Public Resolver',
          'num_msm': len(tcp_pubres),
          'num_failures': len(tcp_pubres[tcp_pubres['result_rt'].isna()])}]

# Left join on the measurement counts so that resolvers without a single
# failure are kept (with 0 failures) instead of being dropped
tcp_frates_all_conts = tmp_1.merge(tmp_2, on=['resolver_name'], how='left')
tcp_frates_all_conts['num_failures'] = tcp_frates_all_conts['num_failures'].fillna(0)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
tcp_frates_all_conts = pd.concat([tcp_frates_all_conts, pd.DataFrame(tmp_3)])

tcp_frates_all_conts['f_rate'] = tcp_frates_all_conts['num_failures'] / tcp_frates_all_conts['num_msm']
tcp_frates_all_conts['continent_code'] = 'Total'

In [None]:
tcp_frates_all_conts

#### Per Resolver and Continent

In [None]:
# Number of DoTCP measurements per public resolver and continent ...
tcp_num_msm_by_cont = tcp_pubres.groupby(['resolver_name' , 'continent_code']
                                        ).size().reset_index().rename(columns={0 : 'num_msm'})
# ... and per continent over all public resolvers (aggregate "Public Resolver" row)
tcp_num_msm_by_cont_public_resolver = tcp_pubres.groupby(['continent_code']
                                        ).size().reset_index().rename(columns={0 : 'num_msm'})
tcp_num_msm_by_cont_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
tcp_num_msm_by_cont = pd.concat([tcp_num_msm_by_cont, tcp_num_msm_by_cont_public_resolver])
tcp_num_msm_by_cont

In [None]:
# Number of failed DoTCP measurements (no response time) per public resolver and continent ...
tcp_num_fail_by_cont = tcp_pubres[tcp_pubres['result_rt'].isna()
                                 ].groupby(['resolver_name' , 'continent_code']
                                          ).size().reset_index().rename(columns={0 : 'num_failures'})
# ... and per continent over all public resolvers (aggregate "Public Resolver" row)
tcp_num_fail_by_cont_public_resolver = tcp_pubres[tcp_pubres['result_rt'].isna()
                                 ].groupby(['continent_code']
                                          ).size().reset_index().rename(columns={0 : 'num_failures'})
tcp_num_fail_by_cont_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
tcp_num_fail_by_cont = pd.concat([tcp_num_fail_by_cont, tcp_num_fail_by_cont_public_resolver])
tcp_num_fail_by_cont

In [None]:
# Right join on the measurement counts keeps every measured resolver/continent pair
tcp_fail_rates_by_cont = tcp_num_fail_by_cont.merge(tcp_num_msm_by_cont,
                                                    on=['resolver_name', 'continent_code'], how='right')
# Pairs that were measured but never failed have no row in the failure counts;
# their failure count is 0 (not missing), so the rate becomes 0.0 instead of NaN
tcp_fail_rates_by_cont['num_failures'] = tcp_fail_rates_by_cont['num_failures'].fillna(0)
tcp_fail_rates_by_cont['f_rate'] = tcp_fail_rates_by_cont['num_failures'] / tcp_fail_rates_by_cont['num_msm']

In [None]:
tcp_fail_rates_by_cont

In [None]:
tcp_fail_rates_by_cont[tcp_fail_rates_by_cont['resolver_name'] != 'Public Resolver']['num_failures'].sum()

In [None]:
tcp_fail_rates_by_cont[tcp_fail_rates_by_cont['resolver_name'] != 'Public Resolver']['num_msm'].sum()

In [None]:
tcp_fail_rates_by_cont['num_failures'].sum()/tcp_fail_rates_by_cont['num_msm'].sum()

#### Probe Resolver Failure Rates

In [None]:
# Failed (no response time) and total DoTCP measurements of the probe resolvers per continent
tmp_1 = tcp_prbres[tcp_prbres['result_rt'].isna()
                  ].groupby(['resolver_name' , 'continent_code']
                           ).size().reset_index().rename(columns={0 : 'num_failures'})

tmp_2 = tcp_prbres.groupby(['resolver_name' , 'continent_code']
                          ).size().reset_index().rename(columns={0 : 'num_msm'})

# Right join keeps every measured continent; continents without any failure
# get 0 failures so that their rate is 0.0 instead of NaN
tcp_fail_rates_by_cont_prb = tmp_1.merge(tmp_2, on=['resolver_name', 'continent_code'], how='right')
tcp_fail_rates_by_cont_prb['num_failures'] = tcp_fail_rates_by_cont_prb['num_failures'].fillna(0)

# manually adding "total column" over all continents for probe resolvers
# (values follow the column order: resolver_name, continent_code, num_failures, num_msm)
tcp_fail_rates_by_cont_prb.loc[len(tcp_fail_rates_by_cont_prb)
                              ] = ['Probe Resolver', 'Total',
                                   tcp_fail_rates_by_cont_prb['num_failures'].sum(),
                                   tcp_fail_rates_by_cont_prb['num_msm'].sum()]

tcp_fail_rates_by_cont_prb['f_rate'] = tcp_fail_rates_by_cont_prb['num_failures'] / tcp_fail_rates_by_cont_prb['num_msm']

In [None]:
tcp_fail_rates_by_cont_prb

#### Putting everything together

In [None]:
tcp_fail_rates_by_cont_merged = pd.concat([tcp_fail_rates_by_cont,
                                           tcp_frates_all_conts,
                                           tcp_fail_rates_by_cont_prb])

In [None]:
tcp_fail_rates_by_cont_merged

In [None]:
# Pivot the DoTCP failure rates to resolver x continent, rounded to three decimals
tcp_crosstab_fail_rates_by_cont = pd.crosstab(
    index=tcp_fail_rates_by_cont_merged['resolver_name'],
    columns=tcp_fail_rates_by_cont_merged['continent_code'],
    values=tcp_fail_rates_by_cont_merged['f_rate'],
    aggfunc='median',
)
tcp_crosstab_fail_rates_by_cont = tcp_crosstab_fail_rates_by_cont.round(3)

# Fixed row order for the heatmap: aggregate first, probe resolvers last.
# reindex() drops resolvers that are not listed and adds NaN rows for listed
# resolvers that are absent from the data.
tcp_heatmap_row_order = ['Public Resolver', 'CleanBrowsing', 'Cloudflare',
                         'Comodo', 'Google', 'Neustar',
                         'OpenDNS', 'OpenNIC', 'Quad9',
                         'UncensoredDNS', 'Yandex',
                         'Probe Resolver']
tcp_crosstab_fail_rates_by_cont = tcp_crosstab_fail_rates_by_cont.reindex(tcp_heatmap_row_order)

In [None]:
tcp_crosstab_fail_rates_by_cont

In [None]:
diff_crosstab_fail_rates_by_cont = tcp_crosstab_fail_rates_by_cont - udp_crosstab_fail_rates_by_cont

In [None]:
diff_crosstab_fail_rates_by_cont

In [None]:
from matplotlib.ticker import FuncFormatter

In [None]:
# Two stacked heatmaps sharing the x axis:
#   top    - DoTCP failure rates per resolver (rows) and continent (columns)
#   bottom - failure-rate difference (DoTCP minus DoUDP) for the same cells
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(6, 6.5), gridspec_kw = {'wspace':0, 'hspace':0.15}, sharex=True)
# Top panel: colour scale runs from 0 to 25%; values are annotated as percentages.
ax1 = sns.heatmap(tcp_crosstab_fail_rates_by_cont,
            annot=True,
            cmap='RdYlGn_r', #'RdYlBu_r',  # "RdYlGn_r",
            cbar_kws={'label' : 'Failure Rate',
                      'format': FuncFormatter(lambda x, pos: '{:.1%}'.format(x))},
            ax=ax1,
            fmt='.1%',
            vmin=0,
            vmax=0.25,
            xticklabels=False
           )
# Blank out the axis titles that seaborn derives from the index/column names.
ax1.set_xlabel('')
ax1.set_ylabel('')

# Bottom panel: diverging colour map with a symmetric range of +/-25 percentage points.
ax2 = sns.heatmap(diff_crosstab_fail_rates_by_cont,
            annot=True,
            cmap='RdBu_r', #'RdYlGn_r',
            cbar_kws={'label' : 'Failure Rate Difference',
                      'format': FuncFormatter(lambda x, pos: '{:.1%}'.format(x))},
            ax=ax2,
            fmt='.1%',
            vmin=-0.25,
            vmax=0.25,
           )
ax2.set_xlabel('')
ax2.set_ylabel('')

# Display the figure and write it to a PDF in the working directory.
fig.show()
fig.savefig('diff-failure-rate-heatmap-CC-merged.pdf', bbox_inches='tight')

## Plot ASN Heatmaps - ASN part of Figure 1 in the paper

## Failure Rates by top ASNs

In [None]:
probes_per_asn_top_10

In [None]:
probes_per_asn_top_10['asn'].unique()

In [None]:
udp_pubres_top_asns = udp_pubres[udp_pubres['asn'].isin(probes_per_asn_top_10['asn'].unique())]
udp_prbres_top_asns = udp_prbres[udp_prbres['asn'].isin(probes_per_asn_top_10['asn'].unique())]

In [None]:
udp_pubres_top_asns

In [None]:
udp_prbres_top_asns

In [None]:
### Calculated Failure Rates for UDP Heatmap

#### Per Resolver and ASN

# Number of DoUDP measurements towards public resolvers per (resolver, ASN).
udp_num_msm_by_top_asns = udp_pubres_top_asns.groupby(['resolver_name', 'asn']
                                                     ).size().reset_index().rename(columns={0: 'num_msm'})
# The same count aggregated over all public resolvers; it becomes the
# synthetic 'Public Resolver' row of the heatmap.
udp_num_msm_by_top_asns_public_resolver = udp_pubres_top_asns.groupby(['asn']
                                        ).size().reset_index().rename(columns={0: 'num_msm'})
udp_num_msm_by_top_asns_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in pandas 2.0. Index and column handling are the same.
udp_num_msm_by_top_asns = pd.concat([udp_num_msm_by_top_asns,
                                     udp_num_msm_by_top_asns_public_resolver])

# A measurement counts as failed when it has no response time (result_rt is NaN).
udp_failed_top_asns = udp_pubres_top_asns[udp_pubres_top_asns['result_rt'].isna()]

# Number of failed measurements per (resolver, ASN) ...
udp_num_fail_by_top_asns = udp_failed_top_asns.groupby(['resolver_name', 'asn']
                                                      ).size().reset_index().rename(columns={0: 'num_failures'})
# ... and aggregated over all public resolvers.
udp_num_fail_by_top_asns_public_resolver = udp_failed_top_asns.groupby(['asn']
                                                      ).size().reset_index().rename(columns={0: 'num_failures'})
udp_num_fail_by_top_asns_public_resolver['resolver_name'] = 'Public Resolver'
udp_num_fail_by_top_asns = pd.concat([udp_num_fail_by_top_asns,
                                      udp_num_fail_by_top_asns_public_resolver])

In [None]:
# Right join keeps every (resolver, ASN) group that has measurements, even if
# it has no failed measurement at all.
udp_fail_rates_by_top_asns = udp_num_fail_by_top_asns.merge(udp_num_msm_by_top_asns,
                                                            on=['resolver_name', 'asn'], how='right')
# Groups without any failure have no row on the left side and come out of the
# merge as NaN; that means "zero failures", so the rate must be 0 rather than NaN.
udp_fail_rates_by_top_asns['num_failures'] = udp_fail_rates_by_top_asns['num_failures'].fillna(0)
udp_fail_rates_by_top_asns['f_rate'] = udp_fail_rates_by_top_asns['num_failures'] / udp_fail_rates_by_top_asns['num_msm']

In [None]:
udp_fail_rates_by_top_asns

In [None]:
#### Probe Resolver Failure Rates

# Failed DoUDP measurements (no response time) towards probe resolvers per ASN.
tmp_1 = udp_prbres_top_asns[udp_prbres_top_asns['result_rt'].isna()
                           ].groupby(['resolver_name', 'asn']
                                    ).size().reset_index().rename(columns={0: 'num_failures'})

# All DoUDP measurements towards probe resolvers per ASN.
tmp_2 = udp_prbres_top_asns.groupby(['resolver_name', 'asn']
                                   ).size().reset_index().rename(columns={0: 'num_msm'})

# Right join keeps ASNs that have measurements but no failures.
udp_fail_rates_by_top_asns_prb = tmp_1.merge(tmp_2, on=['resolver_name', 'asn'], how='right')

# Such ASNs have NaN failures after the merge; treat that as 0 so the failure
# rate is 0 instead of NaN.
udp_fail_rates_by_top_asns_prb['num_failures'] = udp_fail_rates_by_top_asns_prb['num_failures'].fillna(0)
udp_fail_rates_by_top_asns_prb['f_rate'] = udp_fail_rates_by_top_asns_prb['num_failures'] / udp_fail_rates_by_top_asns_prb['num_msm']

In [None]:
#### Putting everything together

# Stack the public-resolver rows (including the aggregated 'Public Resolver'
# row) on top of the probe-resolver rows.
udp_fail_rates_by_top_asns_merged = pd.concat([udp_fail_rates_by_top_asns,
                                               udp_fail_rates_by_top_asns_prb])

# Resolver x ASN matrix of failure rates, rounded to 3 decimals and with the
# rows in the fixed display order.
udp_crosstab_fail_rates_by_top_asns = (
    pd.crosstab(index=udp_fail_rates_by_top_asns_merged['resolver_name'],
                columns=udp_fail_rates_by_top_asns_merged['asn'],
                values=udp_fail_rates_by_top_asns_merged['f_rate'],
                aggfunc='median')
    .round(3)
    .reindex(index=['Public Resolver',
                    'CleanBrowsing', 'Cloudflare', 'Comodo', 'Google', 'Neustar',
                    'OpenDNS', 'OpenNIC', 'Quad9', 'UncensoredDNS', 'Yandex',
                    'Probe Resolver'])
)

In [None]:
udp_crosstab_fail_rates_by_top_asns

In [None]:
tcp_pubres_top_asns = tcp_pubres[tcp_pubres['asn'].isin(probes_per_asn_top_10['asn'].unique())]
tcp_prbres_top_asns = tcp_prbres[tcp_prbres['asn'].isin(probes_per_asn_top_10['asn'].unique())]

In [None]:
### Calculated Failure Rates for TCP Heatmap

#### Per Resolver and ASN

# Number of DoTCP measurements towards public resolvers per (resolver, ASN).
tcp_num_msm_by_top_asns = tcp_pubres_top_asns.groupby(['resolver_name', 'asn']
                                                     ).size().reset_index().rename(columns={0: 'num_msm'})
# The same count aggregated over all public resolvers ('Public Resolver' row).
tcp_num_msm_by_top_asns_public_resolver = tcp_pubres_top_asns.groupby(['asn']
                                        ).size().reset_index().rename(columns={0: 'num_msm'})
tcp_num_msm_by_top_asns_public_resolver['resolver_name'] = 'Public Resolver'
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in pandas 2.0. Index and column handling are the same.
tcp_num_msm_by_top_asns = pd.concat([tcp_num_msm_by_top_asns,
                                     tcp_num_msm_by_top_asns_public_resolver])

# A measurement counts as failed when it has no response time (result_rt is NaN).
tcp_failed_top_asns = tcp_pubres_top_asns[tcp_pubres_top_asns['result_rt'].isna()]

tcp_num_fail_by_top_asns = tcp_failed_top_asns.groupby(['resolver_name', 'asn']
                                                      ).size().reset_index().rename(columns={0: 'num_failures'})
tcp_num_fail_by_top_asns_public_resolver = tcp_failed_top_asns.groupby(['asn']
                                                      ).size().reset_index().rename(columns={0: 'num_failures'})
tcp_num_fail_by_top_asns_public_resolver['resolver_name'] = 'Public Resolver'
tcp_num_fail_by_top_asns = pd.concat([tcp_num_fail_by_top_asns,
                                      tcp_num_fail_by_top_asns_public_resolver])

# Right join keeps every group that has measurements; groups without any
# failure come out with NaN failures, which means 0 failures (rate 0, not NaN).
tcp_fail_rates_by_top_asns = tcp_num_fail_by_top_asns.merge(tcp_num_msm_by_top_asns,
                                                            on=['resolver_name', 'asn'], how='right')
tcp_fail_rates_by_top_asns['num_failures'] = tcp_fail_rates_by_top_asns['num_failures'].fillna(0)
tcp_fail_rates_by_top_asns['f_rate'] = tcp_fail_rates_by_top_asns['num_failures'] / tcp_fail_rates_by_top_asns['num_msm']

In [None]:
tcp_fail_rates_by_top_asns

In [None]:
#### Probe Resolver Failure Rates

# Failed DoTCP measurements (no response time) towards probe resolvers per ASN.
tmp_1 = tcp_prbres_top_asns[tcp_prbres_top_asns['result_rt'].isna()].groupby(['resolver_name', 'asn']
                  ).size().reset_index().rename(columns={0: 'num_failures'})

# All DoTCP measurements towards probe resolvers per ASN.
tmp_2 = tcp_prbres_top_asns.groupby(['resolver_name', 'asn']
                  ).size().reset_index().rename(columns={0: 'num_msm'})

# Right join keeps ASNs that have measurements but no failures.
tcp_fail_rates_by_top_asns_prb = tmp_1.merge(tmp_2, on=['resolver_name', 'asn'], how='right')

# Such ASNs have NaN failures after the merge; treat that as 0 so the failure
# rate is 0 instead of NaN.
tcp_fail_rates_by_top_asns_prb['num_failures'] = tcp_fail_rates_by_top_asns_prb['num_failures'].fillna(0)
tcp_fail_rates_by_top_asns_prb['f_rate'] = tcp_fail_rates_by_top_asns_prb['num_failures'] / tcp_fail_rates_by_top_asns_prb['num_msm']

In [None]:
#### Putting everything together

# Stack the public-resolver rows (including the aggregated 'Public Resolver'
# row) on top of the probe-resolver rows.
tcp_fail_rates_by_top_asns_merged = pd.concat([tcp_fail_rates_by_top_asns,
                                               tcp_fail_rates_by_top_asns_prb])

# Resolver x ASN matrix of failure rates, rounded to 3 decimals and with the
# rows in the fixed display order.
tcp_crosstab_fail_rates_by_top_asns = (
    pd.crosstab(index=tcp_fail_rates_by_top_asns_merged['resolver_name'],
                columns=tcp_fail_rates_by_top_asns_merged['asn'],
                values=tcp_fail_rates_by_top_asns_merged['f_rate'],
                aggfunc='median')
    .round(3)
    .reindex(index=['Public Resolver',
                    'CleanBrowsing', 'Cloudflare', 'Comodo', 'Google', 'Neustar',
                    'OpenDNS', 'OpenNIC', 'Quad9', 'UncensoredDNS', 'Yandex',
                    'Probe Resolver'])
)

In [None]:
# Mapping ASN -> AS name for the top-10 ASNs; used to relabel heatmap columns.
top_as_names = probes_per_asn_top_10.set_index('asn')['asn_name'].to_dict()
top_as_names

In [None]:
probes_per_asn_top_10['asn_name'].to_list()  # to use descending order based on size

In [None]:
udp_crosstab_fail_rates_by_top_asns = udp_crosstab_fail_rates_by_top_asns.rename(columns=top_as_names)[probes_per_asn_top_10['asn_name'].to_list()]
udp_crosstab_fail_rates_by_top_asns

In [None]:
tcp_crosstab_fail_rates_by_top_asns = tcp_crosstab_fail_rates_by_top_asns.rename(columns=top_as_names)[probes_per_asn_top_10['asn_name'].to_list()]
tcp_crosstab_fail_rates_by_top_asns

In [None]:
diff_crosstab_fail_rates_by_top_asns = tcp_crosstab_fail_rates_by_top_asns - udp_crosstab_fail_rates_by_top_asns

In [None]:
diff_crosstab_fail_rates_by_top_asns

In [None]:
# Two stacked heatmaps sharing the x axis:
#   top    - DoTCP failure rates per resolver (rows) and top-10 AS (columns)
#   bottom - failure-rate difference (DoTCP minus DoUDP) for the same cells
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8, 6.5), gridspec_kw = {'wspace':0, 'hspace':0.15}, sharex=True)
# Top panel: colour scale runs from 0 to 25%; values are annotated as percentages.
ax1 = sns.heatmap(tcp_crosstab_fail_rates_by_top_asns,
            annot=True,
            cmap='RdYlGn_r', #'RdYlBu_r',  # "RdYlGn_r",
            cbar_kws={'label' : 'Failure Rate',
                      'format': FuncFormatter(lambda x, pos: '{:.1%}'.format(x))},
            ax=ax1,
            fmt='.1%',
            vmin=0,
            vmax=0.25,
            xticklabels=False
           )
# Blank out the axis titles that seaborn derives from the index/column names.
ax1.set_xlabel('')
ax1.set_ylabel('')

# Bottom panel: diverging colour map with a symmetric range of +/-25 percentage points.
ax2 = sns.heatmap(diff_crosstab_fail_rates_by_top_asns,
            annot=True,
            cmap='RdBu_r', #'RdYlGn_r',
            cbar_kws={'label' : 'Failure Rate Difference',
                      'format': FuncFormatter(lambda x, pos: '{:.1%}'.format(x))},
            ax=ax2,
            fmt='.1%',
            vmin=-0.25,
            vmax=0.25,
           )
ax2.set_xlabel('')
ax2.set_ylabel('')

# Display the figure and write it to a PDF in the working directory.
fig.show()
fig.savefig('diff-failure-rate-heatmap-ASN-merged.pdf', bbox_inches='tight')

#### ================================================================================

# Combined Heatmap Plots - Figure 1 and 2 in paper

In [None]:
asns_sorted = probes_per_asn_top_10['asn_name'].unique()
# use to sort columns of ASN heatmaps

In [None]:
# Short names for the eight matrices shown in the combined figures.
# Suffix _rt = response time, _fr = failure rate; "diff" = DoTCP minus DoUDP.
# NOTE: the continent variables are plain assignments, i.e. they refer to the
# very same DataFrame objects as the right-hand side (no copy is made).
# The ASN matrices are column-selected in the order given by asns_sorted.
# NOTE(review): tcp_heatmap / diff_heatmap / *_asn_heatmap are defined in
# earlier cells; presumably the response-time matrices - confirm.
tcp_cont_rt = tcp_heatmap
tcp_asn_rt = tcp_asn_heatmap[asns_sorted]
    
diff_cont_rt = diff_heatmap
diff_asn_rt = diff_asn_heatmap[asns_sorted]
    
tcp_cont_fr = tcp_crosstab_fail_rates_by_cont
tcp_asn_fr = tcp_crosstab_fail_rates_by_top_asns[asns_sorted]
    
diff_cont_fr = diff_crosstab_fail_rates_by_cont
diff_asn_fr = diff_crosstab_fail_rates_by_top_asns[asns_sorted]

In [None]:
# Continent code -> display name. The insertion order is significant: the
# values of this dict are later used to select and order heatmap columns.
continents_dict = dict(
    EU='Europe',
    NA='North America',
    AS='Asia',
    OC='Oceania',
    SA='South America',
    AF='Africa',
)

# AS name as found in the data -> shortened label for the plots.
as_dict = dict([
    ('COMCAST-7922', 'COMCAST'),
    ('Orange S.A.', 'ORANGE'),
    ('ATT-INTERNET4', 'ATT'),
    ('TNF-AS', 'TNF'),
    ('ASN-IBSNAZ', 'IBSNAZ'),
])

In [None]:
# Continent display names in plotting order. Materialised as a real list:
# a dict_values view is not a list/array and is not a reliable DataFrame
# column indexer (df[conts_list]) across pandas versions.
conts_list = list(continents_dict.values())
conts_list

In [None]:
# Relabel the columns for plotting. rename() returns a new frame and the
# result is rebound to the plotting variable, so the frames these variables
# were taken from are left untouched. The previous in-place rename also
# modified the aliased source frames (e.g. tcp_crosstab_fail_rates_by_cont),
# which made earlier cells such as the tcp - udp difference misalign on re-run,
# and it operated on column-selected frames.

# Continent codes -> continent names.
tcp_cont_rt = tcp_cont_rt.rename(columns=continents_dict)
diff_cont_rt = diff_cont_rt.rename(columns=continents_dict)
tcp_cont_fr = tcp_cont_fr.rename(columns=continents_dict)
diff_cont_fr = diff_cont_fr.rename(columns=continents_dict)

# AS names -> shortened AS labels.
tcp_asn_rt = tcp_asn_rt.rename(columns=as_dict)
diff_asn_rt = diff_asn_rt.rename(columns=as_dict)
tcp_asn_fr = tcp_asn_fr.rename(columns=as_dict)
diff_asn_fr = diff_asn_fr.rename(columns=as_dict)

In [None]:
# Plot Params

# Figure layout: 2 rows x 3 columns; the relative column widths are given by
# width_ratios.
fig_size = (13, 6.5)
n_rows, n_cols = 2, 3
gridspec_kws = dict(wspace=0.05,
                    hspace=0.15,
                    width_ratios=[5, 33, 62])

# Colour range and annotation format: response-time heatmaps.
rt_vmin, rt_vmax = 0, 500
rt_fmt = '.1f'

# Colour range and annotation format: failure-rate heatmaps.
fr_vmin, fr_vmax = 0, 0.25
fr_fmt = '.1%'

# Symmetric colour ranges for the difference heatmaps.
diff_rt_vmin, diff_rt_vmax = -200, 200
diff_fr_vmin, diff_fr_vmax = -0.2, 0.2

# Colour maps: sequential for absolute values, diverging for differences.
abs_cmap = 'RdYlGn_r'
diff_cmap = 'RdBu_r'
cbar_pad = 0.025
# Colour-bar tick labels as whole percentages.
cbar_fr_fmt = FuncFormatter(lambda x, pos: '{:.0%}'.format(x))

# Rotation (degrees) of the x tick labels on the bottom row.
label_rot = 45

## Combined Response Time Heatmap Plot - Figure 2 in paper

In [None]:
fig, ((ax_total, ax_cont, ax_asn),
      (ax_diff_total, ax_diff_cont, ax_diff_asn)) = plt.subplots(figsize=fig_size,
                                                                 nrows=n_rows, ncols=n_cols,
                                                                 gridspec_kw = gridspec_kws)

ax_total = sns.heatmap(pd.DataFrame(tcp_cont_rt['Total']),
                       annot=True,
                       cmap=abs_cmap,
                       cbar=False,
                       ax=ax_total,
                       fmt=rt_fmt,
                       vmin=rt_vmin,
                       vmax=rt_vmax,
                      )
ax_total.set_xlabel('')
ax_total.set_ylabel('')
ax_total.set_xticklabels([])

# Bottom-left panel: response-time difference for the 'Total' column only.
ax_diff_total = sns.heatmap(pd.DataFrame(diff_cont_rt['Total']),
                            annot=True,
                            cmap=diff_cmap,
                            cbar=False,
                            ax=ax_diff_total,
                            fmt=rt_fmt,
                            vmin=diff_rt_vmin,
                            vmax=diff_rt_vmax,
                           )

ax_diff_total.set_xlabel('')
ax_diff_total.set_ylabel('')
# Use the shared label_rot setting like every other bottom-row panel instead
# of a hard-coded 45, so changing the plot parameter affects all panels.
ax_diff_total.set_xticklabels(ax_diff_total.get_xticklabels(), rotation=label_rot, ha='right')

ax_cont = sns.heatmap(tcp_cont_rt[conts_list],
                      annot=True,
                      cmap=abs_cmap,
                      cbar=False,
                      ax=ax_cont,
                      fmt=rt_fmt,
                      vmin=rt_vmin,
                      vmax=rt_vmax,
                     )
ax_cont.set_xlabel('')
ax_cont.set_ylabel('')
ax_cont.set_xticklabels([])
ax_cont.set_yticklabels([])

ax_diff_cont = sns.heatmap(diff_cont_rt[conts_list],
                           annot=True,
                           cmap=diff_cmap,
                           cbar=False,
                           ax=ax_diff_cont,
                           fmt=rt_fmt,
                           vmin=diff_rt_vmin,
                           vmax=diff_rt_vmax,
                          )
ax_diff_cont.set_xlabel('')
ax_diff_cont.set_ylabel('')
ax_diff_cont.set_xticklabels(ax_diff_cont.get_xticklabels(), rotation=label_rot, ha='right')
ax_diff_cont.set_yticklabels([])

ax_asn = sns.heatmap(tcp_asn_rt,
                     annot=True,
                     cmap=abs_cmap,
                     cbar_kws={'label' : 'Response Time over DoTCP [ms]', 'pad' : cbar_pad},
                     ax=ax_asn,
                     fmt=rt_fmt,
                     vmin=rt_vmin,
                     vmax=rt_vmax,
                    )
ax_asn.set_xlabel('')
ax_asn.set_ylabel('')
ax_asn.set_yticklabels([])
ax_asn.set_xticklabels([])

ax_diff_asn = sns.heatmap(diff_asn_rt,
                          annot=True,
                          cmap=diff_cmap,
                          cbar_kws={'label' : 'Response Time Difference to DoUDP [ms]', 'pad' : cbar_pad},
                          ax=ax_diff_asn,
                          fmt=rt_fmt,
                          vmin=diff_rt_vmin,
                          vmax=diff_rt_vmax,
                         )
ax_diff_asn.set_xlabel('')
ax_diff_asn.set_ylabel('')
ax_diff_asn.set_yticklabels([])
ax_diff_asn.set_xticklabels(ax_diff_asn.get_xticklabels(), rotation=label_rot, ha='right')

fig.show()
fig.savefig('combined-heatmaps-rt.pdf', bbox_inches='tight')

## Combined Failure Rates Heatmap Plot - Figure 1 in paper

In [None]:
fig, ((ax_total, ax_cont, ax_asn),
      (ax_diff_total, ax_diff_cont, ax_diff_asn)) = plt.subplots(figsize=fig_size,
                                                                 nrows=n_rows, ncols=n_cols,
                                                                 gridspec_kw = gridspec_kws)

ax_total = sns.heatmap(pd.DataFrame(tcp_cont_fr['Total']),
                       annot=True,
                       cmap=abs_cmap,
                       cbar=False,
                       ax=ax_total,
                       fmt=fr_fmt,
                       vmin=fr_vmin,
                       vmax=fr_vmax,
                      )
ax_total.set_xlabel('')
ax_total.set_ylabel('')
ax_total.set_xticklabels([])

ax_diff_total = sns.heatmap(pd.DataFrame(diff_cont_fr['Total']),
                            annot=True,
                            cmap=diff_cmap,
                            cbar=False,
                            ax=ax_diff_total,
                            fmt=fr_fmt,
                            vmin=diff_fr_vmin,
                            vmax=diff_fr_vmax,
                           )

ax_diff_total.set_xlabel('')
ax_diff_total.set_ylabel('')
ax_diff_total.set_xticklabels(ax_diff_total.get_xticklabels(), rotation=label_rot, ha='right')

ax_cont = sns.heatmap(tcp_cont_fr[conts_list],
            annot=True,
            cmap=abs_cmap,
            cbar=False,
            ax=ax_cont,
            fmt=fr_fmt,
            vmin=fr_vmin,
            vmax=fr_vmax,
           )
ax_cont.set_xlabel('')
ax_cont.set_ylabel('')
ax_cont.set_xticklabels([])
ax_cont.set_yticklabels([])

ax_diff_cont = sns.heatmap(diff_cont_fr[conts_list],
            annot=True,
            cmap=diff_cmap,
            cbar=False,
            ax=ax_diff_cont,
            fmt=fr_fmt,
            vmin=diff_fr_vmin,
            vmax=diff_fr_vmax,
           )
ax_diff_cont.set_xlabel('')
ax_diff_cont.set_ylabel('')
ax_diff_cont.set_xticklabels(ax_diff_cont.get_xticklabels(), rotation=label_rot, ha='right')
ax_diff_cont.set_yticklabels([])

ax_asn = sns.heatmap(tcp_asn_fr,
            annot=True,
            cmap=abs_cmap,
            cbar_kws={'label' : 'Failure Rate over DoTCP',
                      'format': cbar_fr_fmt,
                      'pad' : cbar_pad},
            ax=ax_asn,
            fmt=fr_fmt,
            vmin=fr_vmin,
            vmax=fr_vmax,
           )
ax_asn.set_xlabel('')
ax_asn.set_ylabel('')
ax_asn.set_yticklabels([])
ax_asn.set_xticklabels([])

ax_diff_asn = sns.heatmap(diff_asn_fr,
            annot=True,
            cmap=diff_cmap,
            cbar_kws={'label' : 'Failure Rate Difference to DoUDP',
                      'format': cbar_fr_fmt,
                      'pad' : cbar_pad,
                     },
            ax=ax_diff_asn,
            fmt=fr_fmt,
            vmin=diff_fr_vmin,
            vmax=diff_fr_vmax,
           )
ax_diff_asn.set_xlabel('')
ax_diff_asn.set_ylabel('')
ax_diff_asn.set_yticklabels([])
ax_diff_asn.set_xticklabels(ax_diff_asn.get_xticklabels(), rotation=label_rot, ha='right')

fig.show()
fig.savefig('combined-heatmaps-fr.pdf', bbox_inches='tight')

#### ================================================================================

# Failure Rates: CPE vs ISP Resolver

In [None]:
from ipaddress import ip_address

In [None]:
full_df['private_dst_addr'] = full_df['dst_address'].map(lambda x: ip_address(x).is_private)

In [None]:
priv_df = full_df[full_df['private_dst_addr']]

In [None]:
priv_df.head()

In [None]:
priv_df['resolver_name'].unique()

In [None]:
probe_res_df = full_df[full_df['resolver_name'] == 'Probe Resolver']

In [None]:
all_probe_res_msm = len(probe_res_df)
all_probe_res_msm

In [None]:
failed_probe_res_msm = len(probe_res_df[probe_res_df['result_rt'].isna()])
failed_probe_res_msm

In [None]:
failed_probe_res_msm / all_probe_res_msm

In [None]:
all_udp_probe_msm = len(probe_res_df[probe_res_df['proto'] == 'UDP'])
all_udp_probe_msm

In [None]:
all_tcp_probe_msm = len(probe_res_df[probe_res_df['proto'] == 'TCP'])
all_tcp_probe_msm

In [None]:
failed_udp_probe_msm = len(probe_res_df[(probe_res_df['proto'] == 'UDP') & (probe_res_df['result_rt'].isna())])
failed_udp_probe_msm

In [None]:
failed_tcp_probe_msm = len(probe_res_df[(probe_res_df['proto'] == 'TCP') & (probe_res_df['result_rt'].isna())])
failed_tcp_probe_msm

In [None]:
failed_tcp_probe_msm / all_tcp_probe_msm

In [None]:
udp_priv_df = priv_df[priv_df['proto'] == 'UDP']
tcp_priv_df = priv_df[priv_df['proto'] == 'TCP']

In [None]:
len(udp_priv_df)

In [None]:
len(tcp_priv_df)

In [None]:
failed_cpe_udp_probe_msm = len(udp_priv_df[udp_priv_df['result_rt'].isna()])
failed_cpe_udp_probe_msm

In [None]:
failed_cpe_udp_probe_msm / failed_udp_probe_msm

In [None]:
failed_cpe_tcp_probe_msm = len(tcp_priv_df[tcp_priv_df['result_rt'].isna()])
failed_cpe_tcp_probe_msm

In [None]:
failed_cpe_tcp_probe_msm / failed_tcp_probe_msm

In [None]:
failed_cpe_tcp_probe_msm / all_tcp_probe_msm

In [None]:
ok_cpe_tcp_probe_msm = len(tcp_priv_df[tcp_priv_df['result_rt'].notna()])
ok_cpe_tcp_probe_msm

#### ================================================================================

# Failure Reasons - part of Table 3 in paper

In [None]:
measurements_results_failures = full_df[full_df['result_rt'].isna()]

In [None]:
measurements_results_failures

In [None]:
# Work on an explicit copy: the column assignments below would otherwise write
# to a slice of measurements_results_failures (SettingWithCopyWarning, and the
# write is not guaranteed to land where intended).
measurements_failures = measurements_results_failures[['proto', 'err_msg', 'resolver_name']].copy()
# Normalise the error messages: cast to str, then collapse every message
# containing 'TU' into 'TUCONNECT' and every message containing 'socket' into
# 'SOCKET'.
measurements_failures['err_msg'] = measurements_failures['err_msg'].astype('str')
measurements_failures['err_msg'] = np.where(measurements_failures['err_msg'].str.contains('TU', regex = False), 'TUCONNECT', measurements_failures['err_msg'])
measurements_failures['err_msg'] = np.where(measurements_failures['err_msg'].str.contains('socket', regex = False), 'SOCKET', measurements_failures['err_msg'])
# Resolver class: everything that is not the probe's own resolver is 'public'.
measurements_failures['resolver'] = np.where(measurements_failures['resolver_name'] != 'Probe Resolver', 'public', 'probe')
measurements_failures


# Dataset Overview Table: Failure Reasons

## TCP

In [None]:
tcp_failures = measurements_failures[measurements_failures['proto'] == 'TCP']

In [None]:
tcp_failures

In [None]:
# Failure reasons for DoTCP measurements to probe resolvers: count per error
# message, share relative to a fixed denominator, sorted by count (descending).
# NOTE(review): 454151 is hard-coded; presumably the total number of DoTCP
# measurements to probe resolvers - verify against the current dataset.
tcp_failures_probe = (
    tcp_failures[tcp_failures['resolver'] == 'probe']
    .groupby(['err_msg', 'resolver'])
    .size()
    .reset_index()
    .rename(columns={0: 'cnt'})
    .assign(perc=lambda counts: (counts['cnt'] / 454151).round(5))
    .sort_values('cnt', ascending=False)
)

In [None]:
print(tcp_failures_probe[['cnt', 'perc', 'err_msg']].to_latex(index=False))

In [None]:
# Failure reasons for DoTCP measurements to public resolvers: count per error
# message, share relative to a fixed denominator, sorted by count (descending).
# NOTE(review): 4655635 is hard-coded; presumably the total number of DoTCP
# measurements to public resolvers - verify against the current dataset.
tcp_failures_public = (
    tcp_failures[tcp_failures['resolver'] == 'public']
    .groupby(['err_msg', 'resolver'])
    .size()
    .reset_index()
    .rename(columns={0: 'cnt'})
    .assign(perc=lambda counts: (counts['cnt'] / 4655635).round(5))
    .sort_values('cnt', ascending=False)
)

In [None]:
print(tcp_failures_public[['cnt', 'perc', 'err_msg']].to_latex(index=False))

In [None]:
# Per public resolver: share of each error message among that resolver's
# failed DoTCP measurements (error message x resolver matrix, 2 decimals).
tcp_failures_public_breakdown = tcp_failures[tcp_failures['resolver'] == 'public']
# Total number of failed measurements per resolver (the denominator).
tcp_failures_public_breakdown_total = (
    tcp_failures_public_breakdown
    .groupby(['resolver_name'])
    .size()
    .reset_index()
    .rename(columns={0: 'total'})
)
tcp_failures_public_breakdown = (
    tcp_failures_public_breakdown
    .groupby(['err_msg', 'resolver_name'])
    .size()
    .reset_index()
    .rename(columns={0: 'failures'})
    .merge(tcp_failures_public_breakdown_total, how='left', on='resolver_name')
    .assign(perc=lambda counts: (counts['failures'] / counts['total']).round(2))
    .pivot(index='err_msg', columns='resolver_name', values='perc')
)

In [None]:
tcp_failures_public_breakdown

## UDP

In [None]:
udp_failures = measurements_failures[measurements_failures['proto'] == 'UDP']
pd.options.display.max_colwidth = 100

In [None]:
udp_failures

In [None]:
# Failure reasons for DoUDP measurements to probe resolvers: count per error
# message, share relative to a fixed denominator, sorted by count (descending).
# NOTE(review): 454417 is hard-coded; presumably the total number of DoUDP
# measurements to probe resolvers - verify against the current dataset.
udp_failures_probe = (
    udp_failures[udp_failures['resolver'] == 'probe']
    .groupby(['err_msg', 'resolver'])
    .size()
    .reset_index()
    .rename(columns={0: 'cnt'})
    .assign(perc=lambda counts: (counts['cnt'] / 454417).round(5))
    .sort_values('cnt', ascending=False)
)


In [None]:
print(udp_failures_probe[['cnt', 'perc', 'err_msg']].to_latex(index=False))

In [None]:
# Failure reasons for DoUDP measurements to public resolvers: count per error
# message, share relative to a fixed denominator, sorted by count (descending).
# NOTE(review): 4656086 is hard-coded; presumably the total number of DoUDP
# measurements to public resolvers - verify against the current dataset.
udp_failures_public = (
    udp_failures[udp_failures['resolver'] == 'public']
    .groupby(['err_msg', 'resolver'])
    .size()
    .reset_index()
    .rename(columns={0: 'cnt'})
    .assign(perc=lambda counts: (counts['cnt'] / 4656086).round(5))
    .sort_values('cnt', ascending=False)
)


In [None]:
print(udp_failures_public[['cnt', 'perc', 'err_msg']].to_latex(index=False))

#### ================================================================================

# Plot Regional Heatmaps - Response time variability (standard deviation)

In [None]:
udp_total_var = rt_merged.groupby('resolver_name', as_index=False)['result_rt_udp'].std()
udp_total_var['continent_code'] = 'Total'
udp_total_var = udp_total_var[['resolver_name', 'continent_code', 'result_rt_udp']]

In [None]:
# One extra 'Total' row labelled 'Public Resolver': standard deviation of the
# DoUDP response times over all rows of rt_merged, placed in front of the
# per-resolver totals.
udp_public_var_resolver = [{
    'resolver_name': 'Public Resolver',
    'continent_code': 'Total',
    'result_rt_udp': rt_merged['result_rt_udp'].std(),
}]
udp_total_var = pd.concat([pd.DataFrame(udp_public_var_resolver), udp_total_var],
                          ignore_index=True)

In [None]:
udp_total_var

In [None]:
tcp_total_var = rt_merged.groupby('resolver_name', as_index=False)['result_rt_tcp'].std()
tcp_total_var['continent_code'] = 'Total'
tcp_total_var = tcp_total_var[['resolver_name', 'continent_code', 'result_rt_tcp']]

In [None]:
# One extra 'Total' row labelled 'Public Resolver': standard deviation of the
# DoTCP response times over all rows of rt_merged, placed in front of the
# per-resolver totals.
tcp_public_var_resolver = [{
    'resolver_name': 'Public Resolver',
    'continent_code': 'Total',
    'result_rt_tcp': rt_merged['result_rt_tcp'].std(),
}]
tcp_total_var = pd.concat([pd.DataFrame(tcp_public_var_resolver), tcp_total_var],
                          ignore_index=True)

In [None]:
tcp_total_var

In [None]:
# Standard deviation of DoUDP response times per (resolver, continent),
# followed by the per-resolver 'Total' rows.
# pd.concat instead of DataFrame.append (deprecated in pandas 1.4, removed in 2.0).
udp_heatmap_var = pd.concat([
    rt_merged.groupby(['resolver_name', 'continent_code'], as_index=False)['result_rt_udp'].std(),
    udp_total_var,
])
# Per-continent standard deviation over all rows of rt_merged, labelled
# 'Public Resolver'.
# NOTE(review): rt_merged apparently also contains 'Probe Resolver' rows, so
# this aggregate is not restricted to public resolvers - confirm this is intended.
udp_heatmap_public_resolver_var = rt_merged.groupby(['continent_code'], as_index=False
                               )['result_rt_udp'].std()

udp_heatmap_public_resolver_var['resolver_name'] = 'Public Resolver'
udp_heatmap_var = pd.concat([udp_heatmap_public_resolver_var, udp_heatmap_var], ignore_index=True)
# Long format -> resolver x continent matrix.
udp_heatmap_var = pd.crosstab(udp_heatmap_var['resolver_name'], udp_heatmap_var['continent_code'],
                          values=udp_heatmap_var['result_rt_udp'], aggfunc='median'
                         )

In [None]:
# Standard deviation of DoTCP response times per (resolver, continent),
# followed by the per-resolver 'Total' rows.
# pd.concat instead of DataFrame.append (deprecated in pandas 1.4, removed in 2.0).
tcp_heatmap_var = pd.concat([
    rt_merged.groupby(['resolver_name', 'continent_code'], as_index=False)['result_rt_tcp'].std(),
    tcp_total_var,
])
# Per-continent standard deviation over all rows of rt_merged, labelled
# 'Public Resolver'.
# NOTE(review): rt_merged apparently also contains 'Probe Resolver' rows, so
# this aggregate is not restricted to public resolvers - confirm this is intended.
tcp_heatmap_public_resolver_var = rt_merged.groupby(['continent_code'], as_index=False
                               )['result_rt_tcp'].std()
tcp_heatmap_public_resolver_var['resolver_name'] = 'Public Resolver'
tcp_heatmap_var = pd.concat([tcp_heatmap_public_resolver_var, tcp_heatmap_var], ignore_index=True)
# Long format -> resolver x continent matrix.
tcp_heatmap_var = pd.crosstab(tcp_heatmap_var['resolver_name'], tcp_heatmap_var['continent_code'],
                          values=tcp_heatmap_var['result_rt_tcp'], aggfunc='median'
                         )

In [None]:
# move Probe Resolver to the bottom and Public Resolver to the top
# Implemented by label-based row selection plus pd.concat; DataFrame.append
# (used previously) was removed in pandas 2.0.
_special_rows = ('Public Resolver', 'Probe Resolver')

# Named resolvers keep their current relative order, Probe Resolver goes last.
_udp_named_rows = [name for name in udp_heatmap_var.index if name not in _special_rows]
udp_heatmap_public_resolver_var = udp_heatmap_var[udp_heatmap_var.index == 'Public Resolver']
udp_heatmap_tmp_var = udp_heatmap_var.loc[_udp_named_rows + ['Probe Resolver']]
udp_heatmap_var = pd.concat([udp_heatmap_public_resolver_var, udp_heatmap_tmp_var])

_tcp_named_rows = [name for name in tcp_heatmap_var.index if name not in _special_rows]
tcp_heatmap_public_resolver_var = tcp_heatmap_var[tcp_heatmap_var.index == 'Public Resolver']
tcp_heatmap_tmp_var = tcp_heatmap_var.loc[_tcp_named_rows + ['Probe Resolver']]
tcp_heatmap_var = pd.concat([tcp_heatmap_public_resolver_var, tcp_heatmap_tmp_var])

In [None]:
udp_heatmap_var

In [None]:
tcp_heatmap_var

In [None]:
((tcp_heatmap_var['Total'] / udp_heatmap_var['Total']) -1 ) * 100  # relative increase from DoUDP to DoTCP

In [None]:
diff_heatmap_var = tcp_heatmap_var - udp_heatmap_var

In [None]:
diff_heatmap_var

In [None]:
# Two stacked heatmaps sharing the x axis:
#   top    - standard deviation of DoTCP response times per resolver/continent
#   bottom - difference of the standard deviations (DoTCP minus DoUDP)
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(6, 6.5), gridspec_kw = {'wspace':0, 'hspace':0.15}, sharex=True)
# The plotted values are standard deviations (see the .std() aggregations that
# build tcp_heatmap_var), so the colour bar is labelled accordingly instead of
# the former, misleading 'Response Time [ms]'.
ax1 = sns.heatmap(tcp_heatmap_var,
            annot=True,
            cmap='RdYlGn_r',
            cbar_kws={'label' : 'Response Time Std. Dev. [ms]'},
            ax=ax1,
            fmt='.1f',
            vmin=0,
            vmax=600,
           )
# Blank out the axis titles that seaborn derives from the index/column names.
ax1.set_xlabel('')
ax1.set_ylabel('')

ax2 = sns.heatmap(diff_heatmap_var,
            annot=True,
            cmap='RdBu_r',
            cbar_kws={'label' : 'Response Time Std. Dev. Difference [ms]'},
            ax=ax2,
            fmt='.1f',
            vmin=-200,
            vmax=200,
           )
ax2.set_xlabel('')
ax2.set_ylabel('')

# Display the figure and write it to a PDF in the working directory.
fig.show()
fig.savefig('diff-rt-stddev-heatmap-CC-merged.pdf', bbox_inches='tight')

# Plot ASN Heatmaps - Response time variability (standard deviation)

### Data Prep

In [None]:
rt_pr_asn_udp_var = udp_rt.groupby(['probe_id', 'resolver_name', 'dst_address', 'asn'],
                           as_index=False)['result_rt'].std()

rt_pr_asn_tcp_var = tcp_rt.groupby(['probe_id', 'resolver_name', 'dst_address', 'asn'],
                           as_index=False)['result_rt'].std()

In [None]:
rt_pr_asn_udp_var

In [None]:
rt_pr_asn_tcp_var

In [None]:
# inner join will remove all probe resolvers that do not respond over DoTCP
rt_asn_merged_var = rt_pr_asn_udp_var.merge(rt_pr_asn_tcp_var, on=['probe_id', 'resolver_name', 'dst_address', 'asn'
                                          ], suffixes=['_udp', '_tcp'])

In [None]:
rt_asn_merged_var

### Get AS with the Most Probes

In [None]:
probe_ids_var = rt_asn_merged_var.drop_duplicates(subset=['probe_id'])
probe_ids_var = probe_ids_var[['probe_id', 'asn']]
probe_ids_var

In [None]:
# Count the probes per ASN (probe_ids_var holds one row per probe) and keep the
# ten ASNs with the most probes.
probes_per_asn_var = probe_ids_var.groupby('asn', as_index = False).size()
probes_per_asn_top_10_var = probes_per_asn_var.nlargest(10, 'size')
# Attach a display name per ASN via lookupASName (defined in another cell).
probes_per_asn_top_10_var['asn_name'] = probes_per_asn_top_10_var.apply(lambda row : lookupASName(row['asn']), axis = 1)
# Manual override of the AS name in the row with index label 32.
# NOTE(review): 32 is a hard-coded index label, presumably the row of the
# Orange ASN in probes_per_asn_var - confirm it still matches whenever the
# input data changes. If the label is absent, .at may add a new row instead
# of raising.
probes_per_asn_top_10_var.at[32, 'asn_name'] = 'Orange S.A.'
probes_per_asn_top_10_var

### Filter RTs to Top 10 AS

In [None]:
rt_asn_merged_top_10_var = rt_asn_merged_var[rt_asn_merged_var['asn'].isin(probes_per_asn_top_10_var['asn'])]
rt_asn_merged_top_10_var

In [None]:
rt_asn_merged_top_10_var = rt_asn_merged_top_10_var.merge(probes_per_asn_top_10_var, how='left', left_on='asn', right_on='asn')
rt_asn_merged_top_10_var[(rt_asn_merged_top_10_var['asn_name'] == 'Orange S.A.') & (rt_asn_merged_top_10_var['resolver_name'] == 'OpenNIC')]

# Orange S.A. on openNIC is empty, which is expected

In [None]:
# Standard deviation of the result_rt_udp column per (resolver, AS) ...
_udp_std_per_resolver = (rt_asn_merged_top_10_var
                         .groupby(['resolver_name', 'asn_name'], as_index=False)['result_rt_udp']
                         .std())
# ... and per AS over all rows, labelled 'Public Resolver'.
udp_asn_heatmap_public_resolver_var = (rt_asn_merged_top_10_var
                                       .groupby(['asn_name'], as_index=False)['result_rt_udp']
                                       .std())
udp_asn_heatmap_public_resolver_var['resolver_name'] = 'Public Resolver'

# Stack both and pivot into a resolver x AS matrix.
_udp_std_long = pd.concat([udp_asn_heatmap_public_resolver_var, _udp_std_per_resolver],
                          ignore_index=True)
udp_asn_heatmap_var = pd.crosstab(index=_udp_std_long['resolver_name'],
                                  columns=_udp_std_long['asn_name'],
                                  values=_udp_std_long['result_rt_udp'],
                                  aggfunc='median')

In [None]:
# Standard deviation of the TCP response time for every (resolver, AS) pair.
tcp_std_per_resolver = (
    rt_asn_merged_top_10_var
    .groupby(['resolver_name', 'asn_name'], as_index=False)['result_rt_tcp']
    .std()
)

# Same statistic per AS over all rows, reported under the label 'Public Resolver'.
# NOTE(review): this aggregate uses every row of rt_asn_merged_top_10_var, so it
# also includes rows with resolver_name == 'Probe Resolver' if any are present --
# confirm that this is intended.
tcp_asn_heatmap_public_resolver_var = (
    rt_asn_merged_top_10_var
    .groupby(['asn_name'], as_index=False)['result_rt_tcp']
    .std()
    .assign(resolver_name='Public Resolver')
)

# Long format: aggregate rows first, followed by the per-resolver rows.
tcp_std_long = pd.concat(
    [tcp_asn_heatmap_public_resolver_var, tcp_std_per_resolver],
    ignore_index=True,
)

# Pivot to resolver x AS. Every pair should occur only once in the long frame,
# so the 'median' aggregation simply passes the single value through.
tcp_asn_heatmap_var = pd.crosstab(
    index=tcp_std_long['resolver_name'],
    columns=tcp_std_long['asn_name'],
    values=tcp_std_long['result_rt_tcp'],
    aggfunc='median',
)

In [None]:
# Move Probe Resolver to the bottom and Public Resolver to the top of both tables.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the rows are
# stacked with pd.concat instead. `.loc[['Probe Resolver']]` (list selector) keeps
# the row as a one-row DataFrame and still raises KeyError if the row is missing.

# --- UDP ---
udp_asn_heatmap_var = pd.concat([
    udp_asn_heatmap_var[udp_asn_heatmap_var.index != 'Probe Resolver'],
    udp_asn_heatmap_var.loc[['Probe Resolver']],
])
udp_asn_heatmap_public_resolver_var = udp_asn_heatmap_var[udp_asn_heatmap_var.index == 'Public Resolver']
udp_asn_heatmap_tmp_var = udp_asn_heatmap_var[udp_asn_heatmap_var.index != 'Public Resolver']
udp_asn_heatmap_var = pd.concat([udp_asn_heatmap_public_resolver_var, udp_asn_heatmap_tmp_var])

# --- TCP ---
tcp_asn_heatmap_var = pd.concat([
    tcp_asn_heatmap_var[tcp_asn_heatmap_var.index != 'Probe Resolver'],
    tcp_asn_heatmap_var.loc[['Probe Resolver']],
])
tcp_asn_heatmap_public_resolver_var = tcp_asn_heatmap_var[tcp_asn_heatmap_var.index == 'Public Resolver']
tcp_asn_heatmap_tmp_var = tcp_asn_heatmap_var[tcp_asn_heatmap_var.index != 'Public Resolver']
tcp_asn_heatmap_var = pd.concat([tcp_asn_heatmap_public_resolver_var, tcp_asn_heatmap_tmp_var])

In [None]:
# Show the reordered UDP table (resolver rows x AS-name columns).
udp_asn_heatmap_var

In [None]:
# Show the reordered TCP table (resolver rows x AS-name columns).
tcp_asn_heatmap_var

In [None]:
# Cell-wise TCP minus UDP, aligned on row and column labels;
# a positive cell means the TCP value is the larger one.
diff_asn_heatmap_var = tcp_asn_heatmap_var.sub(udp_asn_heatmap_var)

In [None]:
# Show the TCP - UDP difference table.
diff_asn_heatmap_var

In [None]:
# Distinct AS names in the row order of probes_per_asn_top_10_var; the plot below uses this as its column order.
probes_per_asn_top_10_var['asn_name'].unique()

In [None]:
# Both panels share one column order: the AS names in the row order of
# probes_per_asn_top_10_var.
asn_column_order = probes_per_asn_top_10_var['asn_name'].unique()

fig, (ax1, ax2) = plt.subplots(
    nrows=2,
    figsize=(8, 6.5),
    sharex=True,
    gridspec_kw={'wspace': 0, 'hspace': 0.15},
)

# Top panel: TCP table.
# NOTE(review): the tables plotted here hold standard deviations of the response
# time, while the colour-bar labels only say "Response Time" -- confirm the wording.
sns.heatmap(
    tcp_asn_heatmap_var[asn_column_order],
    ax=ax1,
    annot=True,
    fmt='.1f',
    cmap='RdYlGn_r',
    vmin=0,
    vmax=300,
    cbar_kws={'label': 'Response Time [ms]'},
)
ax1.set(xlabel='', ylabel='')

# Bottom panel: TCP - UDP difference on a diverging colour map centred on 0.
sns.heatmap(
    diff_asn_heatmap_var[asn_column_order],
    ax=ax2,
    annot=True,
    fmt='.1f',
    cmap='RdBu_r',
    vmin=-200,
    vmax=200,
    cbar_kws={'label': 'Response Time Difference [ms]'},
)
ax2.set(xlabel='', ylabel='')

fig.show()
fig.savefig('diff-rt-stddev-heatmap-ASN-merged.pdf', bbox_inches='tight')