# Performance

In [None]:
import numpy as np
import sqlite3 as sq
import matplotlib.pyplot as plt
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from collections import Counter
import requests
from mpl_toolkits.basemap import Basemap
import time
import json
import tldextract

class Colors:
    """Matplotlib colour codes, one per DNS transport, shared by the plots."""

    quic = "c"     # DoQ
    https = "m"    # DoH
    tls = "gray"   # DoT
    tcp = "g"      # DoTCP
    udp = "b"      # DoUDP

%matplotlib inline

In [None]:
# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Widen the notebook container so wide DataFrames and figures fit on screen.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
def buildExtractedDomain(extracted):
    """Reassemble a host name from an object with subdomain/domain/suffix parts.

    The subdomain (followed by a dot) is included unless it is empty or the
    wildcard "*". The registered domain is included only when both ``domain``
    and ``suffix`` are non-empty. Returns "" when neither part qualifies.
    """
    pieces = []
    if extracted.subdomain not in ("", "*"):
        pieces.append(extracted.subdomain + ".")
    if extracted.domain != "" and extracted.suffix != "":
        pieces.append(extracted.domain + "." + extracted.suffix)
    return "".join(pieces)

def mapToCommonName(info):
    """Derive a normalised host name from a certificate info mapping.

    Parameters
    ----------
    info : mapping
        Certificate details; the keys "CommonName" and "DNSNames" are read.

    Returns
    -------
    str or None
        The normalised common name when it yields a non-empty host name,
        otherwise the normalised first entry of "DNSNames". None when
        "DNSNames" is missing, None or empty.
    """
    rawCommonName = info.get("CommonName")
    if rawCommonName:
        commonName = buildExtractedDomain(tldextract.extract(rawCommonName))
        if commonName != "":
            return commonName
    dnsNames = info.get("DNSNames")
    # Covers None as well as an empty list, which would otherwise fail on [0].
    if not dnsNames:
        return None
    return buildExtractedDomain(tldextract.extract(dnsNames[0]))

def calculateCDF(items):
    """Compute an empirical CDF of ``items`` from an automatically binned histogram.

    Parameters
    ----------
    items : array-like of numbers
        Samples to summarise. NaN entries are ignored.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The right edges of the histogram bins and the cumulative fraction of
        samples up to each edge. Both arrays are empty when there are no
        usable samples.
    """
    values = np.asarray(items)
    if np.issubdtype(values.dtype, np.floating):
        # np.histogram cannot derive a bin range from NaN and raises instead.
        values = values[~np.isnan(values)]
    if values.size == 0:
        # Avoid the 0/0 division that would produce a NaN-valued "CDF".
        return np.array([]), np.array([])

    count, bin_edges = np.histogram(values, bins="auto")
    # Normalise the cumulative counts so that the last value is exactly 1.
    cdf = np.cumsum(count) / count.sum()
    return bin_edges[1:], cdf

def setBoxesWhite(bp):
    """Give every box artist in ``bp['boxes']`` a white face colour (in place)."""
    fill = "white"
    for box in bp['boxes']:
        box.set_facecolor(fill)

In [None]:
# Render legend, axis-label, title and tick text at the same enlarged size.
params = {
    rc_key: 'x-large'
    for rc_key in (
        'legend.fontsize',
        'axes.labelsize',
        'axes.titlesize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
}

plt.rcParams.update(params)

In [None]:
connection_final = sq.connect("./performance/measurements.w03.db")
connection_final.row_factory = sq.Row

connection_misc = sq.connect("./misc/merged-complete.db")
connection_misc.row_factory = sq.Row

# ---------------------------------------------------------------------------------

# Data preparation

In [None]:
#load dns_measurements w03
measurements = pd.read_sql_query("SELECT * FROM dns_measurements WHERE NOT cache_warming", connection_final)
measurements

In [None]:
# Row counts: overall first, then one line per protocol.
print("all measurements: " + str(len(measurements)))
for label, proto in (('doq', 'quic'), ('doh', 'https'), ('dot', 'tls'), ('dotcp', 'tcp'), ('doudp', 'udp')):
    print(label + ': ' + str(len(measurements[measurements['protocol'] == proto])))


## Account for QUIC targeting 3 ports

In [None]:
# merge misc ip/port combinations from week 2022-02 with measurements -> this removes all ports on resolvers which were not doq_verified

sqlQuicPorts = """
select 
     ip, port
from q_versions
where strftime('%Y-%W', created_at) = '2022-02';
"""
quic_ports = pd.read_sql_query(sqlQuicPorts, connection_misc)
quic_ports


In [None]:
measurements_quic = measurements[measurements['protocol'] == 'quic']
measurements_quic = measurements_quic.merge(quic_ports, on=['ip', 'port'])
measurements_quic

In [None]:
# drop duplicates on floored timestamp and ip -> this removes resolvers with doubled ports (e.g., adguard)

measurements_quic['created_datetime'] = pd.to_datetime(measurements_quic['created'], unit='s') 
measurements_quic['created_datetime_hour'] = measurements_quic['created_datetime'].dt.floor('h')
measurements_quic_cleaned = measurements_quic.drop_duplicates(['ip', 'created_datetime_hour'])
measurements_quic_cleaned


In [None]:
measurements = measurements[measurements['protocol'] != 'quic']
measurements = pd.concat([measurements, measurements_quic_cleaned])
measurements = measurements.drop(columns=['created_datetime', 'created_datetime_hour'])
measurements

In [None]:
# Row counts after the QUIC port clean-up: overall first, then per protocol.
print("all measurements: " + str(len(measurements)))
for label, proto in (('doq', 'quic'), ('doh', 'https'), ('dot', 'tls'), ('dotcp', 'tcp'), ('doudp', 'udp')):
    print(label + ': ' + str(len(measurements[measurements['protocol'] == proto])))


### All measurements of Resolvers which answered at least once with an r_code for every protocol

In [None]:
# number of resolvers per protocol

def _responsive_ips(protocol):
    """Return the distinct IPs that have at least one row with an r_code for ``protocol``."""
    answered = measurements['r_code'].notna() & (measurements['protocol'] == protocol)
    return set(measurements.loc[answered, 'ip'].unique())

measuremens_ips_quic_responsive = _responsive_ips('quic')
measuremens_ips_https_responsive = _responsive_ips('https')
measuremens_ips_tls_responsive = _responsive_ips('tls')
measuremens_ips_tcp_responsive = _responsive_ips('tcp')
measuremens_ips_udp_responsive = _responsive_ips('udp')

for label, ips in (
    ('doq', measuremens_ips_quic_responsive),
    ('doh', measuremens_ips_https_responsive),
    ('dot', measuremens_ips_tls_responsive),
    ('dotcp', measuremens_ips_tcp_responsive),
    ('doudp', measuremens_ips_udp_responsive),
):
    print(label + ': ' + str(len(ips)))


In [None]:
# number of resolvers supporting every protocol
measuremens_ips_all_responsive = set.intersection(measuremens_ips_quic_responsive, measuremens_ips_https_responsive, measuremens_ips_tls_responsive, measuremens_ips_tcp_responsive, measuremens_ips_udp_responsive)
len(measuremens_ips_all_responsive)


In [None]:
measurements_responsive = measurements[measurements['ip'].isin(measuremens_ips_all_responsive)]

In [None]:
# Row counts restricted to resolvers that answered on every protocol.
print("all measurements: " + str(len(measurements_responsive)))
for label, proto in (('doq', 'quic'), ('doh', 'https'), ('dot', 'tls'), ('dotcp', 'tcp'), ('doudp', 'udp')):
    print(label + ': ' + str(len(measurements_responsive[measurements_responsive['protocol'] == proto])))


# ---------------------------------------------------------------------------------

# Meta

## Stats

In [None]:
# total
measurements_responsive_per_protocol = measurements_responsive.groupby('protocol').size().reset_index(name = "total")

# A measurement counts as successful when it has a response code, no error and
# a recorded total_time below 5e9 (presumably nanoseconds, i.e. 5 s — TODO confirm unit).
mask = (~measurements_responsive['r_code'].isna()) & (measurements_responsive['error'].isnull()) & (~measurements_responsive['total_time'].isna()) & (measurements_responsive['total_time'] < 5e9)

# successful
measurements_successful = measurements_responsive[mask]
measurements_successful_per_protocol = measurements_successful.groupby('protocol').size().reset_index(name = "sucessful")

# Distinct resolver IPs with at least one successful measurement, per protocol.
measuremens_ips_quic_successful = set(measurements_successful[measurements_successful['protocol'] == 'quic']['ip'].unique())
measuremens_ips_https_successful = set(measurements_successful[measurements_successful['protocol'] == 'https']['ip'].unique())
measuremens_ips_tls_successful = set(measurements_successful[measurements_successful['protocol'] == 'tls']['ip'].unique())
measuremens_ips_tcp_successful = set(measurements_successful[measurements_successful['protocol'] == 'tcp']['ip'].unique())
measuremens_ips_udp_successful = set(measurements_successful[measurements_successful['protocol'] == 'udp']['ip'].unique())

measuremens_ips_all_successful = set.intersection(measuremens_ips_quic_successful, measuremens_ips_https_successful, measuremens_ips_tls_successful, measuremens_ips_tcp_successful, measuremens_ips_udp_successful)
# Report the set computed just above (resolvers successful on every protocol),
# not the responsive set from the data-preparation section.
print(len(measuremens_ips_all_successful))
# 264 resolvers are also successful


# failed
measurements_failed = measurements_responsive[~mask]
measurements_failed_per_protocol = measurements_failed.groupby('protocol').size().reset_index(name = "failed")


In [None]:
measurements_stats = measurements_responsive_per_protocol.merge(measurements_successful_per_protocol)
measurements_stats = measurements_stats.merge(measurements_failed_per_protocol)
measurements_stats['failed_rel'] = (measurements_stats['failed'] / measurements_stats['total']).map(" {:.2%}".format) 
measurements_stats


In [None]:
measurements_stats_pivot = measurements_stats.transpose()
measurements_stats_pivot_header = measurements_stats_pivot.iloc[0]
measurements_stats_pivot = measurements_stats_pivot[1:]
measurements_stats_pivot.columns = measurements_stats_pivot_header
measurements_stats_pivot.rename(columns={'https':'DoH','quic':'DoQ','tcp':'DoTCP','tls':'DoT','udp':'DoUDP'}, inplace=True)
measurements_stats_pivot.rename({'total':'Total','sucessful':'Successful','failed_rel':'Failure Rate'}, axis='index', inplace=True)
measurements_stats_pivot = measurements_stats_pivot.drop(['failed'])
measurements_stats_pivot = measurements_stats_pivot[['DoQ', 'DoUDP', 'DoTCP', 'DoT', 'DoH']]
measurements_stats_pivot
print(measurements_stats_pivot.to_latex())


## Load Common Names from adoption of the week which was used for performance measurements

In [None]:
# Each CSV row is (ip, protocol, port, info); the info column holds JSON and is decoded on load.
certs = pd.read_csv("./misc/certs-last.csv", converters={"info":json.loads}, header = None, names = ["ip", "protocol", "port", "info"])
# .copy() makes the QUIC subset an independent frame, so adding a column below
# is not a chained assignment on a slice of `certs`.
quic_certs_last_week = certs[certs["protocol"] == "quic"].copy()
quic_certs_last_week["common_name"] = quic_certs_last_week["info"].apply(mapToCommonName)

In [None]:
quic_certs_last_week = quic_certs_last_week.drop_duplicates(subset = ['ip', 'common_name'])

In [None]:
count_by_common_name = quic_certs_last_week.groupby("common_name").size().reset_index(name = "count").set_index("common_name")
count_by_common_name["rel"] = (count_by_common_name["count"] / count_by_common_name["count"].sum()) * 100

### nextdns.io

In [None]:
quic_certs_nextdns = quic_certs_last_week[quic_certs_last_week['common_name'] == 'dns.nextdns.io']

In [None]:
nextdns_ips = quic_certs_nextdns['ip']

In [None]:
nextdns_ips_in_performance = set(nextdns_ips).intersection(measuremens_ips_all_responsive)
nextdns_ips_in_performance

# nextdns is not offering all protocols, and is thus not in performance

### AdGuard

In [None]:
quic_certs_adguard = quic_certs_last_week[(quic_certs_last_week['common_name'] == 'dns.adguard.com') | (quic_certs_last_week['common_name'] == '*.d.adguard-dns.com') | (quic_certs_last_week['common_name'] == 'adguard.ch')]
adguard_ips = quic_certs_adguard['ip']
adguard_ips_in_performance = set(adguard_ips).intersection(measuremens_ips_all_responsive)
adguard_ips_in_performance

# AdGuard offers all protocols and is thus part of the performance measurements

# ---------------------------------------------------------------------------------

# Regional Distribution

In [None]:
# get regional data from ip addresses
# Results are cached in performance-ip-location.csv so that re-running this
# cell only queries ip-api.com for resolvers that have not been located yet.
data = {}
try:
    df = pd.read_csv("performance-ip-location.csv")
    for index, row in df.iterrows():
        data[row["ip"]] = row.to_dict()
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (e.g. first run): every IP is looked up below.
    pass
# NOTE(review): cached rows for IPs outside the current responsive set remain
# in the resulting frame, as they did before — confirm this is intended.
try:
    for ip in measuremens_ips_all_responsive:
        if ip in data:
            continue
        r = requests.get("http://ip-api.com/json/" + ip, timeout = 30)
        r.raise_for_status()
        # Not named `json`: that would shadow the imported json module, which
        # the certificate-loading cell needs (json.loads) when it is re-run.
        location = r.json()
        if location.get("status") == "fail":
            print("no location for " + ip + ": " + str(location.get("message")))
            continue
        data[ip] = {
            field: location[field]
            for field in ("country", "countryCode", "region", "regionName", "city", "zip", "lat", "lon", "org", "as")
        }
        data[ip]["ip"] = ip
        # Pause between requests to stay below the API's rate limit.
        time.sleep(1.5)
finally:
    # Persist whatever has been collected, even if a lookup failed part-way.
    measuremens_ips_all_responsive_regional = pd.DataFrame.from_dict(data, orient = "index")
    measuremens_ips_all_responsive_regional.to_csv("performance-ip-location.csv", index = False)

In [None]:
measuremens_ips_all_responsive_regional

## By Continent

In [None]:
countriesContinents = pd.read_csv("Countries-Continents.csv", index_col = False)
renamed = countriesContinents.columns.tolist()
renamed[0] = "country"
countriesContinents.columns = renamed

def mapToContinent(row):
    """Map a country row to a continent label such as "Europe (EU)".

    "Americas" is split into South America (when ``intermediate-region`` is
    "South America") and North America (everything else). Regions without a
    known label are returned unchanged.
    """
    region = row["region"]
    if region == "Americas":
        # Only rows in the Americas need the finer-grained intermediate region.
        if row["intermediate-region"] == "South America":
            return "South America (SA)"
        return "North America (NA)"
    continent_labels = {
        "Asia": "Asia (AS)",
        "Europe": "Europe (EU)",
        "Oceania": "Oceania (OC)",
        "Africa": "Africa (AF)",
    }
    return continent_labels.get(region, region)

countriesContinents["continent"] = countriesContinents.apply(mapToContinent, axis = 1)

In [None]:
measuremens_ips_all_responsive_regional = pd.merge(measuremens_ips_all_responsive_regional, countriesContinents, on = "country")
measuremens_ips_all_responsive_regional['continent'].value_counts()


In [None]:
dox_by_continent = pd.concat([measuremens_ips_all_responsive_regional['continent'].value_counts(),
                                     measuremens_ips_all_responsive_regional['continent'].value_counts(normalize=True)], axis=1)
dox_by_continent = dox_by_continent.reset_index() 
dox_by_continent.columns = ['Continent', 'abs', 'rel']
dox_by_continent['rel'] = dox_by_continent['rel'].map(" ({:.2%})".format)
dox_by_continent['DoX verified'] = dox_by_continent['abs'].astype(str) + dox_by_continent['rel']
dox_by_continent = dox_by_continent[['Continent', 'DoX verified']]
dox_by_continent

## World map of resolvers

In [None]:
plt.figure(figsize=(12,6))
worldMap = Basemap(projection='robin', lon_0=0, resolution='l')
worldMap.drawcountries(color='#ffffff', linewidth=0.5)
worldMap.fillcontinents(color='#c0c0c0', lake_color='#ffffff')
x, y = worldMap(measuremens_ips_all_responsive_regional["lon"].tolist(), measuremens_ips_all_responsive_regional["lat"].tolist())
homeX, homeY = worldMap([11.66864063068705], [48.26187023547006])
plt.plot(x, y, 'ro', markersize = 3)
plt.plot(homeX, homeY, 'bo', markersize = 4)
plt.show()


## By ASN

In [None]:
def lookupASName(asn):
    """Fetch the name of an autonomous system from the CAIDA AS Rank REST API.

    Parameters
    ----------
    asn : int or str
        AS number without the "AS" prefix; it is appended to the request URL.

    Returns
    -------
    The value found at ``data.asn.asnName`` in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get("http://api.asrank.caida.org/v2/restful/asns/" + str(asn), timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload["data"]["asn"]["asnName"]

top_n = 10

target_dist_by_asn = pd.concat([measuremens_ips_all_responsive_regional['as'].value_counts().head(top_n),
                                     measuremens_ips_all_responsive_regional['as'].value_counts(normalize=True).head(top_n)], axis=1)
target_dist_by_asn = target_dist_by_asn.reset_index() 
target_dist_by_asn.columns = ['ASN', 'abs', 'rel']
target_dist_by_asn['rel'] = target_dist_by_asn['rel'].map(" ({:.2%})".format)
target_dist_by_asn_table = target_dist_by_asn

target_dist_by_asn_table['ASN'] = target_dist_by_asn_table['ASN'].str.split(' ', expand=True)[0]
target_dist_by_asn_table['ASN'] = target_dist_by_asn_table['ASN'].str.slice(2)

target_dist_by_asn_table['AS'] = target_dist_by_asn_table.apply(lambda row : lookupASName(row['ASN']), axis=1)

target_dist_asn_list = target_dist_by_asn_table[['ASN']]


In [None]:
target_dist_by_asn_table

In [None]:
len(measuremens_ips_all_responsive_regional['as'].value_counts())

# all ASes observed for DoX verified

# ---------------------------------------------------------------------------------

# TCP

## TFO support

In [None]:
tcp_tfo = pd.read_sql_query("SELECT * FROM fast_open_supports", connection_misc)

In [None]:
tcp_tfo_support = tcp_tfo[tcp_tfo['support'] != 0]
tcp_tfo_support_ips = set(tcp_tfo_support['ip'].unique())
len(tcp_tfo_support_ips)

#208 resolvers with support for tfo

In [None]:
tcp_tfo_support_ips_successful = set(measurements_successful['ip'].unique()).intersection(tcp_tfo_support_ips)

len(tcp_tfo_support_ips_successful)
#0 resolver with successful measurements supports tfo

In [None]:
measuremens_ips_all_responsive_regional[measuremens_ips_all_responsive_regional['ip'].isin(tcp_tfo_support_ips_successful)]


In [None]:
quic_certs_last_week[quic_certs_last_week['ip'].isin(tcp_tfo_support_ips_successful)]


In [None]:
#0 resolver with successful measurements which supports tfo

## edns-tcp-keepalive support

In [None]:
tcp_edns0_keepalive = pd.read_sql_query("SELECT * FROM e_dns0", connection_misc)

In [None]:
tcp_edns0_keepalive_support = tcp_edns0_keepalive[tcp_edns0_keepalive['support'] != 0]
tcp_edns0_keepalive_support_ips = set(tcp_edns0_keepalive_support['ip'].unique())

len(tcp_edns0_keepalive_support_ips)
#29 resolvers with support for edns0-tcp-keepalive

In [None]:
tcp_edns0_keepalive_support_ips_successful = set(measurements_successful['ip'].unique()).intersection(tcp_edns0_keepalive_support_ips)
len(tcp_edns0_keepalive_support_ips_successful)

#18 resolver with successful measurements support edns0-tcp-keepalive

In [None]:
len(quic_certs_last_week[quic_certs_last_week['ip'].isin(tcp_edns0_keepalive_support_ips_successful)])
#18 resolvers with successful measurements which support edns0-tcp-keepalive are all adguard


In [None]:
set(tcp_edns0_keepalive_support['timeout'] != 0)
#all resolvers have a timeout value of 0, which signals to close the connection after having received the response.


# ---------------------------------------------------------------------------------

## Account for Traceroutes

In [None]:
traceroutes_destinations = pd.read_sql_query("SELECT * FROM traceroutes where dest_ip = hop_ip", connection_final)

In [None]:
# remove traceroutes which are invalid
traceroutes_destinations_successful = traceroutes_destinations[(~traceroutes_destinations['ttl'].isna()) & (traceroutes_destinations['rtt'] < 5e9)]


In [None]:
# drop duplicates on dns_measurement_id
traceroutes_destinations_successful_cleaned = traceroutes_destinations_successful.drop_duplicates(['dns_measurement_id'])


In [None]:
# save udp, as we do not consider udp traceroutes
# The UDP rows are set aside first and then removed from measurements_successful.
# NOTE: the order matters and the cell is not re-runnable — a second run would
# read the already UDP-free measurements_successful and leave
# measurements_successful_udp empty.
measurements_successful_udp = measurements_successful[measurements_successful['protocol'] == 'udp']
measurements_successful = measurements_successful[measurements_successful['protocol'] != 'udp']


In [None]:
# merge with measurements_successful
measurements_traceroutes_successful = measurements_successful.merge(traceroutes_destinations_successful_cleaned, left_on=['id', 'protocol'], right_on=['dns_measurement_id', 'protocol'])


In [None]:
print(len(measurements_successful))
print(len(measurements_traceroutes_successful))

In [None]:
# select only ips which have successful traceroute measurements on all protocols with handshakes
measurements_traceroutes_successful_ips_doq = set(measurements_traceroutes_successful[measurements_traceroutes_successful['protocol'] == 'quic']['dest_ip'].unique())
measurements_traceroutes_successful_ips_doh = set(measurements_traceroutes_successful[measurements_traceroutes_successful['protocol'] == 'https']['dest_ip'].unique())
measurements_traceroutes_successful_ips_dot = set(measurements_traceroutes_successful[measurements_traceroutes_successful['protocol'] == 'tls']['dest_ip'].unique())
measurements_traceroutes_successful_ips_dotcp = set(measurements_traceroutes_successful[measurements_traceroutes_successful['protocol'] == 'tcp']['dest_ip'].unique())
measurements_traceroutes_successful_ips_udp = set(measurements_successful_udp['ip'])

measurements_traceroutes_successful_ips_all= set.intersection(measurements_traceroutes_successful_ips_doq, measurements_traceroutes_successful_ips_doh, measurements_traceroutes_successful_ips_dot, measurements_traceroutes_successful_ips_dotcp, measurements_traceroutes_successful_ips_udp)
measurements_traceroutes_successful_all = measurements_traceroutes_successful[measurements_traceroutes_successful['dest_ip'].isin(measurements_traceroutes_successful_ips_all)]

measurements_traceroutes_successful_all

In [None]:
# add udp
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement and
# is already used for the QUIC merge earlier in this notebook.
# Existing UDP rows are dropped first so that re-running the cell does not
# append them a second time (on the first run there are none).
measurements_traceroutes_successful_all = pd.concat([
    measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] != 'udp'],
    measurements_successful_udp[measurements_successful_udp['ip'].isin(measurements_traceroutes_successful_ips_all)],
])


In [None]:
# Number of distinct resolver IPs: in the intersection first, then per protocol.
print("all traceroute measurements: " + str(len(measurements_traceroutes_successful_ips_all)))
print('doq: ' + str(len(measurements_traceroutes_successful_ips_doq)))
print('doh: ' + str(len(measurements_traceroutes_successful_ips_doh)))
print('dot: ' + str(len(measurements_traceroutes_successful_ips_dot)))
print('dotcp: ' + str(len(measurements_traceroutes_successful_ips_dotcp)))
# UDP rows are removed before the traceroute merge, so filtering the merged
# frame for 'udp' always yields 0; use the UDP IP set built for the intersection.
print('doudp: ' + str(len(measurements_traceroutes_successful_ips_udp)))

# ---------------------------------------------------------------------------------

# DoQ

In [None]:
doq_successful = measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] == 'quic']

### 0-RTT support

In [None]:
quic_0_rtt = pd.read_sql_query("SELECT * FROM q0_rtt_supports", connection_misc)

In [None]:
quic_0_rtt[quic_0_rtt['support'] != 0]

# no resolver with 0-RTT support

# ---------------------------------------------------------------------------------

# DoH

In [None]:
doh_successful = measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] == 'https']

### TLS versions

In [None]:
doh_successful_tls = doh_successful.groupby('tls_version').size().reset_index(name = "total")
doh_successful_tls


In [None]:
doh_successful_tls['total'][1] / doh_successful_tls['total'].sum()


### HTTP versions

In [None]:
doh_successful_http = doh_successful.groupby('http_version').size().reset_index(name = "total")
doh_successful_http


In [None]:
doh_successful_http['total'][1] / doh_successful_http['total'].sum()

# ---------------------------------------------------------------------------------

# DoT

In [None]:
dot_successful = measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] == 'tls']

### TLS versions

In [None]:
dot_successful_tls = dot_successful.groupby('tls_version').size().reset_index(name = "total")
dot_successful_tls


# ---------------------------------------------------------------------------------

# DoTCP

In [None]:
dotcp_successful = measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] == 'tcp']

# ---------------------------------------------------------------------------------

# DoUDP

In [None]:
doudp_successful = measurements_traceroutes_successful_all[measurements_traceroutes_successful_all['protocol'] == 'udp']

# ---------------------------------------------------------------------------------

# Protocol Comparison

## Resolve Times

In [None]:
doq_query_time = list(doq_successful['query_time'] / 1000000)
doh_query_time = list(doh_successful['query_time'] / 1000000)
dot_query_time = list(dot_successful['query_time'] / 1000000)
dotcp_query_time = list(dotcp_successful['query_time'] / 1000000)
doudp_query_time = list(doudp_successful['query_time'] / 1000000)

In [None]:
# Horizontal box plot of the resolve times per protocol (outliers hidden, mean drawn as a line).
fig = plt.figure(figsize = (10, 7))
ax = fig.add_subplot(111)
bp = ax.boxplot([doq_query_time, doh_query_time, dot_query_time, dotcp_query_time, doudp_query_time], vert = 0, sym = "", patch_artist=True, meanline = True, showmeans = True)
setBoxesWhite(bp)

# Write each median value next to its median line.
for median in bp["medians"]:
    x, y = median.get_data()
    ax.text(x[0] - 9, y[0] - 0.2, "{:.0f}".format(x[0]), fontsize = 9, color='black')

ax.set_yticklabels(["DoQ", "DoH", "DoT", "DoTCP", "DoUDP"])

ax.set_xlabel('ms', fontsize = 12)
ax.set_ylabel("Protocol", fontsize = 12)
plt.grid()

# plt.show() takes no artists; the boxplot dict was being passed as a stray positional argument.
plt.show()

In [None]:
print('Samples:')
print(len(doq_query_time))
print(len(doh_query_time))
print(len(dot_query_time))
print(len(dotcp_query_time))
print(len(doudp_query_time))

In [None]:
print('Means:')
print(np.mean(doq_query_time))
print(np.mean(doh_query_time))
print(np.mean(dot_query_time))
print(np.mean(dotcp_query_time))
print(np.mean(doudp_query_time))

print('Rate DoQ to DoUDP mean: ' + str(np.mean(doq_query_time) / np.mean(doudp_query_time)))
print('Rate DoQ to DoH mean: ' + str(np.mean(doq_query_time) / np.mean(doh_query_time)))

In [None]:
print('Medians:')
print(np.median(doq_query_time))
print(np.median(doh_query_time))
print(np.median(dot_query_time))
print(np.median(dotcp_query_time))
print(np.median(doudp_query_time))

# Both ratios divide the other protocol's median by the DoQ median, so the
# labels name the numerator first, as the ratio labels of the means do.
print('Rate DoUDP to DoQ median: ' + str(np.median(doudp_query_time) / np.median(doq_query_time)))
print('Rate DoTCP to DoQ median: ' + str(np.median(dotcp_query_time) / np.median(doq_query_time)))

In [None]:
fig, ax = plt.subplots(figsize = (7, 5))

# One CDF line per protocol, drawn in this order (the legend is reversed below).
for proto_label, samples, line_color in (
    ("DoUDP", doudp_query_time, Colors.udp),
    ("DoTCP", dotcp_query_time, Colors.tcp),
    ("DoT", dot_query_time, Colors.tls),
    ("DoH", doh_query_time, Colors.https),
    ("DoQ", doq_query_time, Colors.quic),
):
    bin_edges, cumulative = calculateCDF(samples)
    plt.plot(bin_edges, cumulative, label=proto_label, color = line_color)

# List the legend entries in reverse drawing order.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1])

plt.xlim([0, 400])
plt.ylim([0, 1.05])
plt.xticks(list(range(100, 401, 100)))

plt.xlabel("ms", fontsize = 12)
plt.ylabel("CDF", fontsize = 12)

plt.grid()

plt.show()

## Handshake time

In [None]:
doq_handshake_time = list(doq_successful['q_ui_c_handshake_duration'] / 1000000)
doh_handshake_time = list((doh_successful['tcp_handshake_duration'] + doh_successful['tls_handshake_duration']) / 1000000)
dot_handshake_time = list((dot_successful['tcp_handshake_duration'] + dot_successful['tls_handshake_duration']) / 1000000)
dotcp_handshake_time = list(dotcp_successful['tcp_handshake_duration'] / 1000000)

In [None]:
# Horizontal box plot of the handshake times per protocol (outliers hidden, mean drawn as a line).
fig = plt.figure(figsize = (10, 7))
ax = fig.add_subplot(111)
bp = ax.boxplot([doq_handshake_time, doh_handshake_time, dot_handshake_time, dotcp_handshake_time], vert = 0, sym = "", patch_artist=True, meanline = True, showmeans = True)
setBoxesWhite(bp)

# Write each median value next to its median line.
for median in bp["medians"]:
    x, y = median.get_data()
    ax.text(x[0] - 9, y[0] - 0.2, "{:.0f}".format(x[0]), fontsize = 9, color='black')

ax.set_yticklabels(["DoQ", "DoH", "DoT", "DoTCP"])

ax.set_xlabel('ms', fontsize = 12)
ax.set_ylabel("Protocol", fontsize = 12)
plt.grid()

# plt.show() takes no artists; the boxplot dict was being passed as a stray positional argument.
plt.show()

In [None]:
print('Samples:')
print(len(doq_handshake_time))
print(len(doh_handshake_time))
print(len(dot_handshake_time))
print(len(dotcp_handshake_time))


In [None]:
print('Means:')
print(np.mean(doq_handshake_time))
print(np.mean(doh_handshake_time))
print(np.mean(dot_handshake_time))
print(np.mean(dotcp_handshake_time))

In [None]:
print('Medians:')
print(np.median(doq_handshake_time))
print(np.median(doh_handshake_time))
print(np.median(dot_handshake_time))
print(np.median(dotcp_handshake_time))

In [None]:
fig = plt.figure(figsize = (7, 5))

# One handshake-time CDF line per connection-oriented protocol.
for proto_label, samples, line_color in (
    ("DoQ", doq_handshake_time, Colors.quic),
    ("DoH", doh_handshake_time, Colors.https),
    ("DoT", dot_handshake_time, Colors.tls),
    ("DoTCP", dotcp_handshake_time, Colors.tcp),
):
    bin_edges, cumulative = calculateCDF(samples)
    plt.plot(bin_edges, cumulative, label=proto_label, color = line_color)

plt.xlim([0, 1000])
plt.ylim([0, 1.05])
plt.xticks(list(range(100, 1001, 100)))

plt.xlabel("ms", fontsize = 12)
plt.ylabel("CDF", fontsize = 12)

plt.legend()
plt.grid()

plt.show()

## Traceroutes

In [None]:
doq_traceroutes_time = list(doq_successful['rtt'] / 1000000)
doh_traceroutes_time = list(doh_successful['rtt'] / 1000000)
dot_traceroutes_time = list(dot_successful['rtt'] / 1000000)
dotcp_traceroutes_time = list(dotcp_successful['rtt'] / 1000000)


In [None]:
print('Traceroutes per protocol:')
print(len(doq_traceroutes_time))
print(len(doh_traceroutes_time))
print(len(dot_traceroutes_time))
print(len(dotcp_traceroutes_time))

In [None]:
print('Mean traceroute times:')
print(np.mean(doq_traceroutes_time))
print(np.mean(doh_traceroutes_time))
print(np.mean(dot_traceroutes_time))
print(np.mean(dotcp_traceroutes_time))

In [None]:
print('Medians:')
print(np.median(doq_traceroutes_time))
print(np.median(doh_traceroutes_time))
print(np.median(dot_traceroutes_time))
print(np.median(dotcp_traceroutes_time))

In [None]:
# Horizontal box plot of the traceroute RTTs per protocol (outliers hidden, mean drawn as a line).
fig = plt.figure(figsize = (10, 7))
ax = fig.add_subplot(111)
bp = ax.boxplot([doq_traceroutes_time, doh_traceroutes_time, dot_traceroutes_time, dotcp_traceroutes_time], vert = 0, sym = "", patch_artist=True, meanline = True, showmeans = True)
setBoxesWhite(bp)

# Write each median value next to its median line.
for median in bp["medians"]:
    x, y = median.get_data()
    ax.text(x[0] - 9, y[0] - 0.2, "{:.0f}".format(x[0]), fontsize = 9, color='black')

ax.set_yticklabels(["DoQ", "DoH", "DoT", "DoTCP"])

ax.set_xlabel('ms', fontsize = 12)
ax.set_ylabel("Protocol", fontsize = 12)
plt.grid()

# plt.show() takes no artists; the boxplot dict was being passed as a stray positional argument.
plt.show()

In [None]:
fig = plt.figure(figsize = (7, 5))

# One traceroute-RTT CDF line per connection-oriented protocol.
for proto_label, samples, line_color in (
    ("DoQ", doq_traceroutes_time, Colors.quic),
    ("DoH", doh_traceroutes_time, Colors.https),
    ("DoT", dot_traceroutes_time, Colors.tls),
    ("DoTCP", dotcp_traceroutes_time, Colors.tcp),
):
    bin_edges, cumulative = calculateCDF(samples)
    plt.plot(bin_edges, cumulative, label=proto_label, color = line_color)

plt.xlim([0, 400])
plt.ylim([0, 1.05])
plt.xticks(list(range(100, 401, 100)))

plt.xlabel("ms", fontsize = 12)
plt.ylabel("CDF", fontsize = 12)

plt.legend()
plt.grid()

plt.show()

## Resolve Time + RTT

In [None]:
fig, main_ax = plt.subplots(figsize = (7, 5))

# Main panel: CDF of the resolve time per protocol. DoUDP now uses its
# Colors entry like every other line (and like the stand-alone resolve-time CDF).
for proto_label, samples, line_color in (
    ("DoUDP", doudp_query_time, Colors.udp),
    ("DoTCP", dotcp_query_time, Colors.tcp),
    ("DoT", dot_query_time, Colors.tls),
    ("DoH", doh_query_time, Colors.https),
    ("DoQ", doq_query_time, Colors.quic),
):
    bin_edges, cumulative = calculateCDF(samples)
    main_ax.plot(bin_edges, cumulative, label=proto_label, color = line_color)

main_ax.set_xlim([0, 400])
main_ax.set_ylim([0, 1.05])
main_ax.set_xticks(np.arange(0,410,50))
main_ax.set_yticks(np.arange(0,1.05,0.1))

main_ax.set_xlabel("Resolve time [ms]")
main_ax.set_ylabel("CDF")

# Legend entries in reverse drawing order, with thicker sample lines.
handles, labels = main_ax.get_legend_handles_labels()
leg = main_ax.legend(handles[::-1], labels[::-1])

# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and the
# old name was later removed; support both.
legend_lines = leg.legend_handles if hasattr(leg, "legend_handles") else leg.legendHandles
for hnd in legend_lines:
    hnd.set_linewidth(5)

main_ax.spines['right'].set_visible(False)
main_ax.spines['top'].set_visible(False)

# Inset panel: CDF of the traceroute RTT for the protocols that have traceroutes.
sub_ax = fig.add_axes([0.57, 0.25, 0.33, .3])
for proto_label, samples, line_color in (
    ("DoTCP", dotcp_traceroutes_time, Colors.tcp),
    ("DoT", dot_traceroutes_time, Colors.tls),
    ("DoH", doh_traceroutes_time, Colors.https),
    ("DoQ", doq_traceroutes_time, Colors.quic),
):
    bin_edges, cumulative = calculateCDF(samples)
    sub_ax.plot(bin_edges, cumulative, label=proto_label, color = line_color)

sub_ax.legend().remove()
sub_ax.set_xlabel('RTT [ms]')
sub_ax.set_ylabel('CDF')
sub_ax.set_xlim([0,400])
sub_ax.set_ylim([-0.05,1.05])
sub_ax.set_xticks(np.arange(0,410,100))
sub_ax.set_yticks(np.arange(0,1.05,0.2))

# fig.show() only warns on the non-GUI inline backend; plt.show() renders the figure.
plt.show()

## Handshake Time / Traceroute

In [None]:
# Handshake duration divided by the measured RTT, per protocol:
# QUIC handshake for DoQ, TCP + TLS handshakes for DoH/DoT, TCP handshake for DoTCP.
# NOTE(review): assumes handshake durations and rtt share the same unit -- confirm.
doq_handshake_traceroute_time = list(
    doq_successful['q_ui_c_handshake_duration'].div(doq_successful['rtt'])
)
doh_handshake_traceroute_time = list(
    doh_successful['tcp_handshake_duration']
    .add(doh_successful['tls_handshake_duration'])
    .div(doh_successful['rtt'])
)
dot_handshake_traceroute_time = list(
    dot_successful['tcp_handshake_duration']
    .add(dot_successful['tls_handshake_duration'])
    .div(dot_successful['rtt'])
)
dotcp_handshake_traceroute_time = list(
    dotcp_successful['tcp_handshake_duration'].div(dotcp_successful['rtt'])
)


In [None]:
# Number of handshake/RTT samples per protocol, one per line,
# in the order DoQ, DoH, DoT, DoTCP.
print('Handshake/Traceroutes per protocol:')
print(
    len(doq_handshake_traceroute_time),
    len(doh_handshake_traceroute_time),
    len(dot_handshake_traceroute_time),
    len(dotcp_handshake_traceroute_time),
    sep='\n',
)

In [None]:
fig = plt.figure(figsize=(7, 5))

# CDF of the handshake-to-RTT ratio, one curve per protocol. calculateCDF
# returns (x, cdf), so unpacking it computes each histogram only once.
plt.plot(*calculateCDF(doq_handshake_traceroute_time), label="DoQ", color=Colors.quic)
plt.plot(*calculateCDF(doh_handshake_traceroute_time), label="DoH", color=Colors.https)
plt.plot(*calculateCDF(dot_handshake_traceroute_time), label="DoT", color=Colors.tls)
plt.plot(*calculateCDF(dotcp_handshake_traceroute_time), label="DoTCP", color=Colors.tcp)

plt.xlim([0, 6])
plt.ylim([0.0, 1.05])

# Axis label spelling corrected (was "Hanshake").
plt.xlabel("Handshake Time / RTT", fontsize=12)
plt.ylabel("CDF", fontsize=12)

plt.legend()
plt.grid()

plt.show()

In [None]:
fig, main_ax = plt.subplots(figsize=(7, 5))

# Main axes: CDF of the handshake time per protocol. Unpacking the (x, cdf)
# tuple from calculateCDF computes each histogram only once.
main_ax.plot(*calculateCDF(doq_handshake_time), label="DoQ", color=Colors.quic)
main_ax.plot(*calculateCDF(doh_handshake_time), label="DoH", color=Colors.https)
main_ax.plot(*calculateCDF(dot_handshake_time), label="DoT", color=Colors.tls)
main_ax.plot(*calculateCDF(dotcp_handshake_time), label="DoTCP", color=Colors.tcp)

main_ax.set_xlim([0, 1000])
main_ax.set_ylim([0, 1.05])
# Ticks start at 100, so no tick is drawn at the origin.
main_ax.set_xticks(np.arange(100, 1010, 100))
main_ax.set_yticks(np.arange(0, 1.05, 0.1))

main_ax.set_xlabel("Handshake time [ms]")
main_ax.set_ylabel("CDF")

# This figure is rendered without a legend.
main_ax.legend().remove()
# handles, labels = main_ax.get_legend_handles_labels()
# main_ax.legend(handles[::-1], labels[::-1], loc='lower left', ncol=2, fontsize='large', columnspacing=1)

main_ax.spines['right'].set_visible(False)
main_ax.spines['top'].set_visible(False)
# main_ax.grid()

# Inset axes: CDF of the handshake-to-RTT ratio per protocol.
# alternative inset positions:
# sub_ax = fig.add_axes([0.44, 0.2, 0.45, .25])
# sub_ax = fig.add_axes([0.5, 0.23, 0.375, .25])
sub_ax = fig.add_axes([0.57, 0.25, 0.33, .3])
sub_ax.plot(*calculateCDF(doq_handshake_traceroute_time), label="DoQ", color=Colors.quic)
sub_ax.plot(*calculateCDF(doh_handshake_traceroute_time), label="DoH", color=Colors.https)
sub_ax.plot(*calculateCDF(dot_handshake_traceroute_time), label="DoT", color=Colors.tls)
sub_ax.plot(*calculateCDF(dotcp_handshake_traceroute_time), label="DoTCP", color=Colors.tcp)

sub_ax.legend().remove()
# sub_ax.spines['right'].set_visible(False)
# sub_ax.spines['top'].set_visible(False)
sub_ax.set_xlim([0, 4])
sub_ax.set_ylim([-0.05, 1.05])
sub_ax.set_xlabel('Handshake-to-RTT ratio')
sub_ax.set_ylabel('CDF')
# np.arange excludes its stop value: use 5 so the tick at the upper x-limit (4)
# is drawn, as the other tick ranges in this notebook do for their upper limit.
sub_ax.set_xticks(np.arange(0, 5, 1))
sub_ax.set_yticks(np.arange(0, 1.05, 0.2))

fig.show()

## DoH on RTT verified

In [None]:
# Keep only the successful DoH measurements whose resolver IP is in
# measurements_traceroutes_successful_ips_doh.
doh_successful_traceroutes = doh_successful.loc[
    doh_successful['ip'].isin(measurements_traceroutes_successful_ips_doh)
]


### TLS versions

In [None]:
# Number of RTT-verified DoH measurements per TLS version (column "total").
doh_successful_traceroutes_tls = (
    doh_successful_traceroutes
    .groupby('tls_version')
    .size()
    .reset_index(name="total")
)
doh_successful_traceroutes_tls

# all measurements with TLS 1.2 are still there

In [None]:
# Share of the row labelled 1 in the per-version counts, i.e. the second
# tls_version group in sorted order.
# NOTE(review): presumably the newer TLS version -- confirm against the table above.
doh_successful_traceroutes_tls.loc[1, 'total'] / doh_successful_traceroutes_tls['total'].sum()


## DoT on RTT verified

In [None]:
# Keep only the successful DoT measurements whose resolver IP is in
# measurements_traceroutes_successful_ips_dot.
dot_successful_traceroutes = dot_successful.loc[
    dot_successful['ip'].isin(measurements_traceroutes_successful_ips_dot)
]


### TLS versions

In [None]:
# Number of RTT-verified DoT measurements per TLS version (column "total").
dot_successful_traceroutes_tls = (
    dot_successful_traceroutes
    .groupby('tls_version')
    .size()
    .reset_index(name="total")
)
dot_successful_traceroutes_tls

# all measurements with TLS 1.2 are still there

In [None]:
# Share of the row labelled 1 in the per-version counts, i.e. the second
# tls_version group in sorted order.
# NOTE(review): presumably the newer TLS version -- confirm against the table above.
dot_successful_traceroutes_tls.loc[1, 'total'] / dot_successful_traceroutes_tls['total'].sum()


# QLOGS

In [None]:
# Load every row of the q_log_outputs table from the measurement database.
qlogs = pd.read_sql_query(
    sql="SELECT * FROM q_log_outputs",
    con=connection_final,
)
qlogs

In [None]:
# Keep only the qlogs that belong to a row of measurements_successful
# (inner join of qlogs.dns_measurement_id on measurements_successful.id).
qlogs_successful = pd.merge(
    qlogs,
    measurements_successful,
    left_on='dns_measurement_id',
    right_on='id',
)
qlogs_successful

In [None]:
# Number of joined qlog rows whose cache_warming flag is not 0.
qlogs_successful.loc[qlogs_successful['cache_warming'] != 0].shape[0]

In [None]:
# Samples whose qlog content contains the text "retry";
# prints the absolute count and the fraction of all successful qlogs.
qlogs_successful_retry = qlogs_successful.loc[
    qlogs_successful['content'].str.contains('retry')
]

print(
    'Retry: ' + str(len(qlogs_successful_retry))
    + ' / ' + str(len(qlogs_successful_retry) / len(qlogs_successful))
)


In [None]:
# Samples whose qlog content contains the text "new_token";
# prints the absolute count and the fraction of all successful qlogs.
qlogs_successful_new_token = qlogs_successful.loc[
    qlogs_successful['content'].str.contains('new_token')
]

print(
    'New Token: ' + str(len(qlogs_successful_new_token))
    + ' / ' + str(len(qlogs_successful_new_token) / len(qlogs_successful))
)


In [None]:
# Samples whose qlog content contains the text "version_negotiation";
# prints the absolute count and the fraction of all successful qlogs.
qlogs_successful_version_negotiation = qlogs_successful[qlogs_successful['content'].str.contains('version_negotiation')]

# Printed label spelling corrected (was "Negotitaion").
print('Version Negotiation: ' + str(len(qlogs_successful_version_negotiation)) + ' / ' + str(len(qlogs_successful_version_negotiation) / len(qlogs_successful)))


# cache_warming = 1

In [None]:
# Load the dns_measurements rows that are flagged cache_warming = 1.
measurements_cache_warming = pd.read_sql_query(
    sql="SELECT * FROM dns_measurements WHERE cache_warming = 1",
    con=connection_final,
)


In [None]:
# Restrict to the quic protocol and inner-join with quic_ports on (ip, port).
# Merging with the misc ip/port combinations from week 2022-02 removes all ports
# on resolvers which were not doq_verified.
measurements_cache_warming_quic = (
    measurements_cache_warming
    .loc[measurements_cache_warming['protocol'] == 'quic']
    .merge(quic_ports, on=['ip', 'port'])
)


In [None]:
# Keep one row per resolver IP and hour: `created` is parsed as epoch seconds,
# floored to the hour, and duplicates on (ip, hour) are dropped (first row wins).
# This removes resolvers with doubled ports (e.g., adguard).
measurements_cache_warming_quic['created_datetime'] = pd.to_datetime(
    measurements_cache_warming_quic['created'], unit='s'
)
measurements_cache_warming_quic['created_datetime_hour'] = (
    measurements_cache_warming_quic['created_datetime'].dt.floor('h')
)
measurements_cache_warming_quic_cleaned = measurements_cache_warming_quic.drop_duplicates(
    subset=['ip', 'created_datetime_hour']
)


In [None]:
# Keep only measurements of resolvers listed in measuremens_ips_all_responsive,
# i.e. resolvers which answered at least once with an r_code for every protocol.
measurements_cache_warming_quic_cleaned_responsive = measurements_cache_warming_quic_cleaned.loc[
    measurements_cache_warming_quic_cleaned['ip'].isin(measuremens_ips_all_responsive)
]


In [None]:
# Only successful measurements: an r_code is present, no error is recorded,
# and total_time is present and below 5e9.
# NOTE(review): 5e9 is presumably a 5 s cut-off in nanoseconds -- confirm the unit.
mask = (
    measurements_cache_warming_quic_cleaned_responsive['r_code'].notna()
    & measurements_cache_warming_quic_cleaned_responsive['error'].isna()
    & measurements_cache_warming_quic_cleaned_responsive['total_time'].notna()
    & measurements_cache_warming_quic_cleaned_responsive['total_time'].lt(5e9)
)
measurements_cache_warming_quic_cleaned_successful = (
    measurements_cache_warming_quic_cleaned_responsive.loc[mask]
)


In [None]:
# Attach traceroute data: inner join of the successful measurements with
# traceroutes_destinations_successful_cleaned on measurement id and protocol.
measurements_cache_warming_quic_cleaned_successful_traceroutes = pd.merge(
    measurements_cache_warming_quic_cleaned_successful,
    traceroutes_destinations_successful_cleaned,
    left_on=['id', 'protocol'],
    right_on=['dns_measurement_id', 'protocol'],
)
measurements_cache_warming_quic_cleaned_successful_traceroutes


In [None]:
# Plot inputs for the cache-warming DoQ samples: handshake duration and RTT,
# both scaled by 1e-6, plus their unscaled ratio.
# NOTE(review): the plots label these as [ms], so the raw columns are presumably nanoseconds -- confirm.
doq_cache_warming_handshake_time = list(
    measurements_cache_warming_quic_cleaned_successful_traceroutes['q_ui_c_handshake_duration'].div(1000000)
)
doq_cache_warming_traceroutes_time = list(
    measurements_cache_warming_quic_cleaned_successful_traceroutes['rtt'].div(1000000)
)
doq_cache_warming_handshake_traceroute_time = list(
    measurements_cache_warming_quic_cleaned_successful_traceroutes['q_ui_c_handshake_duration'].div(
        measurements_cache_warming_quic_cleaned_successful_traceroutes['rtt']
    )
)



In [None]:
# Custom linestyle and linewidth for the cache-warming DoQ curve ("DoQ CW").
# (0, (3, 3)) is a Matplotlib dash tuple: offset 0, then 3 pt on / 3 pt off.
cor_quic_ls = (0, (3, 3))
# alternative: plain dashed line
# cor_quic_ls = '--'
cor_quic_lw = 2.25

In [None]:
fig, main_ax = plt.subplots(figsize=(7, 5))

# Main axes: handshake-time CDF per protocol, plus the cache-warming DoQ
# samples ("DoQ CW") drawn in the DoQ colour with the custom dash pattern.
main_ax.plot(*calculateCDF(doq_handshake_time), label="DoQ", color=Colors.quic)
main_ax.plot(*calculateCDF(doh_handshake_time), label="DoH", color=Colors.https)
main_ax.plot(*calculateCDF(dot_handshake_time), label="DoT", color=Colors.tls)
main_ax.plot(*calculateCDF(dotcp_handshake_time), label="DoTCP", color=Colors.tcp)
main_ax.plot(
    *calculateCDF(doq_cache_warming_handshake_time),
    label="DoQ CW",
    color=Colors.quic,
    linestyle=cor_quic_ls,
    linewidth=cor_quic_lw,
)

main_ax.set_xlim([0, 1300])
main_ax.set_ylim([0, 1.05])
main_ax.set_xticks(np.arange(0, 1310, 200))
main_ax.set_yticks(np.arange(0, 1.05, 0.1))

main_ax.set_xlabel("Handshake time [ms]")
main_ax.set_ylabel("CDF")

# This figure is rendered without a legend.
main_ax.legend().remove()
# handles, labels = main_ax.get_legend_handles_labels()
# main_ax.legend(handles[::-1], labels[::-1], loc='lower left', ncol=2, fontsize='large', columnspacing=1)

main_ax.spines['right'].set_visible(False)
main_ax.spines['top'].set_visible(False)
# main_ax.grid()

# Inset axes: handshake-to-RTT ratio CDF for the same five series.
# alternative inset positions:
# sub_ax = fig.add_axes([0.44, 0.2, 0.45, .25])
# sub_ax = fig.add_axes([0.5, 0.23, 0.375, .25])
sub_ax = fig.add_axes([0.57, 0.25, 0.33, .3])
sub_ax.plot(*calculateCDF(doq_handshake_traceroute_time), label="DoQ", color=Colors.quic)
sub_ax.plot(*calculateCDF(doh_handshake_traceroute_time), label="DoH", color=Colors.https)
sub_ax.plot(*calculateCDF(dot_handshake_traceroute_time), label="DoT", color=Colors.tls)
sub_ax.plot(*calculateCDF(dotcp_handshake_traceroute_time), label="DoTCP", color=Colors.tcp)
sub_ax.plot(
    *calculateCDF(doq_cache_warming_handshake_traceroute_time),
    label="DoQ CW",
    color=Colors.quic,
    linestyle=cor_quic_ls,
    linewidth=cor_quic_lw,
)

sub_ax.legend().remove()
# sub_ax.spines['right'].set_visible(False)
# sub_ax.spines['top'].set_visible(False)
sub_ax.set_xlim([0, 5])
sub_ax.set_ylim([-0.05, 1.05])
sub_ax.set_xlabel('Handshake-to-RTT ratio')
sub_ax.set_ylabel('CDF')
sub_ax.set_xticks(np.arange(0, 6, 1))
sub_ax.set_yticks(np.arange(0, 1.05, 0.2))

# Shared arrow style for both annotations.
# Labelled arrow on the main axes pointing at (850, 0.8).
main_ax.annotate(
    'First DoQ Session\nEstablishments',
    xy=(850, 0.8), xycoords='data',
    xytext=(950, 0.8), textcoords='data',
    arrowprops=dict(facecolor='black', shrink=0.1, width=0.1, headwidth=5, headlength=5),
    ha='left', va='center',
    fontsize=12,
)
# Unlabelled arrow on the inset; its tail (y = 1.5) lies above the inset's
# y-limit of 1.05, so the arrow enters the inset from above.
sub_ax.annotate(
    '',
    xy=(4, 0.75), xycoords='data',
    xytext=(4, 1.5), textcoords='data',
    arrowprops=dict(facecolor='black', shrink=0.1, width=0.1, headwidth=5, headlength=5),
    ha='right', va='bottom',
)

fig.show()

In [None]:
# Mean and median of the cache-warming DoQ handshake times, one per line.
print('mean/median:')
print(
    np.mean(doq_cache_warming_handshake_time),
    np.median(doq_cache_warming_handshake_time),
    sep='\n',
)
