In [1]:
import logging

import numpy as np
import dns.message, dns.query, dns.rdataclass, dns.rdatatype, dns.flags, dns.exception, dns.name
import dns.rcode
from tqdm import tqdm
from tranco import Tranco
import pandas as pd

In [2]:
# Tranco client with on-disk caching enabled; cached files go to ./.tranco
t = Tranco(cache=True, cache_dir='.tranco')
# list() is called without arguments -- presumably this selects the most
# recent ranking (hence the variable name); confirm against the tranco docs.
latest_list = t.list()

In [3]:
# DNS class / record-type codes used by every query helper below,
# resolved once from their textual mnemonics.
IN = dns.rdataclass.from_text("IN")
NS, SOA, DS, A, AAAA, RRSIG = map(
    dns.rdatatype.from_text,
    ("NS", "SOA", "DS", "A", "AAAA", "RRSIG"),
)

In [4]:
def query(q, where='127.0.0.1', port=5301, timeout=5, retries=1):
    """Send the DNS message ``q`` to a resolver and return the response.

    The query is sent over UDP. An attempt that times out is repeated up to
    ``retries`` more times. When the UDP response carries the TC (truncated)
    flag, the query is repeated over TCP so callers never inspect a partial
    set of records (a truncated answer would otherwise look like missing
    RRSIG/NS data).

    Args:
        q: dns.message.Message to send; its first question is logged.
        where: IP address of the resolver.
        port: port of the resolver.
        timeout: per-attempt timeout in seconds.
        retries: number of additional UDP attempts after a timeout.

    Returns:
        The response dns.message.Message.

    Raises:
        dns.exception.Timeout: if every attempt timed out.
        Exception: with message "SERVFAIL" if the resolver answered SERVFAIL.
    """
    question = q.question[0]
    logging.info(f'Query: {question.name} {dns.rdatatype.to_text(question.rdtype)}')

    attempts = max(retries, 0) + 1
    for attempt in range(attempts):
        try:
            r = dns.query.udp(q, where=where, port=port, timeout=timeout)
            break
        except dns.exception.Timeout:
            if attempt + 1 >= attempts:
                raise  # out of retries: let the caller see the timeout
            logging.debug(f'Timeout, retrying ({attempt + 1}/{attempts - 1})')

    if r.flags & dns.flags.TC:
        # UDP answer did not fit; fetch the complete answer over TCP.
        logging.debug('Truncated UDP response, retrying over TCP')
        r = dns.query.tcp(q, where=where, port=port, timeout=timeout)

    if r.rcode() == dns.rcode.SERVFAIL:
        raise Exception("SERVFAIL")
    logging.debug(f'Response Code: {r.rcode()}')
    logging.debug(f'Response: \n{r}')
    return r

def _self_and_ancestors(name):
    """Yield ``name`` followed by each of its parents, ending with the root."""
    while True:
        yield name
        if name == dns.name.root:
            return
        name = name.parent()


def zone_name(qname):
    """Return the owner name of the SOA record of the zone containing ``qname``.

    Starting at ``qname`` and moving towards the root, an SOA query (DNSSEC
    records requested, CD flag set) is sent for each candidate name. In every
    response the authority and answer sections are searched for an SOA record
    owned by the candidate or one of its ancestors; the first hit wins.

    Args:
        qname: absolute dns.name.Name to locate.

    Returns:
        dns.name.Name of the matching SOA record's owner.

    Raises:
        LookupError: if no SOA record was found for ``qname`` or any ancestor
            up to and including the root.
        Exception: whatever ``query`` raises (timeout, SERVFAIL).
    """
    for candidate in _self_and_ancestors(qname):
        q = dns.message.make_query(candidate, SOA, want_dnssec=True)
        q.flags = q.flags | dns.flags.CD
        r = query(q)
        # The SOA may be owned by an ancestor of the queried name (e.g. in a
        # negative answer), so check every ancestor against this response.
        for owner in _self_and_ancestors(candidate):
            rr = r.get_rrset(r.authority, owner, IN, SOA) or r.get_rrset(r.answer, owner, IN, SOA)
            if rr:
                return rr.name
            logging.debug(f"getting parent of name {owner}")
        logging.debug(f"getting parent of qname {candidate}")
    # Reached only when even the root query produced no usable SOA record.
    raise LookupError(f"no SOA record found for {qname} or any of its ancestors")
        

In [5]:
def zone_ns(qname):
    """Look up the NS record set of ``qname``.

    The query requests DNSSEC records and sets the CD flag.

    Returns:
        A 3-tuple ``(ns_set, rrsig, ad)``:
        - ns_set: the NS RRset from the answer section, or ``[]`` if there is none
        - rrsig: the RRSIG RRset covering NS from the answer section, or None
        - ad: True if the response has the AD flag set
    """
    request = dns.message.make_query(qname, NS, want_dnssec=True)
    request.flags |= dns.flags.CD
    response = query(request)

    answer = response.answer
    name_servers = response.get_rrset(answer, qname, IN, NS)
    signature = response.get_rrset(answer, qname, IN, RRSIG, covers=NS)
    authenticated = bool(response.flags & dns.flags.AD)

    if not name_servers:
        name_servers = []
    return name_servers, signature, authenticated

In [6]:
# Sample analysed by the rest of the notebook: top(100) of the Tranco list
# (the report below is titled "Tranco Top {len(domains)} Domains").
domains = latest_list.top(100)

In [7]:
def check_domain(d):
    """Collect DNSSEC facts about the zone of domain ``d`` and its name servers.

    Args:
        d: domain name as text.

    Returns:
        dict with keys
        - 'domain': absolute text form of ``d``
        - 'zone': apex of the zone containing ``d`` (see ``zone_name``)
        - 'signed': True if the zone's NS answer included an RRSIG
        - 'secure': True if the zone's NS answer had the AD flag set
        - 'ns_signed' / 'ns_secure': fraction of name server host names that
          are signed / secure for both their A and AAAA lookups
        - 'ns_num': number of NS records
        - 'ns_names': list of name server host names as text

    Raises:
        Exception: any failure is re-raised with an extra attribute ``d``
            holding the domain text, so callers can tell which domain failed.
    """
    # Label attached to errors. Starts as the raw input so that a failure in
    # name parsing is still reported (previously the handler itself crashed
    # with AttributeError because ``d`` was still a str at that point).
    domain_text = str(d)
    try:
        d = dns.name.from_text(d)
        domain_text = d.to_text()
        z = zone_name(d)

        ns_set, ns_rrsig, z_secure = zone_ns(z)
        z_signed = ns_rrsig is not None

        ns_signed_list = []
        ns_secure_list = []
        for ns in ns_set:
            ns_secure = ns_signed = True
            for rdtype in (A, AAAA):
                r = query(dns.message.make_query(ns.target, rdtype, want_dnssec=True))
                rr_set = r.get_rrset(r.authority, ns.target, IN, rdtype) or r.get_rrset(r.answer, ns.target, IN, rdtype)
                rrsig = r.get_rrset(r.authority, ns.target, IN, RRSIG, covers=rdtype) or r.get_rrset(r.answer, ns.target, IN, RRSIG, covers=rdtype)
                # Secure only if every lookup so far was authenticated (AD).
                ns_secure = ns_secure and bool(r.flags & dns.flags.AD)
                # Signed if secure, or if the records came with a signature.
                ns_signed = ns_signed and (ns_secure or (rr_set is not None and rrsig is not None))
            # Sanity check: secure implies signed by construction above.
            if not ns_signed and ns_secure:
                raise Exception(f"Found {ns.target} to be secure but not signed")
            ns_signed_list.append(ns_signed)
            ns_secure_list.append(ns_secure)

        return {
            'domain': domain_text,
            'zone': z.to_text(),
            'signed': z_signed,
            'secure': z_secure,
            # NOTE: np.mean of an empty list is NaN (with a RuntimeWarning),
            # which is what a zone without NS records yields here.
            'ns_signed': np.mean(ns_signed_list),
            'ns_secure': np.mean(ns_secure_list),
            'ns_num': len(ns_set),
            'ns_names': [ns.target.to_text() for ns in ns_set],
        }
    except Exception as e:
        e.d = domain_text
        raise  # bare raise keeps the original traceback intact

In [8]:
# `import concurrent` alone does not load the `futures` submodule; it only
# appeared to work when some other library had already imported it.
import concurrent.futures

# Shared pool used by run_queries; lookups are network-bound, so up to 50
# domains are checked in parallel threads.
executor = concurrent.futures.ThreadPoolExecutor(50)

In [9]:
def run_queries(domains):
    """Run ``check_domain`` for every entry of ``domains`` on the shared pool.

    Args:
        domains: iterable of domain names as text.

    Returns:
        A tuple ``(results, errors)``: ``results`` is the list of dicts
        returned by successful checks (in completion order), ``errors`` is
        the list of domain names whose check raised.
    """
    errors = []
    results = []
    # Keyed by future (not by domain) so duplicate inputs are all tracked and
    # a failed future can always be mapped back to the name it was given.
    pending = {executor.submit(check_domain, d): d for d in domains}
    with tqdm(total=len(pending)) as pbar:
        for future in concurrent.futures.as_completed(pending):
            pbar.update(1)
            exc = future.exception()
            if exc is not None:
                # check_domain tags its exceptions with `.d`; fall back to the
                # submitted name if the attribute is missing.
                name = getattr(exc, 'd', pending[future])
                logging.warning(f"{name}: {exc}")
                errors.append(name)
            else:
                results.append(future.result())
    return results, errors

In [10]:
# First pass over the whole sample; `errors` holds the names whose check raised.
results, errors = run_queries(domains)

100%|██████████| 100/100 [00:04<00:00, 23.70it/s]


In [11]:
# Second pass: re-run only the domains that failed in the first pass.
results_retry, errors_retry = run_queries(errors)

0it [00:00, ?it/s]


In [12]:
# Fraction of the sample that still failed after the retry pass.
len(errors_retry) / len(domains)

0.0

In [13]:
# One row per successfully checked domain (first pass plus retries), with
# three derived columns:
#   tld                - second-to-last piece of the absolute name
#                        ('vimeo.com.' -> 'com')
#   bootstrapable      - zone itself not secure, but ALL name server names are
#   bootstrapable_weak - zone itself not secure, but SOME name server names are
data = pd.DataFrame(results + results_retry).assign(
    tld=lambda df: df['domain'].str.rsplit('.', n=2).str[-2],
    bootstrapable=lambda df: (df['ns_secure'] == 1) & ~df['secure'],
    bootstrapable_weak=lambda df: (df['ns_secure'] > 0) & ~df['secure'],
)
data

Unnamed: 0,domain,zone,signed,secure,ns_signed,ns_secure,ns_num,ns_names,tld,bootstrapable,bootstrapable_weak
0,vimeo.com.,vimeo.com.,False,False,0.000000,0.000000,4,"[ns-1886.awsdns-43.co.uk., ns-682.awsdns-21.ne...",com,False,False
1,yahoo.com.,yahoo.com.,False,False,0.000000,0.000000,5,"[ns3.yahoo.com., ns2.yahoo.com., ns5.yahoo.com...",com,False,False
2,instagram.com.,instagram.com.,False,False,0.000000,0.000000,4,"[ns-1349.awsdns-40.org., ns-868.awsdns-44.net....",com,False,False
3,flickr.com.,flickr.com.,False,False,0.000000,0.000000,4,"[ns-1683.awsdns-18.co.uk., ns-573.awsdns-07.ne...",com,False,False
4,netflix.com.,netflix.com.,False,False,0.000000,0.000000,4,"[ns-659.awsdns-18.net., ns-81.awsdns-10.com., ...",com,False,False
...,...,...,...,...,...,...,...,...,...,...,...
95,archive.org.,archive.org.,False,False,0.333333,0.333333,6,"[ns0036.secondary.cloudflare.com., ns1.archive...",org,False,True
96,360.cn.,360.cn.,False,False,0.000000,0.000000,6,"[dns7.360safe.com., dns1.360safe.com., dns9.36...",cn,False,False
97,sohu.com.,sohu.com.,False,False,0.000000,0.000000,6,"[ns11.sohu.com., ns14.sohu.com., ns15.sohu.com...",com,False,False
98,yahoo.co.jp.,yahoo.co.jp.,False,False,0.000000,0.000000,4,"[ns02.yahoo.co.jp., ns12.yahoo.co.jp., ns11.ya...",jp,False,False


In [14]:
# Persist the result frame; the file name carries the sample size.
data.to_pickle(f'tranco-{len(domains)}-data.pickle')

In [15]:
# Plain-text summary of the result frame; every figure is a share of the rows
# in `data`.
print(f"# Tranco Top {len(domains)} Domains")
print()
# 'secure' / 'signed' are per-zone booleans, so their mean is a proportion.
print(f"Total proportion of SECURE zones............................: {data['secure'].mean():7.2%}")
print(f"Total proportion of SIGNED zones............................: {data['signed'].mean():7.2%}")
print()
# 'ns_secure' / 'ns_signed' are per-zone fractions (np.mean of booleans in
# check_domain): floor() is 1 only when the fraction is exactly 1 (ALL name
# servers), ceil() is 1 as soon as the fraction is above 0 (SOME).
print(f"Total proportion of zones with ALL  name server names SECURE: {np.floor(data['ns_secure']).mean():7.2%}")
print(f"Total proportion of zones with ALL  name server names SIGNED: {np.floor(data['ns_signed']).mean():7.2%}")
print(f"Total proportion of zones with SOME name server names SECURE: {np.ceil(data['ns_secure']).mean():7.2%}")
print(f"Total proportion of zones with SOME name server names SIGNED: {np.ceil(data['ns_signed']).mean():7.2%}")
print()
print(f"Total proportion of      bootstrapable zones................: {data['bootstrapable'].mean():7.2%}")
print(f"Total proportion of weak bootstrapable zones................: {data['bootstrapable_weak'].mean():7.2%}")

# Tranco Top 100 Domains

Total proportion of SECURE zones............................:   4.00%
Total proportion of SIGNED zones............................:   5.00%

Total proportion of zones with ALL  name server names SECURE:   6.00%
Total proportion of zones with ALL  name server names SIGNED:   6.00%
Total proportion of zones with SOME name server names SECURE:  20.00%
Total proportion of zones with SOME name server names SIGNED:  20.00%

Total proportion of      bootstrapable zones................:   4.00%
Total proportion of weak bootstrapable zones................:  17.00%


In [16]:
# Share of bootstrapable zones per TLD, limited to the ten TLDs that
# contribute the most domains to the sample.
(
    data.groupby('tld')[['bootstrapable']]
    .agg(['mean', 'count'])
    .sort_values(('bootstrapable', 'count'), ascending=False)
    .head(10)
)

Unnamed: 0_level_0,bootstrapable,bootstrapable
Unnamed: 0_level_1,mean,count
tld,Unnamed: 1_level_2,Unnamed: 2_level_2
com,0.030769,65
org,0.25,8
net,0.0,8
tv,0.0,3
cn,0.0,2
be,0.0,1
jp,0.0,1
uk,0.0,1
ru,0.0,1
ly,0.0,1


# Debug

In [17]:
# Turn on DEBUG output for the helpers above. force=True makes basicConfig
# take effect even if the root logger already has handlers installed.
logging.basicConfig(level=logging.DEBUG, force=True)

In [18]:
# Trace one lookup end to end with debug logging enabled.
check_domain("www.nhs.uk")

INFO:root:Query: www.nhs.uk. SOA
DEBUG:root:Response Code: 0
DEBUG:root:Response: 
id 29081
opcode QUERY
rcode NOERROR
flags QR RD RA CD
edns 0
eflags DO
payload 512
;QUESTION
www.nhs.uk. IN SOA
;ANSWER
www.nhs.uk. 1836 IN CNAME nhswebsite-production.edgekey.net.
nhswebsite-production.edgekey.net. 300 IN CNAME e10931.ig15.akamaiedge.net.
;AUTHORITY
ig15.akamaiedge.net. 1000 IN SOA n0ig15.akamaiedge.net. hostmaster.akamai.com. 1630920856 1000 1000 1000 1800
;ADDITIONAL
DEBUG:root:getting parent of name www.nhs.uk.
DEBUG:root:getting parent of name nhs.uk.
DEBUG:root:getting parent of name uk.
DEBUG:root:getting parent of name .
DEBUG:root:getting parent of qname www.nhs.uk.
INFO:root:Query: nhs.uk. SOA
DEBUG:root:Response Code: 0
DEBUG:root:Response: 
id 30418
opcode QUERY
rcode NOERROR
flags QR RD RA CD
edns 0
eflags DO
payload 512
;QUESTION
nhs.uk. IN SOA
;ANSWER
nhs.uk. 900 IN SOA ns-1230.awsdns-25.org. awsdns-hostmaster.amazon.com. 1 7200 900 1209600 86400
;AUTHORITY
;ADDITIONAL
INF

{'domain': 'www.nhs.uk.',
 'zone': 'nhs.uk.',
 'signed': False,
 'secure': False,
 'ns_signed': 0.0,
 'ns_secure': 0.0,
 'ns_num': 4,
 'ns_names': ['ns-1230.awsdns-25.org.',
  'ns-203.awsdns-25.com.',
  'ns-807.awsdns-36.net.',
  'ns-1685.awsdns-18.co.uk.']}