In [None]:
import logging

import numpy as np
import dns.message, dns.query, dns.rdataclass, dns.rdatatype, dns.flags, dns.exception, dns.name
from tqdm import tqdm
from tranco import Tranco
import pandas as pd

In [None]:
# Tranco client with caching enabled; cache_dir points at ./.tranco.
t = Tranco(cache=True, cache_dir='.tranco')
# list() is called without arguments; presumably that selects the most recent
# ranking (the variable name suggests so) -- confirm against the tranco package docs.
latest_list = t.list()

In [None]:
# DNS class / record-type constants shared by all query helpers below.
# dnspython exposes these as module attributes, equivalent to the from_text() lookups.
IN = dns.rdataclass.IN
NS = dns.rdatatype.NS
SOA = dns.rdatatype.SOA
DS = dns.rdatatype.DS
A = dns.rdatatype.A
AAAA = dns.rdatatype.AAAA
RRSIG = dns.rdatatype.RRSIG

In [None]:
def query(q):
    """Send a DNS query to the local resolver and return its response.

    The CD (checking disabled) flag is OR-ed into ``q.flags`` before sending, so the
    caller's message object is modified in place. The query is sent over UDP to
    127.0.0.1:5301 with a 5 second timeout. A first timeout is retried once; a second
    timeout propagates to the caller.

    Args:
        q: A dnspython query message, e.g. from ``dns.message.make_query``.

    Returns:
        The response message received from the resolver.

    Raises:
        dns.exception.Timeout: If the retry also times out.
        Exception: If the response code is 2 (SERVFAIL).
    """
    question = q.question[0]
    rdtype_text = dns.rdatatype.to_text(question.rdtype)
    # Pass values as logging arguments so the (potentially large) response is only
    # rendered to text when a handler actually emits the record.
    logging.info('Query: %s %s', question.name, rdtype_text)
    q.flags = q.flags | dns.flags.CD
    try:
        r = dns.query.udp(q, where='127.0.0.1', port=5301, timeout=5)
    except dns.exception.Timeout:
        # One retry only; a second Timeout is deliberately not caught.
        r = dns.query.udp(q, where='127.0.0.1', port=5301, timeout=5)
    # %d keeps the numeric rendering of the rcode regardless of how its enum prints.
    logging.debug('Response Code: %d', r.rcode())
    logging.debug('Response: \n%s', r)
    if r.rcode() == 2:  # servfail
        raise Exception(f"SERVFAIL after asking {question.name} {rdtype_text}")
    return r

def zone_soa(qname):
    """Find the SOA RRset of the zone that encloses ``qname``.

    An SOA query is sent for ``qname``. The reply's authority section, then its answer
    section, is searched for an SOA RRset owned by ``qname`` or any of its ancestors up
    to the root. If none is found, the query is repeated for the parent of ``qname``.

    Args:
        qname: Absolute dnspython name to start from.

    Returns:
        The SOA RRset found, or None when the resolver answers NXDOMAIN (rcode 3).

    Raises:
        Exception: On any response code other than 0 or 3.
    """
    # NOTE(review): qname.parent() below is unconditional; if the root itself ever
    # yields no SOA this presumably raises from dnspython -- confirm whether intended.
    while True:  # outer walk: query qname, then each of its ancestors
        response = query(dns.message.make_query(qname, SOA, want_dnssec=True))
        if response.rcode() == 3:  # NXDOMAIN
            return None
        if response.rcode() != 0:
            raise Exception(f"zone_soa({qname}) query response code:{response.rcode()}")

        owner = qname
        while True:  # inner walk: look for an SOA owned by qname or an ancestor
            found = (
                response.get_rrset(response.authority, owner, IN, SOA)
                or response.get_rrset(response.answer, owner, IN, SOA)
            )
            if found:
                return found
            logging.debug(f"getting parent of name {owner}")
            if owner == dns.name.root:
                break
            owner = owner.parent()

        logging.debug(f"getting parent of qname {qname}")
        qname = qname.parent()

def zone_name(qname):
    """Return the owner name of the SOA RRset found by zone_soa(qname).

    Returns None when zone_soa yields no (or an empty) RRset.
    """
    soa_rrset = zone_soa(qname)
    if soa_rrset:
        return soa_rrset.name
    return None
        

In [None]:
def zone_ns(qname):
    """Query the NS RRset of ``qname`` together with its signature and AD status.

    Args:
        qname: Absolute dnspython name, expected to be a zone apex.

    Returns:
        A 3-tuple ``(ns_set, rrsig, ad)``:
        ns_set is the NS RRset from the answer section, or an empty list if there is none;
        rrsig is the RRSIG RRset covering NS from the answer section, or None;
        ad is True when the AD flag is set on the response.
    """
    request = dns.message.make_query(qname, NS, want_dnssec=True)
    request.flags |= dns.flags.CD
    response = query(request)

    answer = response.answer
    nameservers = response.get_rrset(answer, qname, IN, NS)
    signature = response.get_rrset(answer, qname, IN, RRSIG, covers=NS)
    # Test the AD bit directly instead of searching the textual flag list.
    authenticated = bool(response.flags & dns.flags.AD)

    if not nameservers:
        nameservers = []
    return nameservers, signature, authenticated

In [None]:
# Domains to survey: the top 10**6 (one million) entries of the ranking.
domains = latest_list.top(10**6)

In [None]:
def check_domain(d):
    """Collect DNSSEC facts about a domain's zone and that zone's nameservers.

    Args:
        d: Domain name as text.

    Returns:
        A dict with these keys:
        'domain': the parsed name as text;
        'zone': owner name of the enclosing zone's SOA, as text;
        'signed': True if an RRSIG covering the zone's NS RRset was returned;
        'secure': True if the NS response had the AD flag set;
        'ns_exists', 'ns_signed', 'ns_secure': fraction of nameservers for which an
        A or AAAA RRset exists / is accompanied by an RRSIG / came with AD set;
        'ns_num': number of NS records;
        'ns_names': nameserver host names as text;
        'ns_soas': per nameserver, the result of zone_soa() for its host name.

    Raises:
        Exception: Any failure is re-raised with an extra attribute ``d`` naming the
            domain, so callers can tell which input failed.
    """
    # Tag attached to any failure. It starts as the raw input because `d` is still a
    # plain string if parsing the name is what fails; calling d.to_text() in the
    # handler would then raise AttributeError and hide the real error.
    label = d
    try:
        d = dns.name.from_text(d)
        label = d.to_text()
        z = zone_name(d)

        if z is None:
            raise Exception(f"Could not identify zone name for domain {d}")

        ns_set, ns_rrsig, z_secure = zone_ns(z)
        z_signed = ns_rrsig is not None

        ns_exists_list = []
        ns_signed_list = []
        ns_secure_list = []
        ns_soa_list = []
        for ns in ns_set:
            ns_exists = {}
            ns_signed = {}
            ns_secure = {}

            for rdtype in [A, AAAA]:
                r = query(dns.message.make_query(ns.target, rdtype, want_dnssec=True))
                rr_set = (
                    r.get_rrset(r.authority, ns.target, IN, rdtype)
                    or r.get_rrset(r.answer, ns.target, IN, rdtype)
                )
                rrsig = (
                    r.get_rrset(r.authority, ns.target, IN, RRSIG, covers=rdtype)
                    or r.get_rrset(r.answer, ns.target, IN, RRSIG, covers=rdtype)
                )
                ns_exists[rdtype] = rr_set is not None
                ns_signed[rdtype] = ns_exists[rdtype] and rrsig is not None
                ns_secure[rdtype] = ns_exists[rdtype] and 'AD' in dns.flags.to_text(r.flags)
                # AD without a signature would contradict the signed/secure model used here.
                if ns_secure[rdtype] and not ns_signed[rdtype]:
                    raise Exception(f"Found {ns.target}/{rdtype} to be secure but not signed")

            # A nameserver counts if either address family qualifies.
            ns_exists_list.append(any(ns_exists.values()))
            ns_signed_list.append(any(ns_signed.values()))
            ns_secure_list.append(any(ns_secure.values()))

            ns_soa_list.append(zone_soa(ns.target))

        # np.mean of an empty list is nan (with a RuntimeWarning), which is what a
        # zone without NS records in the answer produces for the three ratios.
        return {
            'domain': d.to_text(),
            'zone': z.to_text(),
            'signed': z_signed,
            'secure': z_secure,
            'ns_exists': np.mean(ns_exists_list),
            'ns_signed': np.mean(ns_signed_list),
            'ns_secure': np.mean(ns_secure_list),
            'ns_num': len(ns_set),
            'ns_names': [ns.target.to_text() for ns in ns_set],
            'ns_soas': ns_soa_list,
        }
    except Exception as e:
        e.d = label
        raise

In [None]:
# Importing the bare `concurrent` package does not load its `futures` submodule;
# import the submodule explicitly so `concurrent.futures` resolves on a fresh kernel
# even when no other package has imported it yet.
import concurrent.futures

# Shared pool of 50 worker threads that run_queries submits check_domain calls to.
executor = concurrent.futures.ThreadPoolExecutor(50)

In [None]:
def run_queries(domains):
    """Run check_domain for every domain on the shared thread pool.

    Args:
        domains: Iterable of domain names as text.

    Returns:
        A tuple ``(results, errors)``: ``results`` holds the dicts returned by
        check_domain in completion order; ``errors`` holds one name per failed domain
        (the exception's ``d`` tag if present, otherwise the submitted name).
    """
    errors = []
    results = []
    futures = {d: executor.submit(check_domain, d) for d in tqdm(domains, desc="Scheduling queries")}
    # Reverse lookup so a failure can be attributed even when the exception carries
    # no `d` attribute; reading `.d` unconditionally would abort the whole run.
    domain_of = {future: d for d, future in futures.items()}
    with tqdm(total=len(futures), desc="Querying") as pbar:
        for future in concurrent.futures.as_completed(futures.values()):
            pbar.update(1)
            exc = future.exception()
            if exc is None:
                results.append(future.result())
                continue
            failed = getattr(exc, 'd', domain_of[future])
            logging.warning(f"{failed}: {exc}")
            errors.append(failed)
    return results, errors

In [None]:
# First pass over the full domain list; `errors` collects the names whose check raised.
results, errors = run_queries(domains)

In [None]:
# Second pass over only the domains that failed in the first pass.
results_retry, errors_retry = run_queries(errors)

In [None]:
# Share of all input domains that still failed after the retry pass.
len(errors_retry) / len(domains)

In [None]:
# One row per successfully checked domain (first pass plus retry pass).
data = pd.DataFrame(results + results_retry)
# Vectorized string split instead of a row-wise apply with a Python lambda.
# The second-to-last piece of a right split on '.' is taken as the TLD; this presumes
# 'domain' is an absolute name ending in '.' (as produced by Name.to_text()) -- a value
# without any dot would give NaN here.
data['tld'] = data['domain'].str.rsplit('.', n=2).str[-2]
# Zone itself is not validated as secure, but every nameserver has a secure address record.
data['bootstrapable'] = (data['ns_secure'] == 1) & ~data['secure']
# Same, but at least one secure nameserver suffices.
data['bootstrapable_weak'] = (data['ns_secure'] > 0) & ~data['secure']
# ns_secure is a mean of booleans, so its floor is 1 only when all nameservers are secure.
data['ns_secure_all'] = np.floor(data['ns_secure']) == 1
data

In [None]:
# Persist the table; the file name embeds len(domains), the number of domains surveyed.
data.to_pickle(f'tranco-{len(domains)}-data.pickle')

# Debug

In [None]:
# Switch the root logger to DEBUG so query() and zone_soa() emit their debug messages.
# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.DEBUG, force=True)

In [None]:
# Re-run the first domain that still failed in the retry pass. check_domain re-raises
# its error, so this cell is expected to end in a traceback; it raises IndexError
# instead when errors_retry is empty.
check_domain(errors_retry[0])