In [1]:
import logging

import numpy as np
import dns.message, dns.query, dns.rdataclass, dns.rdatatype, dns.flags, dns.exception, dns.name
from tqdm import tqdm
from tranco import Tranco
import pandas as pd

In [2]:
# Tranco client; downloaded lists are cached in the local '.tranco' directory.
t = Tranco(cache=True, cache_dir='.tranco')
# No date is passed to list(); presumably this returns the most recent ranking -- TODO confirm.
latest_list = t.list()

In [3]:
# Shorthand constants for the DNS class and the record types used in this notebook.
# (DS was previously assigned twice; the redundant second assignment is removed.)
IN = dns.rdataclass.from_text("IN")
NS = dns.rdatatype.from_text("NS")
DS = dns.rdatatype.from_text("DS")
DNSKEY = dns.rdatatype.from_text("DNSKEY")
SOA = dns.rdatatype.from_text("SOA")
A = dns.rdatatype.from_text("A")
AAAA = dns.rdatatype.from_text("AAAA")
RRSIG = dns.rdatatype.from_text("RRSIG")

In [4]:
def query(q, where='127.0.0.1', port=5301, timeout=5, retries=1):
    """Send the DNS message ``q`` to a resolver over TCP and return the response.

    The CD flag is set on ``q`` (the message is modified in place) before it
    is sent. The question is logged at INFO level, the response code and the
    full response at DEBUG level.

    Parameters:
        q: query message; its first question is used for log/error messages.
        where: address of the resolver (default: local resolver).
        port: TCP port of the resolver.
        timeout: per-attempt timeout in seconds, passed to ``dns.query.tcp``.
        retries: how many additional attempts are made after a timeout.

    Returns:
        The response message.

    Raises:
        dns.exception.Timeout: if the last attempt timed out as well.
        Exception: if the resolver answered with response code 2 (SERVFAIL).
    """
    question = f'{q.question[0].name} {dns.rdatatype.to_text(q.question[0].rdtype)}'
    logging.info(f'Query: {question}')
    q.flags = q.flags | dns.flags.CD
    attempts_left = max(retries, 0)
    while True:
        try:
            r = dns.query.tcp(q, where=where, port=port, timeout=timeout)
            break
        except dns.exception.Timeout:
            # Only timeouts are retried; every other error propagates immediately.
            if attempts_left == 0:
                raise
            attempts_left -= 1
    logging.debug(f'Response Code: {r.rcode()}')
    logging.debug(f'Response: \n{r}')
    if r.rcode() == 2:  # servfail
        raise Exception(f"SERVFAIL after asking {question}")
    return r

In [5]:
def zone_soa(qname):
    """Find the SOA record of the zone that contains ``qname``.

    An SOA query is sent for ``qname`` and, if that yields nothing, for each
    of its parents in turn. Every successful response is searched for an SOA
    RRset owned by the queried name or any of its ancestors up to the root;
    the authority section is consulted before the answer section.

    Returns:
        ``(soa_rrset, rrsig)`` for the first SOA RRset found, where ``rrsig``
        is the RRSIG RRset covering SOA at the same owner name or ``[]`` if
        there is none; ``(None, None)`` if a query is answered with response
        code 3 (NXDOMAIN).

    Raises:
        Exception: if a query is answered with any other non-zero code.
    """
    def find(response, owner, rdtype, **extra):
        # Authority section first; fall back to whatever the answer section holds.
        from_authority = response.get_rrset(response.authority, owner, IN, rdtype, **extra)
        if from_authority:
            return from_authority
        return response.get_rrset(response.answer, owner, IN, rdtype, **extra)

    while True:  # one SOA query per candidate, moving from qname towards the root
        r = query(dns.message.make_query(qname, SOA, want_dnssec=True))
        if r.rcode() == 3:  # NXDOMAIN
            return None, None
        if r.rcode() != 0:
            raise Exception(f"zone_soa({qname}) query response code:{r.rcode()}")

        name = qname
        while True:  # scan the reply for an SOA owned by qname or one of its ancestors
            soa_rrset = find(r, name, SOA)
            soa_signatures = find(r, name, RRSIG, covers=SOA) or []
            if soa_rrset:
                return soa_rrset, soa_signatures
            logging.debug(f"getting parent of name {name}")
            if name != dns.name.root:
                name = name.parent()
                continue
            break

        logging.debug(f"getting parent of qname {qname}")
        qname = qname.parent()

def zone_ds(qname):
    """Query the DS records of ``qname``.

    Returns the DS RRset found in the answer section under ``qname``, or an
    empty list when the answer holds no such RRset.
    """
    request = dns.message.make_query(qname, DS)
    request.flags |= dns.flags.CD
    response = query(request)
    found = response.get_rrset(response.answer, qname, IN, DS)
    if found:
        return found
    return []

def zone_dnskey(qname):
    """Query the DNSKEY records of ``qname`` together with their signatures.

    Returns a pair ``(dnskey_rrset, rrsig_rrset)`` taken from the answer
    section; each element is an empty list when the respective RRset is absent.
    """
    request = dns.message.make_query(qname, DNSKEY, want_dnssec=True)
    request.flags |= dns.flags.CD
    response = query(request)
    keys = response.get_rrset(response.answer, qname, IN, DNSKEY)
    signatures = response.get_rrset(response.answer, qname, IN, RRSIG, covers=DNSKEY)
    return (keys if keys else []), (signatures if signatures else [])

In [6]:
# Take the first one million entries of the ranking as the scan population.
domains = latest_list.top(1_000_000)

In [7]:
def check_domain(d):
    """Collect the DNSSEC-related records for a single domain.

    The zone is identified through ``zone_soa``; its DS set is fetched with
    ``zone_ds`` and, only if a DS set exists, its DNSKEY set and signatures
    with ``zone_dnskey``.

    Parameters:
        d: domain name as text.

    Returns:
        dict with the keys ``domain``, ``zone``, ``soa_rrsig``, ``ds``,
        ``dnskey`` and ``dnskey_rrsig``.

    Raises:
        Exception: any error is re-raised with an extra attribute ``d`` that
        names the affected domain (the parsed name in text form, or the raw
        input if it could not be parsed).
    """
    # Remember the raw input: if parsing fails, `d` is not a dns.name.Name yet
    # and must not be asked for .to_text() in the error handler.
    domain_text = d
    try:
        d = dns.name.from_text(d)
        domain_text = d.to_text()
        soa, soa_rrsig = zone_soa(d)
        z = soa.name if soa else None

        if z is None:
            raise Exception(f"Could not identify zone name for domain {d}")

        ds = zone_ds(z)
        # Without a DS set there is nothing to compare the keys against, so skip the query.
        dnskey, dnskey_rrsig = (zone_dnskey(z)) if ds else ([], [])

        return {
            'domain': d.to_text(),
            'zone': z.to_text(),
            'soa_rrsig': soa_rrsig,
            'ds': ds,
            'dnskey': dnskey,
            'dnskey_rrsig': dnskey_rrsig,
        }
    except Exception as e:
        e.d = domain_text
        raise  # bare raise keeps the original traceback

In [8]:
import concurrent.futures  # a bare `import concurrent` does not load the `futures` submodule

# Thread pool shared by run_queries(); up to 50 domain checks run concurrently.
executor = concurrent.futures.ThreadPoolExecutor(50)

In [9]:
def run_queries(domains):
    """Run ``check_domain`` for every entry of ``domains`` on the shared executor.

    Parameters:
        domains: iterable of domain names as text.

    Returns:
        ``(results, errors)``: ``results`` holds the dicts returned by the
        successful checks, ``errors`` the names of the domains whose check
        raised (each failure is also logged as a warning).
    """
    errors = []
    results = []
    # Map each future back to the domain it was submitted for, so a failure can be
    # attributed even if the exception carries no `.d` attribute.
    futures = {executor.submit(check_domain, d): d for d in tqdm(domains, desc="Scheduling queries")}
    with tqdm(total=len(futures), desc="Querying") as pbar:
        for future in concurrent.futures.as_completed(futures):
            pbar.update(1)
            error = future.exception()
            if error is not None:
                failed_domain = getattr(error, 'd', futures[future])
                logging.warning(f"{failed_domain}: {error}")
                errors.append(failed_domain)
            else:
                results.append(future.result())
    return results, errors

In [10]:
# First pass over the full domain list; `errors` collects the domains whose check raised.
results, errors = run_queries(domains)

Scheduling queries: 100%|██████████| 1000000/1000000 [01:12<00:00, 13824.40it/s]
























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Querying: 100%|██████████| 1000000/1000000 [52:41<00:00, 316.32it/s]


In [11]:
# Second pass over only the domains that failed in the first run.
results_retry, errors_retry = run_queries(errors)

Scheduling queries: 100%|██████████| 41759/41759 [00:00<00:00, 90094.04it/s] 






















































































































































































































































































































































































































































































































































































































































































Querying: 100%|██████████| 41759/41759 [05:03<00:00, 137.73it/s]


In [22]:
# Share of all requested domains that are not listed in errors_retry, i.e. not still failing after the retry pass.
1 - len(errors_retry) / len(domains)

0.967374

In [24]:
# Number of rows in `data`.
# NOTE(review): `data` is only assigned in a cell further down, so this cell fails on a fresh top-to-bottom run.
len(data)

967374

In [13]:
def _algorithms(records):
    """Return the set of ``algorithm`` values of all records in ``records``."""
    return {record.algorithm for record in records}

# One row per successfully checked domain, from the first pass and the retry pass combined.
data = pd.DataFrame(results + results_retry)
data['ds_algos'] = data['ds'].apply(_algorithms)
data['dnskey_algos'] = data['dnskey'].apply(_algorithms)
data['dnskey_rrsig_algos'] = data['dnskey_rrsig'].apply(_algorithms)
data['soa_rrsig_algos'] = data['soa_rrsig'].apply(_algorithms)
# Preview a few rows that actually have DS records.
data.loc[data['ds'].apply(len) > 0].head(3)

Unnamed: 0,domain,zone,soa_rrsig,ds,dnskey,dnskey_rrsig,ds_algos,dnskey_algos,dnskey_rrsig_algos,soa_rrsig_algos
89,nau.edu.,nau.edu.,(SOA 5 2 500 20211026092727 20211005082727 243...,(52534 5 2 08a5aaf97b1dabd3fa9e6543d8c75f28341...,(256 3 5 AwEAAcVQJCSV70wHoQqtagAOFPJq/8EK T/aP...,(DNSKEY 5 2 86400 20211018133049 2021092712304...,{Algorithm.RSASHA1},{Algorithm.RSASHA1},{Algorithm.RSASHA1},{Algorithm.RSASHA1}
95,fox32chicago.com.,fox32chicago.com.,(SOA 13 2 7200 20211006095830 20211004095830 4...,(48553 13 2 d3f6df85fe799aaf73d545cc2f823ec844...,(256 3 13 oEoHgXY3h5rfd/IxIzHXQcpcclOrDRjT tzj...,(DNSKEY 13 2 7200 20211012061655 2021100406165...,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256}
132,powells.com.,powells.com.,(SOA 13 2 3600 20211006105716 20211004085716 3...,(2371 13 2 a4b3aab7353307b1b6f48be4164999e850b...,(256 3 13 oJMRESz5E4gYzS/q6XDrvU1qMPYIjCWz JaO...,(DNSKEY 13 2 3600 20211113130821 2021091313082...,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256}


In [14]:
# First three rows of the full table, regardless of whether DS records exist.
data.head(3)

Unnamed: 0,domain,zone,soa_rrsig,ds,dnskey,dnskey_rrsig,ds_algos,dnskey_algos,dnskey_rrsig_algos,soa_rrsig_algos
0,meituan.com.,meituan.com.,[],[],[],[],{},{},{},{}
1,theiet.org.,theiet.org.,[],[],[],[],{},{},{},{}
2,convertkit.com.,convertkit.com.,[],[],[],[],{},{},{},{}


In [15]:
# dnskey_rrsig_ok: every DS algorithm also appears among the DNSKEY signature algorithms.
data['dnskey_rrsig_ok'] = data.apply(lambda row: row['ds_algos'] <= row['dnskey_rrsig_algos'], axis=1)
# soa_rrsig_ok: every DNSKEY algorithm also appears among the SOA signature algorithms.
data['soa_rrsig_ok'] = data.apply(lambda row: row['dnskey_algos'] <= row['soa_rrsig_algos'], axis=1)

In [16]:
# Fraction of rows passing each check (mean of the boolean columns).
data['dnskey_rrsig_ok'].mean(), data['soa_rrsig_ok'].mean()

(0.9998697504791322, 0.9999813929255903)

In [17]:
# Absolute number of rows failing each check.
(~data['dnskey_rrsig_ok']).sum(), (~data['soa_rrsig_ok']).sum()

(126, 18)

In [27]:
import numpy as np

In [38]:
# Zones failing at least one of the two checks. The mask previously OR-ed the DNSKEY check
# with itself, so zones failing only the SOA check were never listed.
data[~data['dnskey_rrsig_ok'] | ~data['soa_rrsig_ok']][['zone', 'ds_algos', 'dnskey_rrsig_algos', 'dnskey_rrsig_ok', 'dnskey_algos', 'soa_rrsig_algos', 'soa_rrsig_ok']]

Unnamed: 0,zone,ds_algos,dnskey_rrsig_algos,dnskey_rrsig_ok,dnskey_algos,soa_rrsig_algos,soa_rrsig_ok
1914,kmu.gov.ua.,{Algorithm.RSASHA512},{},False,{Algorithm.RSASHA512},{Algorithm.RSASHA512},True
17420,socialsecurity.gov.,"{Algorithm.ECDSAP256SHA256, Algorithm.RSASHA1N...",{Algorithm.ECDSAP256SHA256},False,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},True
19954,tourismthailand.org.,"{Algorithm.ECDSAP256SHA256, Algorithm.RSASHA1N...",{Algorithm.ECDSAP256SHA256},False,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},True
30572,kk.dk.,{Algorithm.RSASHA256},{},False,{Algorithm.RSASHA256},{Algorithm.RSASHA256},True
36322,svbconnect.com.,"{Algorithm.ECDSAP256SHA256, Algorithm.RSASHA1}",{Algorithm.RSASHA1},False,{Algorithm.RSASHA1},{Algorithm.RSASHA1},True
...,...,...,...,...,...,...,...
924098,thelavender.net.,"{Algorithm.RSASHA256, Algorithm.ECDSAP256SHA256}",{Algorithm.ECDSAP256SHA256},False,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},True
927573,panobcan.sk.,{Algorithm.ECDSAP256SHA256},{},False,{},{},True
931891,sjxrbj.com.,{Algorithm.RSASHA256},{},False,{},{},True
936094,autobanden-365.nl.,"{Algorithm.ECDSAP256SHA256, Algorithm.RSASHA1N...",{Algorithm.ECDSAP256SHA256},False,{Algorithm.ECDSAP256SHA256},{Algorithm.ECDSAP256SHA256},True


In [39]:
# DS algorithms for which no DNSKEY signature algorithm was seen.
data['dangling_ds_algo'] = data.apply(lambda row: row['ds_algos'].difference(row['dnskey_rrsig_algos']), axis=1)

In [62]:
# DNSKEY algorithms for which no SOA signature algorithm was seen.
data['dangling_dnskey_algo'] = data.apply(lambda row: row['dnskey_algos'].difference(row['soa_rrsig_algos']), axis=1)

In [61]:
# Histogram of dangling DS algorithm sets. Sets are unhashable and only partially ordered
# (`<` is the subset test), so they are counted directly under their sorted-tuple key
# instead of being grouped by a sort-based np.unique.
hist = {}
for algos in data['dangling_ds_algo']:
    key = tuple(sorted(algos))
    hist[key] = hist.get(key, 0) + 1
hist

{(): 967248,
 (<Algorithm.ECDSAP256SHA256: 13>,): 53,
 (<Algorithm.RSASHA1: 5>,): 10,
 (<Algorithm.RSASHA1NSEC3SHA1: 7>,): 17,
 (<Algorithm.RSASHA256: 8>,): 33,
 (<Algorithm.RSASHA512: 10>,): 5,
 (<Algorithm.RSAMD5: 1>,): 4,
 (<Algorithm.RSASHA1NSEC3SHA1: 7>, <Algorithm.RSASHA256: 8>): 1,
 (<Algorithm.ECCGOST: 12>,): 2,
 (<Algorithm.DSA: 3>,): 1}

In [63]:
# Histogram of dangling DNSKEY algorithm sets. Sets are unhashable and only partially ordered
# (`<` is the subset test), so they are counted directly under their sorted-tuple key
# instead of being grouped by a sort-based np.unique.
hist = {}
for algos in data['dangling_dnskey_algo']:
    key = tuple(sorted(algos))
    hist[key] = hist.get(key, 0) + 1
hist

{(): 967356,
 (<Algorithm.RSASHA256: 8>,): 7,
 (<Algorithm.RSASHA1NSEC3SHA1: 7>,): 6,
 (<Algorithm.ECDSAP256SHA256: 13>,): 4,
 (<Algorithm.RSASHA512: 10>,): 1}

In [64]:
# Save the annotated DataFrame to disk (plain string: the f-prefix had no placeholders).
data.to_pickle('dnssec-misconfiguration-prevalence-tranco-top-1m.pickle')

# Debug

In [20]:
# Raise the root logger to DEBUG so the INFO/DEBUG messages emitted by query() become visible.
logging.basicConfig(level=logging.DEBUG, force=True)

In [21]:
# Spot-check a single zone; query() logs the question and the full response.
zone_dnskey(dns.name.from_text('europa.eu'))

INFO:root:Query: europa.eu. DNSKEY
DEBUG:root:Response Code: 0
DEBUG:root:Response: 
id 50898
opcode QUERY
rcode NOERROR
flags QR RD RA AD CD
edns 0
eflags DO
payload 512
;QUESTION
europa.eu. IN DNSKEY
;ANSWER
europa.eu. 3600 IN DNSKEY 257 3 8 AwEAAd6ILwCObzteZw4iDDFF5Ab0GGaE vtogmEvjNeZyjczgtq641zpu38BNDMmg RM6tDqoYtphAoqWjEUkI8XiTYW6go+VG tqY06VDdX5+Hl0l2YOAAxKUo6EYlB9no s5wzKD5FeEYaBP2UCmyzk1hZyfTIOcLX kZt1uUQF7l3MmQ1ETB38pOvbwxU0EQVK uG9YmKKogjW7Ujab4JFIVxqyN+Fxhh7i srceHce0ITryL8e64wVwX9GQNA3zLxK7 XQvOJswv82ebYwsl8PmIRwi7hkpJMuz+ oQe/i5E6xLpAXRfCDENrcHgXi9FbRmC9 Vk9RVU82BpYfrsoNZSNSeRDrJl5iftwt acOpf+QvaI1je7O5cd0VwehjteT9Rfs0 Gx27WlZ7KBw7ZurwLdosvNUjKzzvLpmU iKvEyicmgZ+5cGYSTAIdhcAri+mKVpN7 76qeF2v6fOSEtHFK/Bpu7HMO6FQ3e7Kt FiQPMdYe6WSaXODA1EoVHX2IcSNhxnMu hvt7QUaPLVfs0kbzhd2XlZH56D07kLLN Asnwl6ZeBnoqbe/vlio8/HXzDFlPmWTT UqLbPNdx0kfqbQEmkc1IRHnB0upy6/Vv e86K9cl4FBW2eYdWZprm2c+ZdEJXsVAG o3w4LutKFjHLweABS0y/BNQN0a0Ga8GV TzmKK9csklZRqj3d
europa.eu. 3600 IN DNSKEY 256 3 8 AwEAAaRm9SpK

(<DNS europa.eu. IN DNSKEY RRset: [<257 3 8 AwEAAd6ILwCObzteZw4iDDFF5Ab0GGaE vtogmEvjNeZyjczgtq641zpu38BNDMmg RM6tDqoYtphAoqWjEUkI8XiTYW...>, <256 3 8 AwEAAaRm9SpK2QM+wXxIeLqn3YvFZTl7 eZM9vTgV4jbMIjimFSmcMpPAwQPRarW9 /7RxjmLDgMOTSbXExAUzpHOMqL...>, <256 3 8 AwEAAfT74AjVLRP8RN2jgLAM4ZwAX3KP Z1bwd+OCaDtrtK4VRJDL1/Zddi23L14r Yb2N0HeIkO1MRlydrfz3KbVBly...>]>,
 <DNS europa.eu. IN RRSIG(DNSKEY) RRset: [<DNSKEY 8 2 3600 20211031044822 20211001040042 14845 europa.eu. vDgYkmEOKSxSyvsyHM5hi5qXF+Ask4s0 L7qB...>, <DNSKEY 8 2 3600 20211031044822 20211001040042 64599 europa.eu. z5NIHgCwiJDsBsuEAX5dv6DfN1cCf1Uz hhu9...>]>)