In [74]:
import subprocess
import json
import copy
import os
import logging
from datetime import datetime, timedelta
from collections import defaultdict
import tldextract
import time
from tqdm import tqdm
import ipaddress
import pickle
from pprint import pprint as pp

def load_data(filename):
    """Load a data file, picking the reader from the file extension.

    Args:
        filename: Path of the file to read.

    Returns:
        For ``.txt``, ``.csv`` and ``.json`` files (case-insensitive): a list
        with one whitespace-stripped string per line of the file.
        For every other file: the object returned by ``pickle.load``.
    """
    # Look at the real extension only. A substring test on the whole path
    # would treat e.g. "txt_cache.pkl" or ".../txt/foo.cache" as a text file.
    extension = os.path.splitext(filename)[1].lower()
    if extension in ('.txt', '.csv', '.json'):
        with open(filename, 'r') as f:
            return [line.strip() for line in f]

    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on files produced by a trusted source.
    with open(filename, 'rb') as f:
        return pickle.load(f)
        
def extract_sld(fqdn):
    """Return ``"<domain>.<suffix>"`` for ``fqdn`` as split by ``tldextract``.

    The two parts are always joined with a dot, so an empty suffix produces a
    result that ends in ``"."``.
    """
    parts = tldextract.extract(fqdn)
    return ".".join((parts.domain, parts.suffix))

def extract_vendor(fqdn):
    """Return only the ``domain`` part that ``tldextract`` finds in ``fqdn``.

    Unlike ``extract_sld`` the public suffix is dropped, so names that differ
    only in their suffix map to the same value.
    """
    return str(tldextract.extract(fqdn).domain)

def extract(fqdn,extype):
    """Dispatch to the extractor selected by ``extype``.

    Args:
        fqdn: Domain name to process.
        extype: ``'sld'`` to call ``extract_sld`` or ``'vendor'`` to call
            ``extract_vendor``.

    Returns:
        The string returned by the selected extractor.

    Raises:
        ValueError: If ``extype`` is neither ``'sld'`` nor ``'vendor'``.
    """
    if extype == 'sld':
        return extract_sld(fqdn)
    if extype == 'vendor':
        return extract_vendor(fqdn)
    # Fail loudly: falling through would return None, and a mistyped extype
    # would then silently produce empty or None-keyed results in callers.
    raise ValueError(f"unknown extype {extype!r}; expected 'sld' or 'vendor'")
        
    
    
def load_cache(domain):
    """Load the cached data for ``domain`` from the cache directory.

    Reads ``<CACHE_DIR>/<domain>.cache`` with ``pickle``. Returns an empty
    list when that file does not exist.
    """
    path = os.path.join(CACHE_DIR, f"{domain}.cache")
    if not os.path.exists(path):
        return []
    with open(path, 'rb') as fh:
        return pickle.load(fh)

def load_cache2txt(domain):
    """Load the cache for ``domain`` and dump it to a text file.

    Writes ``str(item)`` for every cached item, one per line, to
    ``<CACHE_DIR>/txt/<domain>.txt`` (UTF-8). The ``txt`` directory is created
    if it does not exist. Returns nothing.
    """
    entries = load_cache(domain)
    out_dir = os.path.join(CACHE_DIR, "txt")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{domain}.txt")
    with open(out_path, 'w', encoding='utf-8') as fh:
        fh.writelines(str(entry) + '\n' for entry in entries)

In [33]:
# Input file with the target domains. Alternative inputs are kept commented out.
# INPUTPATH = '/home/nly/DNS/adns_depend/input/tranco1M_gov_edu_test_domain.csv'
INPUTPATH = '/home/nly/DNS/adns_depend/input/重点域名14860_20231230.txt'
# INPUTPATH = '/home/nly/DNS/adns_depend/input/重点域名6766_20240127_ssl证书-去外企.txt'

# Dataset tag: the input file name up to its first underscore.
suf = os.path.basename(INPUTPATH).partition('_')[0]

In [6]:
# Logging configuration
DATE = '20240820'

# Per-dataset error-log folder and the log file for this run date.
LOG_DIR = f"/home/nly/DNS/adns_depend/output/error/{suf}/"
logging_output_file = LOG_DIR + DATE + "_logging_unique.txt"

# Per-date, per-dataset folder that holds the resolution-path output files.
OUTPUT_DIR = f"/home/nly/DNS/adns_depend/output/adns_path/{DATE}/{suf}/"

# Global on-disk cache path
CACHE_DIR = f"/home/nly/DNS/adns_depend/cache/{DATE}"

In [None]:
#         with open(f"{OUTPUT_DIR}{DATETIME}_{query_domain}_allpaths.json", 'w') as f:
#             for path in all_paths:
#                 f.write(str(path) + '\n')

In [56]:
# Load the target domains and reduce them to distinct SLDs and vendor labels.
targets = load_data(INPUTPATH)
sld_target_set = {extract_sld(name) for name in targets}
vendor_target_set = {extract_vendor(name) for name in targets}

print(len(sld_target_set), len(vendor_target_set))

# Map each vendor label to every target SLD that carries it.
vendor_targetsld_dict = defaultdict(list)
for i in sld_target_set:
    cur = extract_vendor(i)
    vendor_targetsld_dict[cur].append(i)

14860 14510


# 日志

In [16]:
# Read the error log, keep only the text after the first space of each line
# (the whole line if it has no space), and de-duplicate.
log_depth = load_data(logging_output_file)

log = list({line.split(' ', 1)[-1].strip() for line in log_depth})

len(log)

342

In [21]:
# Group the log entries by error message: error text -> set of domains.
error_domain_dic = defaultdict(set)

for i in log:
    try:
        d, err = i.split(' ', 1)
    except ValueError:
        # No space in the entry, so it is not a "<domain> <error>" pair.
        # Catch only the unpacking failure; a bare except would also hide
        # KeyboardInterrupt and real bugs.
        continue
    error_domain_dic[err].add(d)

In [31]:
# One line per error message: "<message>:<TAB><number of distinct domains>".
for err, l in error_domain_dic.items():
    print(err + ':', len(l), sep='\t')

路径数量超过 1000:	308
循环依赖:	33


# 循环依赖

In [25]:
error_domain_dic['循环依赖']

{'dns.bizcn.com',
 'dns.cein.gov.cn',
 'dns.cnmsn.net',
 'dns2.globedom.com',
 'dns4.cbcc.cn',
 'dns5.ccfccb.cn',
 'es.sia.ac.cn',
 'ns.acfic.cn',
 'ns.biz-email.net',
 'ns.bjedu.com.cn',
 'ns.cdnhost.cn',
 'ns.corp-email.com',
 'ns.icann.org',
 'ns.ptt.js.cn',
 'ns.tpt.net.cn',
 'ns1.bee-net.com',
 'ns1.china-online.com.cn',
 'ns1.corpease.net',
 'ns1.totalenergies.net',
 'ns1.weber.cloud',
 'ns2.east.net',
 'ns2.hccb.com.cn',
 'ns4.totalenergies.info',
 'ns4.webercloud.de',
 'ns5.cnmsn.net',
 'ns61.ultradns2.com',
 'ns61.ultradns2.org',
 'ns7.hzbank.net',
 'ns8.hzbank.net',
 'nsbak.biz-email.net',
 'pdns196.ultradns.co.uk',
 'pdns196.ultradns.info',
 'www.pzhu.edu.cn'}

In [38]:
# Domains logged with the circular-dependency error, plus their SLD and vendor views.
cycle_fqdn = error_domain_dic['循环依赖']
cycle_sld = {extract_sld(fqdn) for fqdn in cycle_fqdn}
cycle_vendor = {extract_vendor(fqdn) for fqdn in cycle_fqdn}
print(len(cycle_fqdn), len(cycle_sld), len(cycle_vendor))

33 30 27


In [39]:
cycle_sld - sld_target_set

{'acfic.cn',
 'bee-net.com',
 'biz-email.net',
 'bizcn.com',
 'bjedu.com.cn',
 'cbcc.cn',
 'cdnhost.cn',
 'cein.gov.cn',
 'cnmsn.net',
 'corpease.net',
 'east.net',
 'globedom.com',
 'hzbank.net',
 'icann.org',
 'ptt.js.cn',
 'totalenergies.info',
 'totalenergies.net',
 'tpt.net.cn',
 'ultradns.co.uk',
 'ultradns.info',
 'ultradns2.com',
 'ultradns2.org',
 'weber.cloud',
 'webercloud.de'}

In [40]:
cycle_vendor - vendor_target_set

{'bee-net',
 'biz-email',
 'bizcn',
 'bjedu',
 'cbcc',
 'cdnhost',
 'cein',
 'cnmsn',
 'corpease',
 'east',
 'globedom',
 'icann',
 'ptt',
 'tpt',
 'ultradns',
 'ultradns2',
 'weber',
 'webercloud'}

In [50]:
# SLDs that are both in the circular-dependency set and in the target set.
chonghe = cycle_sld.intersection(sld_target_set)
chonghe

{'ccfccb.cn',
 'china-online.com.cn',
 'corp-email.com',
 'hccb.com.cn',
 'pzhu.edu.cn',
 'sia.ac.cn'}

In [52]:
# For every overlapping SLD, collect the circular-dependency FQDNs under it.
extype = 'sld'
cycle_dict = defaultdict(list)

for i in cycle_fqdn:
    cur = extract(i, extype)
    if cur not in chonghe:
        continue
    cycle_dict[cur].append(i)

cycle_dict

defaultdict(list,
            {'sia.ac.cn': ['es.sia.ac.cn'],
             'china-online.com.cn': ['ns1.china-online.com.cn'],
             'corp-email.com': ['ns.corp-email.com'],
             'ccfccb.cn': ['dns5.ccfccb.cn'],
             'pzhu.edu.cn': ['www.pzhu.edu.cn'],
             'hccb.com.cn': ['ns2.hccb.com.cn']})

In [53]:
# Vendor labels that are both in the circular-dependency set and in the target set.
# Note: this rebinds `chonghe`, which previously held the SLD overlap.
chonghe = cycle_vendor.intersection(vendor_target_set)
chonghe

{'acfic',
 'ccfccb',
 'china-online',
 'corp-email',
 'hccb',
 'hzbank',
 'pzhu',
 'sia',
 'totalenergies'}

In [80]:
# For every overlapping vendor label, pair the target SLDs of that vendor with
# each circular-dependency FQDN that carries the same label.
extype = 'vendor'
cycle_dict = defaultdict(list)

for i in cycle_fqdn:
    cur = extract(i, extype)
    if cur not in chonghe:
        continue
    cycle_dict[cur].append((vendor_targetsld_dict[cur], i))
    print(vendor_targetsld_dict[cur])

pp(cycle_dict)

['sia.cn', 'sia.ac.cn']
['totalenergies.cn']
['hzbank.com.cn']
['china-online.com.cn']
['hzbank.com.cn']
['corp-email.com', 'corp-email.cn']
['acfic.org.cn']
['ccfccb.cn']
['totalenergies.cn']
['pzhu.edu.cn']
['hccb.com.cn']
defaultdict(<class 'list'>,
            {'acfic': [(['acfic.org.cn'], 'ns.acfic.cn')],
             'ccfccb': [(['ccfccb.cn'], 'dns5.ccfccb.cn')],
             'china-online': [(['china-online.com.cn'],
                               'ns1.china-online.com.cn')],
             'corp-email': [(['corp-email.com', 'corp-email.cn'],
                             'ns.corp-email.com')],
             'hccb': [(['hccb.com.cn'], 'ns2.hccb.com.cn')],
             'hzbank': [(['hzbank.com.cn'], 'ns7.hzbank.net'),
                        (['hzbank.com.cn'], 'ns8.hzbank.net')],
             'pzhu': [(['pzhu.edu.cn'], 'www.pzhu.edu.cn')],
             'sia': [(['sia.cn', 'sia.ac.cn'], 'es.sia.ac.cn')],
             'totalenergies': [(['totalenergies.cn'], 'ns4.totalenergies.info'),

发现

1. 导致循环依赖的是中间的权威服务器域名

需要进一步看日志文件，才能知道具体情况。比如，是不是中间递归的时候，碰上了循环依赖


还有个问题：

有一些循环依赖的域名不属于我们的目标域名，说明他们是第三方DNS厂商，说不定他们影响的范围很大（也就是说他们可能多次被其他域名采用作为权威服务器）

In [63]:

def read_target_data(target, output_dir=None):
    """Load the first output file whose name contains ``target``.

    Args:
        target: Substring to look for in the file names (for example a
            domain name). Any file name containing it matches.
        output_dir: Directory to search. Defaults to the module-level
            ``OUTPUT_DIR`` when omitted.

    Returns:
        The result of ``load_data`` for the first matching file in sorted
        name order, or ``None`` if no file name contains ``target``.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR
    # os.listdir returns entries in arbitrary order; sort so the same file is
    # picked on every run when several names match.
    for filename in sorted(os.listdir(output_dir)):
        if target in filename:
            return load_data(os.path.join(output_dir, filename))
    return None


In [75]:
# Start by inspecting a single output file; pulling out the matching log file still needs a helper function.

o = read_target_data('corp-email.com')

In [76]:
len(o)

288

In [79]:
# Keep only the recorded paths whose text contains the 'LOOP' marker.
o_err = list(filter(lambda path: 'LOOP' in path, o))
len(o_err)

124

In [84]:
# Inspect another output file; the match is by substring, so 'sia.ac' selects a file whose name contains it.

o = read_target_data('sia.ac')

In [85]:
len(o)

9

In [88]:
o

["[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('a.dns.cn', '203.119.25.1', 'www.sia.ac.cn'), ('es.sia.ac.cn', '210.72.131.131', 'www.sia.ac.cn'), ('www.sia.ac.cn', 'www.sia.cas.cn', 'www.sia.ac.cn')]",
 "[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('b.dns.cn', '203.119.26.1', 'www.sia.ac.cn'), ('es.sia.ac.cn', '210.72.131.131', 'www.sia.ac.cn'), ('www.sia.ac.cn', 'www.sia.cas.cn', 'www.sia.ac.cn')]",
 "[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('ns.cernet.net', '202.112.0.44', 'www.sia.ac.cn'), ('es.sia.ac.cn', '210.72.131.131', 'www.sia.ac.cn'), ('www.sia.ac.cn', 'www.sia.cas.cn', 'www.sia.ac.cn')]",
 "[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('ns.cernet.net', '202.112.0.44', 'www.sia.ac.cn'), [('j.root-servers.net', '192.58.128.30', 'ns.sia.cn'), ('a.dns.cn', '203.119.25.1', 'ns.sia.cn'), ('ns.sia.cn', '210.72.133.252', 'ns.sia.cn'), ('ns.sia.cn', '210.72.133.252', 'ns.sia.cn')], ('ns.sia.cn', '210.72.133.252', 'www.sia

In [86]:
# Keep only the recorded paths whose text contains the 'LOOP' marker.
o_err = list(filter(lambda path: 'LOOP' in path, o))
len(o_err)

3

In [87]:
o_err

["[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('ns.cernet.net', '202.112.0.44', 'www.sia.ac.cn'), [('j.root-servers.net', '192.58.128.30', 'ns.sia.cn'), ('a.dns.cn', '203.119.25.1', 'ns.sia.cn'), [('j.root-servers.net', '192.58.128.30', 'es.sia.ac.cn'), ('es.sia.ac.cn', '$LOOP$', 'es.sia.ac.cn')], ('ns.sia.cn', '$$$', 'ns.sia.cn')], ('www.sia.ac.cn', '$$$', 'www.sia.ac.cn')]",
 "[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('ns.cernet.net', '202.112.0.44', 'www.sia.ac.cn'), [('j.root-servers.net', '192.58.128.30', 'ns.sia.cn'), ('b.dns.cn', '203.119.26.1', 'ns.sia.cn'), [('j.root-servers.net', '192.58.128.30', 'es.sia.ac.cn'), ('es.sia.ac.cn', '$LOOP$', 'es.sia.ac.cn')], ('ns.sia.cn', '$$$', 'ns.sia.cn')], ('www.sia.ac.cn', '$$$', 'www.sia.ac.cn')]",
 "[('j.root-servers.net', '192.58.128.30', 'www.sia.ac.cn'), ('ns.cernet.net', '202.112.0.44', 'www.sia.ac.cn'), [('j.root-servers.net', '192.58.128.30', 'ns.sia.cn'), ('ns.cernet.net', '202.112.0.44', 'ns.sia.

In [89]:
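# The example above is wrong: the resolution paths contain no real loop, yet a loop was recorded.

# Root cause (confirmed): the dependency lookups corrupt the global cache. The cache is read and rewritten repeatedly, so the links of one chain do not all see the same version of it and the results get mixed up.
# Fix: change the global store from saving all_paths to saving the cmd_output of each individual query.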



In [90]:
# Scratch check: joining an empty list with ',' yields an empty string.
answers = []
','.join(answers)

''