In [143]:
import datetime
import gzip
import radix
import re
import math
import pandas as pd

In [131]:
# Calendar tuples (year, month, day) for the 364 days preceding today,
# newest first, so days[0] is yesterday.
base = datetime.datetime.today()
days = [
    (when.year, when.month, when.day)
    for when in (base - datetime.timedelta(days=back) for back in range(1, 365))
]

In [132]:
# Yesterday's (year, month, day) tuple picks which delegation snapshot to load.
d = days[0]
fn_del = "data/delegated-apnic-%04d%02d%02d.gz" % d

In [133]:
# Load the delegation snapshot named by fn_del into a radix tree keyed by CIDR
# block. Each IPv4 record becomes one node carrying its allocation date;
# "seen_date" and "asn" start as None and are filled in by later cells.
with gzip.open(fn_del, "rt") as f_del:  # context manager closes the handle
    del_lines = f_del.readlines()

seen_tree = radix.Radix()
for line in del_lines:
    # Coarse substring filters: keep lines mentioning ipv4, drop summary lines.
    if "ipv4" not in line:
        continue
    if "summary" in line:
        continue
    # rstrip instead of [:-1] so a final line without a trailing newline does
    # not lose its last real character.
    row = line.rstrip("\n").split("|")
    network = row[3]
    # row[4] is used as the number of addresses in the block.
    count = int(row[4])
    if count < 1:
        # math.log() used to raise ValueError here; keep failing loudly.
        raise ValueError("non-positive address count in %r" % line)
    # Smallest single prefix that covers `count` addresses, in pure integer
    # arithmetic. The previous 32 - math.log(count, 2) produced a float that
    # "%d" truncated; for exact powers of two the float log can land just
    # above the integer and yield a prefix one bit too short.
    # NOTE(review): a count that is not a power of two is still rounded up to
    # one covering prefix (as before), which over-states the block -- confirm
    # whether such records should be split into several CIDRs instead.
    prefixlen = 32 - (count - 1).bit_length()
    alloc_date = "%s-%s-%s" % (row[5][0:4], row[5][4:6], row[5][6:8])
    ipblock = "%s/%d" % (network, prefixlen)
    snode = seen_tree.add(ipblock)
    snode.data["seen_date"] = None
    snode.data["alloc_date"] = alloc_date
    snode.data["asn"] = None

In [134]:
# Number of nodes held in seen_tree (one per distinct block added to it).
len(seen_tree.nodes())

41196

In [135]:
def get_unseen():
    """Return the nodes of the global ``seen_tree`` whose ``seen_date`` is still None.

    The result is a fresh list in the order ``seen_tree.nodes()`` yields them.
    """
    return [node for node in seen_tree.nodes() if node.data["seen_date"] is None]

In [136]:
# Blocks with no seen_date yet; every node starts with seen_date None, so
# before any dump has been walked this equals the tree size.
len(get_unseen())

41196

In [137]:
def get_dmp_tree(date):
    """Build a radix tree of the prefixes found in one day's dump file.

    Reads ``data/<date>.dmp.gz`` as text, removes every newline and splits the
    result on ``*`` (presumably each table entry starts with ``*`` and long
    entries wrap across lines -- confirm against the dump format).

    Args:
        date: string substituted into the file name, e.g. ``"2019-02-22"``.

    Returns:
        A ``radix.Radix`` in which each stored node has ``data["asn"]`` set to
        the second-to-last whitespace-separated token of the entry that
        created it (a string).

    Raises:
        OSError: if the dump file cannot be opened or read.
    """
    fn_dmp = "data/%s.dmp.gz" % date
    # Context manager closes the handle even when read() fails.
    with gzip.open(fn_dmp, "rt") as f_dmp:
        dmp = f_dmp.read()
    dmp_lines = dmp.replace("\n", "").split("*")

    dmp_tree = radix.Radix()
    for line in dmp_lines:
        # Chunks without a "/" cannot contain a CIDR prefix.
        if "/" not in line:
            continue
        # Drop brace-delimited text and '>' markers before tokenising.
        row = re.sub('{.*}|>', "", line).split()
        if len(row) < 2:
            # Too short to carry both a prefix and an ASN token; row[-2]
            # would raise IndexError, so skip the malformed chunk.
            continue
        prefix = row[0]
        # NOTE(review): presumably the origin AS, followed by a one-letter
        # origin code as the last token -- confirm against the dump format.
        asn = row[-2]
        # Store a prefix only when nothing already in the tree covers it, so
        # the first entry encountered for a range decides the recorded ASN.
        if dmp_tree.search_best(prefix) is None:
            dmp_tree.add(prefix).data["asn"] = asn
    return dmp_tree

In [138]:
def walk_dmp(date):
    """Stamp still-unseen delegated blocks that match a prefix in ``date``'s dump.

    Builds the dump tree for ``date`` via ``get_dmp_tree`` and, for every node
    returned by ``get_unseen()``, looks its prefix up with ``search_worst``.
    On a hit the node's ``seen_date`` is set to ``date`` and its ``asn`` is
    copied from the matching dump node. Nodes are modified in place.

    Args:
        date: the day string passed on to ``get_dmp_tree``.

    Returns:
        None.
    """
    # NOTE(review): search_worst looks for a dump prefix that contains the
    # delegated block; a block announced only as more-specific routes would
    # presumably stay unseen -- confirm this is the intended definition.
    dmp_tree = get_dmp_tree(date)
    for snode in get_unseen():
        rnode = dmp_tree.search_worst(snode.prefix)
        if rnode is None:
            continue
        snode.data["seen_date"] = date
        snode.data["asn"] = rnode.data["asn"]

In [139]:
# Replay the dumps in the order of `days`; after each one, report how many
# delegated blocks have still never been matched.
for d in days:
    day = "%04d-%02d-%02d" % d
    walk_dmp(day)
    remaining = len(get_unseen())
    print("%s\t%d" % (day, remaining))

2019-02-22	24189
2019-02-21	24177
2019-02-20	24170
2019-02-19	24165
2019-02-18	24153
2019-02-17	24152
2019-02-16	24148
2019-02-15	24139
2019-02-14	24131
2019-02-13	24127
2019-02-12	24109
2019-02-11	24105
2019-02-10	24104
2019-02-09	24103
2019-02-08	24099
2019-02-07	24086
2019-02-06	24076
2019-02-05	24076
2019-02-04	24071
2019-02-03	24071
2019-02-02	24070
2019-02-01	24060
2019-01-31	24055
2019-01-30	24048
2019-01-29	24047
2019-01-28	24041
2019-01-27	24027
2019-01-26	24024
2019-01-25	24020
2019-01-24	24013
2019-01-23	24012
2019-01-22	24003
2019-01-21	24003
2019-01-20	24000
2019-01-19	23999
2019-01-18	23998
2019-01-17	23995
2019-01-16	23991
2019-01-15	23985
2019-01-14	23980
2019-01-13	23979
2019-01-12	23974
2019-01-11	23972
2019-01-10	23961
2019-01-09	23945
2019-01-08	23939
2019-01-07	23932
2019-01-06	23931
2019-01-05	23929
2019-01-04	23928
2019-01-03	23922
2019-01-02	23919
2019-01-01	23917
2018-12-31	23912
2018-12-30	23910
2018-12-29	23908
2018-12-28	23906
2018-12-27	23903
2018-12-26	238

In [161]:
# NOTE(review): lst_dict is assigned in the cell that follows this one, so on
# a fresh kernel this cell has to be run after it.
len(lst_dict)

41196

In [157]:
# Flatten every node of seen_tree into a plain dict so the whole tree can be
# handed to pandas in one go.
lst_dict = [
    {
        "prefix": node.prefix,
        "prefixlen": node.prefixlen,
        "alloc_date": node.data["alloc_date"],
        "seen_date": node.data["seen_date"],
        "asn": node.data["asn"],
    }
    for node in seen_tree.nodes()
]

In [162]:
# Tabulate the records and fix the column order used for display and export.
column_order = ["prefix","prefixlen","asn","alloc_date","seen_date"]
df = pd.DataFrame(lst_dict).loc[:, column_order]

In [173]:
# Export the full table (every row of df, not only the unseen ones) as
# tab-separated text. index is left at its default, so the row index is
# written as a leading unnamed column.
df.to_csv("unused.tsv", sep='\t', encoding='utf-8')

In [182]:
# Three rows with the greatest seen_date value (descending sort).
df.sort_values(by=["seen_date"],ascending=False).head(3)

Unnamed: 0,prefix,prefixlen,asn,alloc_date,seen_date
0,1.0.0.0/24,24,13335,2011-08-11,2019-02-22
23289,121.254.0.0/18,18,23563,2006-07-19,2019-02-22
23297,122.0.64.0/18,18,24422,2006-07-24,2019-02-22


In [185]:
# Three rows with the smallest seen_date value (ascending sort).
df.sort_values(by=["seen_date"],ascending=True).head(3)

Unnamed: 0,prefix,prefixlen,asn,alloc_date,seen_date
8685,103.43.148.0/22,22,133854,2014-11-24,2018-02-25
21670,115.69.128.0/19,19,45271,2008-07-18,2018-02-26
16924,103.205.148.0/22,22,134853,2016-01-01,2018-02-26


In [183]:
# First three rows whose seen_date is null, i.e. never stamped by walk_dmp.
df[df["seen_date"].isnull()].head(3)

Unnamed: 0,prefix,prefixlen,asn,alloc_date,seen_date
4,1.0.8.0/21,21,,2011-04-12,
5,1.0.16.0/20,20,,2011-04-12,
6,1.0.32.0/19,19,,2011-04-12,


In [188]:
# Partition the table on whether a seen_date was ever recorded, then report
# the size of each side.
seen_mask = df["seen_date"].notnull()
n_seen   = len(df[seen_mask])
n_unseen = len(df[~seen_mask])

print("# used    : %d bgp entries" % n_seen  )
print("# un-used : %d bgp entries" % n_unseen)

# seen   : 18427 bgp entries
# unseen : 22769 bgp entries


In [184]:
# First three rows whose seen_date is set, i.e. stamped by walk_dmp at least once.
df[df["seen_date"].notnull()].head(3)

Unnamed: 0,prefix,prefixlen,asn,alloc_date,seen_date
0,1.0.0.0/24,24,13335,2011-08-11,2019-02-22
1,1.0.1.0/24,24,200094,2011-04-14,2019-01-10
2,1.0.2.0/23,23,200094,2011-04-14,2019-01-10
