# Find Bogon

In [104]:
import datetime
import gzip
import bz2
import radix
import re
import math
import pandas as pd

In [105]:
fn_dmp_apn = "data/2019-02-22.dmp.gz"

fn_del_afr = "data/delegated-afrinic-20190222"
fn_del_apn = "data/delegated-apnic-20190222.gz"
fn_del_ari = "data/delegated-arin-extended-20190222"
fn_del_lac = "data/delegated-lacnic-20190222"
fn_del_rip = "data/delegated-ripencc-20190222.bz2"

In [106]:
f_dmp = gzip.open(fn_dmp_apn,"rt")
dmp = f_dmp.read()
dmp = dmp.replace("\n","")
dmp_lines = dmp.split("*")
f_dmp.close()

In [107]:
f_del_afr = open(fn_del_afr,"r")
f_del_apn = gzip.open(fn_del_apn,"rt")
f_del_ari = open(fn_del_ari,"r")
f_del_lac = open(fn_del_lac,"r")
f_del_rip = bz2.open(fn_del_rip,"rt")

lines_del_afr = f_del_afr.readlines()
lines_del_apn = f_del_apn.readlines()
lines_del_ari = f_del_ari.readlines()
lines_del_lac = f_del_lac.readlines()
lines_del_rip = f_del_rip.readlines()

f_del_afr.close()
f_del_apn.close()
f_del_ari.close()
f_del_lac.close()
f_del_rip.close()

In [148]:
dmp_tree = radix.Radix()

for line in dmp_lines:
    if "/" not in line:
        continue
    row = re.sub('{.*}|>',"",line[:-1]).split()
    prefix = row[0]
    asn = row[-2]
    rnode = dmp_tree.search_best(prefix)
    if rnode is None:
        rnode = dmp_tree.add(prefix)
        rnode.data["asn"] = asn
        rnode.data["registry"] = None
        rnode.data["economy" ] = None
        rnode.data["nhost"   ] = None
        rnode.data["date"    ] = None
        rnode.data["status"  ] = None

In [149]:
lines_del = [lines_del_afr, lines_del_apn, lines_del_ari, lines_del_lac, lines_del_rip]

for lines in lines_del:
    i = 0
    for line in lines:
        if "ipv4" not in line:
            continue
        if "summary" in line:
            continue
        i += 1
        row = line[:-1].split("|")
        # print(row)
        # break
        registry   = row[0]
        economy    = row[1]
        network    = row[3]
        nhost      = int(row[4])
        prefixlen  = 32 - math.log(nhost,2)
        prefix     = "%s/%d" % (network, prefixlen)
        date       = "%s-%s-%s" % (row[5][0:4], row[5][4:6], row[5][6:8])
        status     = row[6]
        rnode = dmp_tree.search_best(prefix)
        if rnode is not None:
            rnode.data["registry" ] = registry
            rnode.data["economy"  ] = economy
            rnode.data["nhost"    ] = nhost
            rnode.data["date"     ] = date
            rnode.data["status"   ] = status

In [157]:
lst_dict = []
for rnode in dmp_tree.nodes():
    row = {
        "prefix"     : rnode.prefix,
        "prefixlen"  : rnode.prefixlen,
        "nhost"     : rnode.data["nhost"    ],
        "asn"        : rnode.data["asn"],
        "registry"  : rnode.data["registry" ],
        "economy"   : rnode.data["economy"  ],
        "date"      : rnode.data["date"     ],
        "status"    : rnode.data["status"   ],
    }
    lst_dict.append(row)

In [158]:
df = pd.DataFrame(lst_dict)
df = df[["prefix","prefixlen","nhost","asn","registry","economy","date","status"]]

In [133]:
df.to_csv("bogon.tsv", sep='\t', encoding='utf-8')

In [162]:
len(df.index)

311555

In [163]:
df.head(5)

Unnamed: 0,prefix,prefixlen,nhost,asn,registry,economy,date,status
0,1.0.0.0/24,24,256.0,2516,apnic,AU,2011-08-11,assigned
1,1.0.4.0/22,22,1024.0,38803,apnic,AU,2011-04-12,allocated
2,1.0.16.0/24,24,,2519,,,,
3,1.0.64.0/18,18,16384.0,7670,apnic,JP,2011-04-12,allocated
4,1.0.128.0/17,17,32768.0,38040,apnic,TH,2011-04-08,allocated


In [177]:
df_status = df.groupby(["status"]).size().reset_index(name='counts')
row_null = {
    "status":"none", 
    "counts":len(df[df["status"].isnull()])
}
df_status = df_status.append(row_null,ignore_index=True)

In [178]:
df_status

Unnamed: 0,status,counts
0,allocated,67508
1,assigned,18893
2,reserved,31
3,none,225123


In [179]:
df_registry = df.groupby(["registry"]).size().reset_index(name='counts')

In [180]:
df_registry

Unnamed: 0,registry,counts
0,afrinic,1649
1,apnic,15512
2,arin,17815
3,lacnic,9990
4,ripencc,41466


In [183]:
df_bogon = df[df["status"]!="allocated"]

In [184]:
len(df_bogon.index)

244047

In [185]:
df_bogon.head(10)

Unnamed: 0,prefix,prefixlen,nhost,asn,registry,economy,date,status
0,1.0.0.0/24,24,256.0,2516,apnic,AU,2011-08-11,assigned
2,1.0.16.0/24,24,,2519,,,,
5,1.1.1.0/24,24,256.0,2516,apnic,AU,2011-08-11,assigned
7,1.1.20.0/24,24,,4637,,,,
8,1.1.64.0/19,19,,2519,,,,
9,1.1.103.0/24,24,,2519,,,,
10,1.1.104.0/24,24,,2519,,,,
11,1.1.105.0/24,24,,2519,,,,
12,1.1.106.0/24,24,,2519,,,,
13,1.1.107.0/24,24,,2519,,,,
