In [1]:
import io

import netaddr
import itertools

from typing import Set

import pandas as pd

import aiohttp

from rpki_analysis.delegated_stats import read_delegated_extended_stats
from rpki_analysis.riswhois import read_ris_dump, RisWhoisLookupMoreLessSpecific, ExpandedRisEntry

In [2]:
from typing import Any

def to_cidrs(res: netaddr.IPRange | netaddr.IPNetwork | Any):
    """Wrap a resource in a list, converting netaddr objects to CIDR strings.

    * ``netaddr.IPRange``   -> one string per network yielded by ``res.cidrs()``
    * ``netaddr.IPNetwork`` -> a one-element list containing ``str(res)``
    * any other value       -> a one-element list containing ``res`` unchanged
    """
    if isinstance(res, netaddr.IPRange):
        # A range may need several CIDR blocks to be covered.
        return [str(cidr) for cidr in res.cidrs()]
    if isinstance(res, netaddr.IPNetwork):
        return [str(res)]
    # Not a netaddr type: pass the value through untouched.
    return [res]

In [3]:
async with aiohttp.ClientSession() as session:
    async with session.get('https://ftp.ripe.net/pub/stats/ripencc/nro-stats/latest/nro-delegated-stats') as resp:
        df_extended = read_delegated_extended_stats(io.StringIO(await resp.text()))
        display("IGNORING asns")
        df_extended = df_extended.loc[df_extended.afi != 'asn']
    
    df_extended['resource'] = df_extended.resource.map(to_cidrs)
    df_extended = df_extended.explode("resource")
    
    df_riswhois = pd.concat([
        read_ris_dump('https://www.ris.ripe.net/dumps/riswhoisdump.IPv4.gz'),
        read_ris_dump('https://www.ris.ripe.net/dumps/riswhoisdump.IPv6.gz')
    ])

display(f"df_extended: {df_extended.shape}")
display(f"df_riswhois: {df_riswhois.shape}")

'IGNORING asns'

RIS dump contains row(s) with AS_SET! These will never be RPKI valid (https://tools.ietf.org/html/rfc6907#section-7.1.8)
RIS dump contains row(s) with AS_SET! These will never be RPKI valid (https://tools.ietf.org/html/rfc6907#section-7.1.8)


'df_extended: (620758, 10)'

'df_riswhois: (1441595, 4)'

In [4]:
# Show the concatenated IPv4 + IPv6 RIS whois frame.
df_riswhois

Unnamed: 0,origin,prefix,seen_by_peers,prefix_length
16,216425,0.0.0.0/0,1.0,0
17,50628,0.0.0.0/0,1.0,0
18,34927,0.0.0.0/0,2.0,0
19,35708,0.0.0.0/0,1.0,0
20,56755,0.0.0.0/0,1.0,0
...,...,...,...,...
270117,56655,fd00:10:130:151::253/128,1.0,128
270118,56655,fd00:10:130:151::254/128,1.0,128
270119,174,fd00:550:ffff:ffff:ffff:ffff:ffff:ffff/128,1.0,128
270120,174,fd00:978:ffff:ffff:ffff:ffff:ffff:ffff/128,1.0,128


In [5]:
# List the column labels of the delegated-stats frame.
df_extended.keys()

Index(['rir', 'country', 'afi', 'raw_resource', 'length', 'date', 'status',
       'opaque_id', 'category', 'resource'],
      dtype='object')

In [6]:
# RIPE NCC rows, restricted to a few descriptive columns.
# The `afi != 'asn'` term is redundant: ASN rows were already dropped when df_extended was loaded.
df_extended.loc[(df_extended.rir == 'ripencc') & (df_extended.afi != 'asn'), ['country', 'raw_resource', 'date', 'status']]

Unnamed: 0,country,raw_resource,date,status
109354,PS,1.178.112.0,2007-11-26 00:00:00+00:00,assigned
109355,PS,1.178.128.0,2007-11-26 00:00:00+00:00,assigned
109359,PS,1.178.208.0,2010-06-25 00:00:00+00:00,assigned
109360,ES,1.178.224.0,2010-06-25 00:00:00+00:00,assigned
109363,PS,1.179.40.0,2009-05-18 00:00:00+00:00,assigned
...,...,...,...,...
719037,ZZ,2a14:fd00::,2024-12-22 00:00:00+00:00,available
719038,ZZ,2a14:fe00::,2024-12-22 00:00:00+00:00,available
719039,ZZ,2a15::,2024-12-22 00:00:00+00:00,available
719040,ZZ,2a16::,2024-12-22 00:00:00+00:00,available


In [7]:
# Sanity check: shape with ASN rows excluded. Because ASN rows were dropped at
# load time this should equal df_extended.shape.
df_extended.loc[(df_extended.afi != 'asn')].shape

(620758, 10)

In [8]:
# RIPE NCC IPv4 rows only, restricted to a few descriptive columns.
df_extended.loc[(df_extended.rir == 'ripencc') & (df_extended.afi == 'ipv4'), ['country', 'raw_resource', 'date', 'status']]

Unnamed: 0,country,raw_resource,date,status
109354,PS,1.178.112.0,2007-11-26 00:00:00+00:00,assigned
109355,PS,1.178.128.0,2007-11-26 00:00:00+00:00,assigned
109359,PS,1.178.208.0,2010-06-25 00:00:00+00:00,assigned
109360,ES,1.178.224.0,2010-06-25 00:00:00+00:00,assigned
109363,PS,1.179.40.0,2009-05-18 00:00:00+00:00,assigned
...,...,...,...,...
360912,DE,217.224.0.0,2001-04-04 00:00:00+00:00,assigned
361391,NL,220.158.196.0,2015-10-07 00:00:00+00:00,assigned
361794,IQ,223.25.120.0,2011-04-11 00:00:00+00:00,assigned
361820,NL,223.27.112.0,2010-08-24 00:00:00+00:00,assigned


In [9]:
# Lookup structure built from the RIS whois frame.
# NOTE(review): visibility_threshold=15 presumably is the minimum number of RIS
# peers that must see an announcement for it to be returned - confirm against
# RisWhoisLookupMoreLessSpecific.
lookup = RisWhoisLookupMoreLessSpecific(df_riswhois, visibility_threshold=15)


def lookup_filter(row) -> Set[ExpandedRisEntry]:
    """Return what ``lookup`` holds for the ``resource`` value of ``row``.

    Intended for ``DataFrame.apply(lookup_filter, axis=1)``, where ``row`` is a
    single row of the delegated-stats frame.
    """
    resource = row['resource']
    return lookup[resource]

Look up announcements only for RIPE NCC IPv4 space:

In [10]:
# Select the RIPE NCC IPv4 rows, then run the RIS lookup once per selected row.
ripencc_ipv4_mask = (df_extended.rir == 'ripencc') & (df_extended.afi == 'ipv4')
announcements = df_extended[ripencc_ipv4_mask].apply(lookup_filter, axis=1)

In [11]:
# Flatten the per-row lookup results into one set of distinct entries.
announced = {entry for entries in announcements for entry in entries}

In [12]:
# Number of rows that were looked up (one result per selected RIPE NCC IPv4 row).
len(announcements)

96624

In [13]:
# Show the per-row lookup results.
announcements

109354                 {(12975, 1.178.112.0/20, 364.0, 20)}
109355                 {(12975, 1.178.128.0/20, 364.0, 20)}
109359    {(12975, 1.178.220.0/22, 360.0, 22), (12975, 1...
109360    {(12479, 1.178.224.0/20, 361.0, 20), (12479, 1...
109363                  {(12975, 1.179.40.0/21, 363.0, 21)}
                                ...                        
360912                  {(3320, 217.224.0.0/11, 364.0, 11)}
361391                                                   {}
361794                                                   {}
361820                                                   {}
361821                {(54103, 223.27.114.0/24, 355.0, 24)}
Length: 96624, dtype: object

## Look up all the announcements


In [35]:
import time

# Run the RIS lookup for every row of df_extended and record how many entries
# each row matched. The elapsed time in seconds is displayed.
# time.perf_counter() is used for the measurement because it is a monotonic,
# high-resolution clock; time.time() follows the wall clock and can jump.
t0 = time.perf_counter()
df_extended['announcements'] = df_extended.apply(lookup_filter, axis=1)
df_extended['num_announcements'] = df_extended.announcements.apply(len)
display(time.perf_counter() - t0)

16.567238092422485

In [36]:
# Flatten the lookup results for ALL rows into one set of distinct entries.
# The previous version read the `announcements` series, which only covers the
# RIPE NCC IPv4 rows, and therefore just recomputed the earlier, narrower set.
announced = set(itertools.chain.from_iterable(df_extended['announcements'].to_list()))

In [37]:
# Number of RIPE NCC rows (IPv4 and IPv6) that now carry a lookup result.
len(df_extended.loc[df_extended.rir == 'ripencc', 'announcements'])

207122

In [38]:
def rir_data(df: pd.Series) -> int:
    """Return the summed length of all items in ``df``.

    Used as a ``groupby(...).agg`` function on the ``announcements`` column, so
    ``df`` is the Series of per-row result collections for one group (the
    parameter keeps its historical name). Any iterable of sized items works.

    Returns 0 for an empty input.
    """
    return sum(len(entries) for entries in df)

# Per (rir, afi): total number of matched announcements and number of resources.
(
    df_extended
    .groupby(['rir', 'afi'], observed=True)
    .agg({
        'announcements': rir_data,
        'resource': 'count',
    })
)

Unnamed: 0_level_0,Unnamed: 1_level_0,announcements,resource
rir,afi,Unnamed: 2_level_1,Unnamed: 3_level_1
afrinic,ipv4,31765,5669
afrinic,ipv6,1514,7941
apnic,ipv4,132741,55287
apnic,ipv6,35785,105124
arin,ipv4,164444,80367
arin,ipv6,20545,82844
iana,ipv4,3,51
iana,ipv6,1,98
lacnic,ipv4,79592,19929
lacnic,ipv6,31627,56326


In [42]:
# RIPE NCC rows whose status is not 'assigned' but that still matched at least
# one announcement. The filter is timed with time.perf_counter(): the interval
# is only a few milliseconds, which a wall clock such as time.time() may not
# resolve reliably.
t0 = time.perf_counter()
res = df_extended[(df_extended.rir == 'ripencc') & (df_extended.status != 'assigned') & (df_extended.num_announcements > 0)]
display(time.perf_counter() - t0)
display(res)

0.0031709671020507812

Unnamed: 0,rir,country,afi,raw_resource,length,date,status,opaque_id,category,resource,announcements,num_announcements
286229,ripencc,ZZ,ipv4,193.96.236.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.96.236.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286337,ripencc,ZZ,ipv4,193.99.153.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.99.153.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286358,ripencc,ZZ,ipv4,193.99.249.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.99.249.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286450,ripencc,ZZ,ipv4,193.101.167.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.101.167.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286455,ripencc,ZZ,ipv4,193.101.180.0,512,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.101.180.0/23,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286498,ripencc,ZZ,ipv4,193.102.74.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.102.74.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286552,ripencc,ZZ,ipv4,193.102.208.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.102.208.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
286581,ripencc,ZZ,ipv4,193.103.164.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,193.103.164.0/24,"{(702, 193.96.0.0/13, 361.0, 13)}",1
296675,ripencc,ZZ,ipv4,194.139.30.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,194.139.30.0/24,"{(702, 194.139.0.0/16, 361.0, 16)}",1
297728,ripencc,ZZ,ipv4,194.173.169.0,256,2024-12-22 00:00:00+00:00,reserved,ripencc,e-stats,194.173.169.0/24,"{(702, 194.172.0.0/14, 361.0, 14)}",1
