In [1]:
import io

import netaddr
import itertools

from typing import Set

import pandas as pd

import aiohttp

from rpki_analysis.delegated_stats import read_delegated_extended_stats
from rpki_analysis.riswhois import read_ris_dump, RisWhoisLookupMoreLessSpecific, ExpandedRisEntry

In [2]:
async with aiohttp.ClientSession() as session:
    async with session.get('https://ftp.ripe.net/pub/stats/ripencc/nro-stats/latest/nro-delegated-stats') as resp:
        df_extended = read_delegated_extended_stats(io.StringIO(await resp.text()))

    df_riswhois = read_ris_dump('https://www.ris.ripe.net/dumps/riswhoisdump.IPv4.gz')
        

RIS dump contains row(s) with AS_SET! These will never be RPKI valid (https://tools.ietf.org/html/rfc6907#section-7.1.8)


In [3]:
# Display the RIS whois frame (pandas truncates the rendering to head and tail rows).
df_riswhois

Unnamed: 0,origin,prefix,seen_by_peers,prefix_length
16,216425,0.0.0.0/0,1.0,0
17,50628,0.0.0.0/0,1.0,0
18,34927,0.0.0.0/0,2.0,0
19,35708,0.0.0.0/0,1.0,0
20,56755,0.0.0.0/0,1.0,0
...,...,...,...,...
1171506,63199,223.255.250.0/24,360.0,24
1171507,63199,223.255.251.0/24,360.0,24
1171508,58519,223.255.252.0/24,358.0,24
1171509,58519,223.255.253.0/24,358.0,24


In [4]:
# List the column labels of the delegated-stats frame.
df_extended.columns

Index(['rir', 'country', 'afi', 'raw_resource', 'length', 'date', 'status',
       'opaque_id', 'category', 'resource'],
      dtype='object')

In [5]:
# RIPE NCC rows whose address family is anything other than 'asn',
# reduced to the columns of interest.
df_extended.loc[
    df_extended['rir'].eq('ripencc') & df_extended['afi'].ne('asn'),
    ['country', 'raw_resource', 'date', 'status'],
]

Unnamed: 0,country,raw_resource,date,status
109354,PS,1.178.112.0,2007-11-26 00:00:00+00:00,assigned
109355,PS,1.178.128.0,2007-11-26 00:00:00+00:00,assigned
109359,PS,1.178.208.0,2010-06-25 00:00:00+00:00,assigned
109360,ES,1.178.224.0,2010-06-25 00:00:00+00:00,assigned
109363,PS,1.179.40.0,2009-05-18 00:00:00+00:00,assigned
...,...,...,...,...
719037,ZZ,2a14:fd00::,2024-12-22 00:00:00+00:00,available
719038,ZZ,2a14:fe00::,2024-12-22 00:00:00+00:00,available
719039,ZZ,2a15::,2024-12-22 00:00:00+00:00,available
719040,ZZ,2a16::,2024-12-22 00:00:00+00:00,available


In [6]:
# (rows, columns) of all non-ASN rows, across every RIR.
df_extended.loc[df_extended['afi'].ne('asn')].shape

(615598, 10)

In [7]:
# RIPE NCC rows restricted to the 'ipv4' address family,
# reduced to the columns of interest.
df_extended.loc[
    df_extended['rir'].eq('ripencc') & df_extended['afi'].eq('ipv4'),
    ['country', 'raw_resource', 'date', 'status'],
]

Unnamed: 0,country,raw_resource,date,status
109354,PS,1.178.112.0,2007-11-26 00:00:00+00:00,assigned
109355,PS,1.178.128.0,2007-11-26 00:00:00+00:00,assigned
109359,PS,1.178.208.0,2010-06-25 00:00:00+00:00,assigned
109360,ES,1.178.224.0,2010-06-25 00:00:00+00:00,assigned
109363,PS,1.179.40.0,2009-05-18 00:00:00+00:00,assigned
...,...,...,...,...
360912,DE,217.224.0.0,2001-04-04 00:00:00+00:00,assigned
361391,NL,220.158.196.0,2015-10-07 00:00:00+00:00,assigned
361794,IQ,223.25.120.0,2011-04-11 00:00:00+00:00,assigned
361820,NL,223.27.112.0,2010-08-24 00:00:00+00:00,assigned


In [8]:
# Build the lookup once over the full RIS whois frame; lookup_filter indexes into it per row.
# NOTE(review): visibility_threshold=15 presumably ignores entries seen by fewer
# than 15 RIS peers — confirm against RisWhoisLookupMoreLessSpecific.
lookup = RisWhoisLookupMoreLessSpecific(df_riswhois, visibility_threshold=15)

def lookup_filter(row) -> Set[ExpandedRisEntry]:
    """Return what the module-level ``lookup`` holds for this row's ``raw_resource``.

    Intended as the callable for ``DataFrame.apply(..., axis=1)``: ``row`` is
    anything that supports ``row['raw_resource']``.

    NOTE(review): in the displayed data ``raw_resource`` is a bare start
    address (no prefix length) — confirm that keying the lookup on it, rather
    than on the ``resource`` column, is intended.
    """
    resource_key = row['raw_resource']
    return lookup[resource_key]

In [13]:
# Look up RIS entries for a random sample of RIPE NCC IPv4 delegations.
# The sample is seeded so that Restart & Run All draws the same rows every
# time and the recorded outputs stay reproducible; change SAMPLE_SEED to draw
# a different sample.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42
announcements = (
    df_extended
    .loc[(df_extended.rir == 'ripencc') & (df_extended.afi == 'ipv4')]
    .sample(SAMPLE_SIZE, random_state=SAMPLE_SEED)
    .apply(lookup_filter, axis=1)  # one lookup result per sampled row
)

In [14]:
# Flatten the per-row lookup results into a single set of distinct entries.
announced = {entry for entries in announcements for entry in entries}

In [15]:
# Number of distinct entries collected in `announced`.
len(announced)

975

In [16]:
# Display the per-row lookup results (one value per sampled row, indexed by the original row label).
announcements

167115                {(57145, 91.226.250.0/24, 396.0, 24)}
168398                 {(31727, 91.236.26.0/23, 369.0, 23)}
153271                                                   {}
302682                {(12741, 195.225.68.0/24, 395.0, 24)}
353463    {(44539, 212.66.56.0/24, 406.0, 24), (44539, 2...
                                ...                        
244819                 {(32806, 185.38.32.0/22, 362.0, 22)}
119797                 {(44486, 37.44.215.0/24, 369.0, 24)}
155247                  {(31608, 84.205.0.0/19, 398.0, 19)}
164355                 {(3209, 91.208.193.0/24, 365.0, 24)}
129565                 {(60781, 45.154.56.0/24, 386.0, 24)}
Length: 1000, dtype: object