In [1]:
import aiohttp
import lzma
import io

from datetime import datetime
from os.path import expanduser
from pathlib import Path
from typing import NamedTuple

import pandas as pd
import netaddr

from rpki_analysis.delegated_stats import read_delegated_stats, StatsCombinedAllocations, RirLookup
from rpki_analysis.routinator import read_jsonext

In [2]:
df = await read_jsonext("https://rpki-validator.ripe.net/jsonext")

In [3]:
async with aiohttp.ClientSession() as session:
    async with session.get('https://ftp.ripe.net/pub/stats/ripencc/nro-stats/latest/nro-delegated-stats') as resp:
        df_delegated_stats = read_delegated_stats(io.StringIO(await resp.text()))

        rir_lookup = RirLookup(df_delegated_stats)
        lookup = StatsCombinedAllocations(df_delegated_stats[df_delegated_stats.status == 'assigned'])

In [27]:
for prefix in set(df[df.containing_rir.isnull()].prefix):
    display(rir_lookup[prefix], prefix)

'apnic'

'2401:7ae0:4000::/36'

'apnic'

'2401:7ae0::/32'

'apnic'

'157.15.53.0/24'

'apnic'

'103.73.237.0/24'

'apnic'

'157.15.52.0/24'

'ripencc'

'45.145.39.0/24'

'apnic'

'103.73.236.0/22'

'apnic'

'2401:7ae0::/36'

'ripencc'

'91.207.19.0/24'

'apnic'

'103.73.236.0/24'

'apnic'

'157.15.61.0/24'

'apnic'

'157.15.52.0/23'

'apnic'

'103.73.238.0/24'

'apnic'

'103.73.239.0/24'

'apnic'

'2401:7ae0:c000::/36'

'apnic'

'157.15.60.0/24'

In [37]:
df_mismatch[df_mismatch.containing_rir == None]

Unnamed: 0,uri,tal,asn,prefix,max_length,rir,opaque_id,containing_rir


In [4]:
df_delegated_stats.status.unique()

['reserved', 'assigned', 'available']
Categories (3, object): ['assigned', 'available', 'reserved']

In [5]:
misses = list()

class PrefixDetails(NamedTuple):
    rir: str
    opaque_id: str
    covering_rir: str

def maybe_lookup(row: pd.DataFrame) -> PrefixDetails:
    containing_rir = rir_lookup.get(row.prefix)
    try:
        entry = lookup[row.prefix]
        return PrefixDetails(entry.rir, entry.opaque_id, containing_rir)
    except KeyError:
        misses.append(row)
        children = list(lookup.children(row.prefix))
        if children:
            child_resources = netaddr.IPSet([c.resource for c in children])
            child_opaque_ids = set(map(lambda r: r.opaque_id, children))
            child_rirs = set(map(lambda r: r.rir, children))
            
            if child_resources.issuperset(netaddr.IPSet([row.prefix])):
                return PrefixDetails(f"{'-'.join(child_rirs)}-multi-opaque-id", "_".join(child_opaque_ids), containing_rir)
                
            return PrefixDetails(f"unknown-more-specific-allocs-{"-".join(child_rirs)}", "_".join(child_opaque_ids), containing_rir)

df[['rir', 'opaque_id', 'containing_rir']] = df.apply(maybe_lookup, axis=1, result_type='expand')
df['publication_point'] = df.uri.apply(lambda uri: "/".join(uri.split("/")[:-1]))
df_unmatched = pd.DataFrame.from_records(misses)

In [6]:
display(df_unmatched.prefix.unique())
display(df.keys())

array(['2c0f:ffd8::/32'], dtype=object)

Index(['type', 'uri', 'tal', 'not_before', 'not_after', 'chain_not_before',
       'chain_not_after', 'asn', 'prefix', 'max_length', 'rir', 'opaque_id',
       'containing_rir', 'publication_point'],
      dtype='object')

# Check the tal against the RIR according to delegated extended statistics.

The code below detects some mis-alignment. Many of these will becaused by registration artifacts (two continous registrations controlled by one entity, that are under two IDs in the delegated stats.

In [7]:
#rir_lookup['43.236.0.0/16']
rir_lookup.get('91.207.19.0/24')

'ripencc'

In [8]:
home = Path(expanduser("~"))
now = datetime.now()

df_mismatch = df.loc[df.tal.str.replace("ripe", "ripencc") != df.rir, ['uri', 'tal', 'asn', 'prefix', 'max_length', 'rir', 'opaque_id', 'containing_rir']]
df_mismatch.to_excel(home / f"Desktop/{now.strftime('%Y%m%d')}-results-results.xlsx")
display(df_mismatch)

Unnamed: 0,uri,tal,asn,prefix,max_length,rir,opaque_id,containing_rir
19295,rsync://repo-rpki.idnic.net/repo/943e2e67-a171...,apnic,AS24532,27.111.32.0/19,24,apnic-multi-opaque-id,A9227744_A92C2F3F,apnic
42420,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/OX...,apnic,AS24426,43.236.0.0/16,16,apnic-multi-opaque-id,A921CFB0_A927E4E8_A9225E10_A928C453_A9273302_A...,apnic
42421,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/pg...,apnic,AS24426,43.239.0.0/19,19,apnic-multi-opaque-id,A92B5680_A92ED1B6_A92A4917_A928BCC2_A926F8AD_A...,apnic
42422,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/q0...,apnic,AS24426,43.239.32.0/20,20,apnic-multi-opaque-id,A92584A2_A92EA8D8_A92B7C8D_A92AE769,apnic
43066,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/890/Y_...,apnic,AS63567,43.248.176.0/20,20,apnic-multi-opaque-id,A92937E6_A9299EB8_A923C604_A92D4787,apnic
51275,rsync://rpki.ripe.net/repository/DEFAULT/cc/b7...,ripe,AS208058,45.145.39.0/24,24,,,
148906,rsync://rpki.ripe.net/repository/DEFAULT/b7/4f...,ripe,AS7018,91.207.19.0/24,24,,,
177665,rsync://rpki.apnic.net/member_repository/A918E...,apnic,AS45769,103.26.232.0/22,24,apnic-multi-opaque-id,A9230910_A92927D9,apnic
178066,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/pg...,apnic,AS24426,103.35.0.0/19,19,apnic-multi-opaque-id,A92B5680_A92ED1B6_A92A4917_A92D3AA6_A9288770_A...,apnic
178067,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/q0...,apnic,AS24426,103.35.32.0/20,20,apnic-multi-opaque-id,A92584A2_A92EA8D8_A92B7C8D_A92AE769,apnic


In [9]:
df[df.containing_rir.isnull()]

Unnamed: 0,type,uri,tal,not_before,not_after,chain_not_before,chain_not_after,asn,prefix,max_length,rir,opaque_id,containing_rir,publication_point
51275,roa,rsync://rpki.ripe.net/repository/DEFAULT/cc/b7...,ripe,2024-01-29T12:24:40Z,2025-07-01T00:00:00Z,2024-01-29T18:24:02Z,2024-07-01T00:00:00Z,AS208058,45.145.39.0/24,24,,,,rsync://rpki.ripe.net/repository/DEFAULT/cc/b7...
148906,roa,rsync://rpki.ripe.net/repository/DEFAULT/b7/4f...,ripe,2024-01-29T12:08:39Z,2025-07-01T00:00:00Z,2024-01-29T18:24:02Z,2024-07-01T00:00:00Z,AS7018,91.207.19.0/24,24,,,,rsync://rpki.ripe.net/repository/DEFAULT/b7/4f...
181090,roa,rsync://rpki.apnic.net/member_repository/A9142...,apnic,2024-01-29T03:45:12Z,2024-12-01T00:00:00Z,2024-01-29T03:45:12Z,2024-05-02T09:05:49Z,AS132686,103.73.236.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A9142...
181091,roa,rsync://rpki.apnic.net/member_repository/A9142...,apnic,2024-01-29T03:45:12Z,2024-12-01T00:00:00Z,2024-01-29T03:45:12Z,2024-05-02T09:05:49Z,AS132686,103.73.237.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A9142...
181092,roa,rsync://rpki.apnic.net/member_repository/A9142...,apnic,2024-01-29T03:45:12Z,2024-12-01T00:00:00Z,2024-01-29T03:45:12Z,2024-05-02T09:05:49Z,AS132686,103.73.238.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A9142...
181093,roa,rsync://rpki.apnic.net/member_repository/A9142...,apnic,2024-01-29T03:45:12Z,2024-12-01T00:00:00Z,2024-01-29T03:45:12Z,2024-05-02T09:05:49Z,AS132686,103.73.239.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A9142...
181094,roa,rsync://rpki.apnic.net/member_repository/A9142...,apnic,2024-01-29T03:45:12Z,2024-12-01T00:00:00Z,2024-01-29T03:45:12Z,2024-05-02T09:05:49Z,AS132686,103.73.236.0/22,22,,,,rsync://rpki.apnic.net/member_repository/A9142...
266148,roa,rsync://rpki.apnic.net/member_repository/A913C...,apnic,2024-01-29T05:40:08Z,2025-05-01T00:00:00Z,2024-01-29T05:40:08Z,2024-05-02T09:05:49Z,AS152447,157.15.52.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A913C...
266149,roa,rsync://rpki.apnic.net/member_repository/A913C...,apnic,2024-01-29T05:40:08Z,2025-05-01T00:00:00Z,2024-01-29T05:40:08Z,2024-05-02T09:05:49Z,AS152447,157.15.53.0/24,24,,,,rsync://rpki.apnic.net/member_repository/A913C...
266150,roa,rsync://rpki.apnic.net/member_repository/A913C...,apnic,2024-01-29T05:40:08Z,2025-05-01T00:00:00Z,2024-01-29T05:40:08Z,2024-05-02T09:05:49Z,AS152447,157.15.52.0/23,23,,,,rsync://rpki.apnic.net/member_repository/A913C...


# Duplicate VRPs

Look at what VRPs are duplicated the most often and how this happens

In [10]:
top_10 = df.groupby(["prefix", "asn", "rir", "max_length"]).count().nlargest(10, ['not_before'])
top_10

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,type,uri,tal,not_before,not_after,chain_not_before,chain_not_after,opaque_id,containing_rir,publication_point
prefix,asn,rir,max_length,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
131.221.164.0/22,AS27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
138.99.224.0/22,AS27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
167.250.52.0/22,AS27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
207.248.192.0/19,AS27901,lacnic,19,8,8,8,8,8,8,8,8,8,8
190.63.0.0/16,AS23487,lacnic,24,7,7,7,7,7,7,7,7,7,7
2800:430::/32,AS23487,lacnic,48,7,7,7,7,7,7,7,7,7,7
154.223.31.0/24,AS63139,afrinic,24,6,6,6,6,6,6,6,6,6,6
179.60.64.0/19,AS27901,lacnic,19,6,6,6,6,6,6,6,6,6,6
181.113.157.0/24,AS27757,lacnic,24,6,6,6,6,6,6,6,6,6,6
181.113.97.0/24,AS27757,lacnic,24,6,6,6,6,6,6,6,6,6,6


# Maximum number of prefixes per ROA
Recall that a ROA has a single AS by definition: The grouping by AS is only to clarify what AS-es have this number

In [11]:
df.groupby(["uri", "asn"]).count().nlargest(10, 'index')

KeyError: 'index'

# Analysis by publication point:
Wat is the maximum number of ROAs, total VRPs per publication point (~= certificate for most CAs)

```
$ rsync rsync://rpki.arin.net/repository/arin-rpki-ta/5e4a23ea-e80a-403e-b08c-2171da2157d3/2a246947-2d62-4a6c-ba05-87187f0099b2/4e95a28e-27fe-479a-b086-2cc9809d54f6/ | wc -l
20729
```

In [None]:
df.groupby(['publication_point']).count().nlargest(10, ['uri'])

The total number of files per publicationpoint:

In [None]:
df.drop_duplicates(['publication_point', 'uri']).groupby(['publication_point']).count().nlargest(10, ['uri'])

publication points generally contain one ROA for an AS, let's check

# TODO

Now count prefixes per publication point

# Now let's work on unique VRPs

In [None]:
df = df.reset_index().drop_duplicates(['asn', 'prefix', 'max_length'])

The ROA with the most prefixes:

```python
```

In [None]:
df.groupby(["asn"]).count().nlargest(10, ['index'])

Prefix with most ROAs:

In [None]:
df.groupby(['prefix']).count().nlargest(10, ['roa'])