In [1]:
import aiohttp
import lzma
import io

from datetime import datetime
from os.path import expanduser
from pathlib import Path
from typing import NamedTuple

import pandas as pd
import netaddr

from rpki_analysis.delegated_stats import read_delegated_extended_stats, StatsCombinedAllocations, RirLookup
from rpki_analysis.routinator import read_jsonext

In [2]:
df = await read_jsonext("https://rpki-validator.ripe.net/jsonext")

In [3]:
async with aiohttp.ClientSession() as session:
    async with session.get('https://ftp.ripe.net/pub/stats/ripencc/nro-stats/latest/nro-delegated-stats') as resp:
        df_delext_stats = read_delegated_extended_stats(io.StringIO(await resp.text()))

        rir_lookup = RirLookup(df_delext_stats)
        lookup = StatsCombinedAllocations(df_delext_stats[df_delext_stats.status == 'assigned'])

In [4]:
misses = list()

class PrefixDetails(NamedTuple):
    rir: str
    opaque_id: str
    covering_rir: str

def maybe_lookup(row: pd.DataFrame) -> PrefixDetails:
    containing_rir = rir_lookup.get(row.prefix)
    try:
        entry = lookup[row.prefix]
        return PrefixDetails(entry.rir, entry.opaque_id, containing_rir)
    except KeyError:
        misses.append(row)
        children = list(lookup.children(row.prefix))
        if children:
            child_resources = netaddr.IPSet([c.resource for c in children])
            child_opaque_ids = set(map(lambda r: r.opaque_id, children))
            child_rirs = set(map(lambda r: r.rir, children))
            
            if child_resources.issuperset(netaddr.IPSet([row.prefix])):
                return PrefixDetails(f"{'-'.join(child_rirs)}-multi-opaque-id", "_".join(child_opaque_ids), containing_rir)
                
            return PrefixDetails(f"unknown-more-specific-allocs-{"-".join(child_rirs)}", "_".join(child_opaque_ids), containing_rir)

df[['rir', 'opaque_id', 'containing_rir']] = df.apply(maybe_lookup, axis=1, result_type='expand')
df['publication_point'] = df.uri.apply(lambda uri: "/".join(uri.split("/")[:-1]))
df_unmatched = pd.DataFrame.from_records(misses)

In [5]:
for prefix in set(df[df.containing_rir.isnull()].prefix):
    display(rir_lookup[prefix], prefix)

'ripencc'

'2001:67c:df8::/48'

In [6]:
df_unmatched = df[df.containing_rir == None]

In [7]:
df_delext_stats.status.unique()

['reserved', 'assigned', 'available']
Categories (3, object): ['assigned', 'available', 'reserved']

In [8]:
display(df_unmatched.prefix.unique())
display(df.keys())

array([], dtype=object)

Index(['type', 'uri', 'tal', 'not_before', 'not_after', 'chain_not_before',
       'chain_not_after', 'asn', 'prefix', 'max_length', 'rir', 'opaque_id',
       'containing_rir', 'publication_point'],
      dtype='object')

# Check the tal against the RIR according to delegated extended statistics.

The code below detects some mis-alignment. Many of these will becaused by registration artifacts (two continous registrations controlled by one entity, that are under two IDs in the delegated stats.

In [9]:
#rir_lookup['43.236.0.0/16']
rir_lookup.get('91.207.19.0/24')

'ripencc'

In [10]:
home = Path(expanduser("~"))
now = datetime.now()

df_mismatch = df.loc[df.tal.str.replace("ripe", "ripencc") != df.rir, ['uri', 'tal', 'asn', 'prefix', 'max_length', 'rir', 'opaque_id', 'containing_rir']]
df_mismatch.to_excel(home / f"Desktop/{now.strftime('%Y%m%d')}-results-results.xlsx")
display(df_mismatch)

Unnamed: 0,uri,tal,asn,prefix,max_length,rir,opaque_id,containing_rir
19402,rsync://repo-rpki.idnic.net/repo/943e2e67-a171...,apnic,24532,27.111.32.0/19,24,apnic-multi-opaque-id,A92C2F3F_A9227744,apnic
43648,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/7j...,apnic,24426,43.236.0.0/16,16,apnic-multi-opaque-id,A929F8EF_A9285E72_A9273302_A92CF084_A923FF55_A...,apnic
43649,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/zW...,apnic,24426,43.239.0.0/19,19,apnic-multi-opaque-id,A92B5680_A92ED1B6_A926F8AD_A928BCC2_A928EEFA_A...,apnic
43650,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/BP...,apnic,24426,43.239.32.0/20,20,apnic-multi-opaque-id,A92EA8D8_A92AE769_A92B7C8D_A92584A2,apnic
44294,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/890/wO...,apnic,63567,43.248.176.0/20,20,apnic-multi-opaque-id,A923C604_A9299EB8_A92937E6_A92D4787,apnic
180388,rsync://rpki.apnic.net/member_repository/A918E...,apnic,45769,103.26.232.0/22,24,apnic-multi-opaque-id,A9230910_A92927D9,apnic
180790,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/zW...,apnic,24426,103.35.0.0/19,19,apnic-multi-opaque-id,A92B5680_A92ED1B6_A92D3AA6_A926F8AD_A928BCC2_A...,apnic
180791,rsync://rpki.cnnic.cn/rpki/A9162E3D0000/137/BP...,apnic,24426,103.35.32.0/20,20,apnic-multi-opaque-id,A92EA8D8_A92AE769_A92B7C8D_A92584A2,apnic
191267,rsync://repo-rpki.idnic.net/repo/IDNIC-ID/2/AS...,apnic,150980,103.138.166.0/23,24,apnic-multi-opaque-id,A92840D5_A92532B9,apnic
200563,rsync://rpki.apnic.net/member_repository/A918E...,apnic,9829,103.204.48.0/22,22,apnic-multi-opaque-id,A92C6BC8_A929D211,apnic


In [11]:
df[df.containing_rir.isnull()]

Unnamed: 0,type,uri,tal,not_before,not_after,chain_not_before,chain_not_after,asn,prefix,max_length,rir,opaque_id,containing_rir,publication_point
435006,roa,rsync://rpki.ripe.net/repository/DEFAULT/00/da...,ripe,2024-02-19T09:18:28Z,2025-07-01T00:00:00Z,2024-02-19T09:18:28Z,2024-07-01T00:00:00Z,215492,2001:67c:df8::/48,48,,,,rsync://rpki.ripe.net/repository/DEFAULT/00/da...


# Duplicate VRPs

Look at what VRPs are duplicated the most often and how this happens

In [12]:
top_10 = df.groupby(["prefix", "asn", "rir", "max_length"]).count().nlargest(10, ['not_before'])
top_10

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,type,uri,tal,not_before,not_after,chain_not_before,chain_not_after,opaque_id,containing_rir,publication_point
prefix,asn,rir,max_length,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
131.221.164.0/22,27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
138.99.224.0/22,27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
167.250.52.0/22,27901,lacnic,22,8,8,8,8,8,8,8,8,8,8
207.248.192.0/19,27901,lacnic,19,8,8,8,8,8,8,8,8,8,8
190.63.0.0/16,23487,lacnic,24,7,7,7,7,7,7,7,7,7,7
2800:430::/32,23487,lacnic,48,7,7,7,7,7,7,7,7,7,7
179.60.64.0/19,27901,lacnic,19,6,6,6,6,6,6,6,6,6,6
181.113.157.0/24,27757,lacnic,24,6,6,6,6,6,6,6,6,6,6
181.113.97.0/24,27757,lacnic,24,6,6,6,6,6,6,6,6,6,6
181.196.11.0/24,27757,lacnic,24,6,6,6,6,6,6,6,6,6,6


# Maximum number of prefixes per ROA
Recall that a ROA has a single AS by definition: The grouping by AS is only to clarify what AS-es have this number

In [18]:
df.groupby(["uri", "asn"]).count().nlargest(10, 'rir')

Unnamed: 0_level_0,Unnamed: 1_level_0,type,tal,not_before,not_after,chain_not_before,chain_not_after,prefix,max_length,rir,opaque_id,containing_rir,publication_point
uri,asn,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rsync://rpki.ripe.net/repository/DEFAULT/e2/88fdd1-8b7b-4101-ac66-b590957d9415/1/2djDTJ8rqddbdb7kfUve9c6ZPp0.roa,39891,6504,6504,6504,6504,6504,6504,6504,6504,6504,6504,6504,6504
rsync://rpki.apnic.net/member_repository/A91DFB70/2983647C838F11E586FC5812C4F9AE02/9B22F928BFE311EE949C2918C4F9AE02.roa,9299,4266,4266,4266,4266,4266,4266,4266,4266,4266,4266,4266,4266
rsync://rpki.ripe.net/repository/DEFAULT/34/41c321-347e-4c3e-affd-e2b527728e96/1/F8-dcpSK7eR4Q3eZ7DjNFD0hoS0.roa,8551,4127,4127,4127,4127,4127,4127,4127,4127,4127,4127,4127,4127
rsync://rpki.ripe.net/repository/DEFAULT/2f/4b7ef8-8643-4dbb-92aa-0740115b5a52/1/HLyZ7EIByAmhVb-ajQtU8qwlGa4.roa,5416,3933,3933,3933,3933,3933,3933,3933,3933,3933,3933,3933,3933
rsync://rpki.apnic.net/member_repository/A9197CED/83F56DAC1D8411E2910689DA08B02CD2/CEFF120A77B711EEACA2AA09C4F9AE02.roa,9829,3808,3808,3808,3808,3808,3808,3808,3808,3808,3808,3808,3808
rsync://rpki.ripe.net/repository/DEFAULT/31/0762da-d66f-4dcb-9c7f-802bb51a1bed/1/UMs8uIBP7mybEF6K3AYBQntR1M8.roa,50710,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719
rsync://rpki.ripe.net/repository/DEFAULT/31/0762da-d66f-4dcb-9c7f-802bb51a1bed/1/UWNAJnUwK24DJTb0itLIyRvDuVo.roa,203214,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719
rsync://rpki.ripe.net/repository/DEFAULT/31/0762da-d66f-4dcb-9c7f-802bb51a1bed/1/r5tgyRrZAFq7SVMz8QPO9Cp27U0.roa,199739,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719,2719
rsync://rpki.ripe.net/repository/DEFAULT/b0/742370-e10f-4541-aa6c-61afb66d1e8c/1/_nkBjWcrxoXZC62xnjSUywyZuj8.roa,5384,2577,2577,2577,2577,2577,2577,2577,2577,2577,2577,2577,2577
rsync://rpki.apnic.net/member_repository/A914CE75/D22EF3D6FF4B11E281BE06625911EA32/FA3B1B5E32DD11EE8A2D5942C4F9AE02.roa,4755,2326,2326,2326,2326,2326,2326,2326,2326,2326,2326,2326,2326


# Analysis by publication point:
Wat is the maximum number of ROAs, total VRPs per publication point (~= certificate for most CAs)

```
$ rsync rsync://rpki.arin.net/repository/arin-rpki-ta/5e4a23ea-e80a-403e-b08c-2171da2157d3/2a246947-2d62-4a6c-ba05-87187f0099b2/4e95a28e-27fe-479a-b086-2cc9809d54f6/ | wc -l
20729
```

In [None]:
df.groupby(['publication_point']).count().nlargest(10, ['uri'])

The total number of files per publicationpoint:

In [None]:
df.drop_duplicates(['publication_point', 'uri']).groupby(['publication_point']).count().nlargest(10, ['uri'])

publication points generally contain one ROA for an AS, let's check

# TODO

Now count prefixes per publication point

# Now let's work on unique VRPs

In [None]:
df = df.reset_index().drop_duplicates(['asn', 'prefix', 'max_length'])

The ROA with the most prefixes:

```python
```

In [None]:
df.groupby(["asn"]).count().nlargest(10, ['index'])

Prefix with most ROAs:

In [None]:
df.groupby(['prefix']).count().nlargest(10, ['roa'])