# [RiPKI: The Tragic Story of RPKI Deployment in the Web Ecosystem](http://www.eecs.qmul.ac.uk/~tysong/files/HotNets15.pdf)

## Summary
### Approach
This paper investigates the deployment of RPKI for popular websites in 2015.


### Datasets
Data collected in 2014-2015:
- Alexa top 1 million websites
- Resolved domain names to IPs using Google DNS
- RIPE RIS data to map IP to prefixes and originating ASes
- RPKI data


## (Section 4.1) Less Popular Content is More Secured

### Original results

- 0.09% of the prefixes for Alexa top 1m are invalid
- 6% of the web server prefixes are covered by RPKI (valid and invalid)
- For the first 100k domains only ≈4.0% of prefixes are secured via RPKI. For the last 100k domains, ≈5.5% are secured.


### IYP Results

In [4]:
# Setup access to IYP
from neo4j import GraphDatabase, RoutingControl

# Using IYP local instance
# URI = "neo4j://localhost:7687"
# Using IYP public instance
URI = "neo4j://iyp-bolt.ihr.live:7687"
AUTH = ('neo4j', 'password')
db = GraphDatabase.driver(URI, auth=AUTH)

In [5]:
# Get the percentage of invalid prefixes
query_all = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)<-[:PART_OF]-(h:HostName)
WHERE d.name = h.name 
WITH DISTINCT h
MATCH (h)-[:RESOLVES_TO]->(:IP)-[:PART_OF]->(pfx:Prefix)
RETURN COUNT(DISTINCT pfx.prefix)"""

query_invalid = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding RPKI invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)<-[:PART_OF]-(h:HostName)
WHERE d.name = h.name 
WITH DISTINCT h
MATCH (h)-[:RESOLVES_TO]->(:IP)-[:PART_OF]->(pfx:Prefix)-[:CATEGORIZED]-(t:Tag)
WHERE t.label STARTS WITH 'RPKI Invalid'
RETURN COUNT(DISTINCT pfx.prefix)"""

res, _, _ = db.execute_query(query_all, database_="neo4j");
all_prefixes = res[0][0]
res, _, _ = db.execute_query(query_invalid, database_="neo4j");
rpki_prefixes = res[0][0]
print(f'{100*rpki_prefixes/all_prefixes:.2f}% of the prefixes for Tranco are RPKI invalid.')

0.08% of the prefixes for Tranco are RPKI invalid.


In [6]:
query_rpki = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding RPKI invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:CATEGORIZED]-(t:Tag)
WHERE d.name = h.name AND ( t.label = 'RPKI Valid' OR t.label STARTS WITH 'RPKI Invalid' )
RETURN COUNT(DISTINCT pfx)"""

res, _, _ = db.execute_query(query_rpki, database_="neo4j");
rpki_prefixes = res[0][0]
print(f'{100*rpki_prefixes/all_prefixes:.1f}% of the prefixes for Tranco are covered by RPKI.')

55.0% of the prefixes for Tranco are covered by RPKI.


In [7]:
# Get the RPKI coverage for the top 100k
query_all_top10k = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)
WHERE r.rank < 100000 AND d.name = h.name 
RETURN COUNT(DISTINCT pfx)"""

query_rpki_top10k = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding RPKI valid and invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:CATEGORIZED]-(t:Tag)
WHERE r.rank < 100000 AND d.name = h.name AND ( t.label = 'RPKI Valid' OR t.label STARTS WITH 'RPKI Invalid' )
RETURN COUNT(DISTINCT pfx)"""

res, _, _ = db.execute_query(query_all_top10k, database_="neo4j");
all_prefixes_top10k = res[0][0]
res, _, _ = db.execute_query(query_rpki_top10k, database_="neo4j");
rpki_prefixes_top10k = res[0][0]
print(f'{100*rpki_prefixes_top10k/all_prefixes_top10k:.1f}% of the prefixes for Tranco top 100k are covered by RPKI.')

58.9% of the prefixes for Tranco top 100k are covered by RPKI.


In [8]:
# Get the RPKI coverage for the bottom 100k
query_all_bt10k = """
// Resolve host names and corresponding rankings from the Tranco bottom 1 million list and count corresponding prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)
WHERE r.rank > 900000 AND d.name = h.name
RETURN COUNT(DISTINCT pfx)"""

query_rpki_bt10k = """
// Resolve host names and corresponding rankings from the Tranco bottom 1 million list and count corresponding RPKI valid and invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:CATEGORIZED]-(t:Tag)
WHERE r.rank > 900000 AND d.name = h.name AND (t.label = 'RPKI Valid' OR t.label STARTS WITH  'RPKI Invalid')
RETURN COUNT(DISTINCT pfx)"""

res, _, _ = db.execute_query(query_all_bt10k, database_="neo4j");
all_prefixes_bt10k = res[0][0]
res, _, _ = db.execute_query(query_rpki_bt10k, database_="neo4j");
rpki_prefixes_bt10k = res[0][0]
print(f'{100*rpki_prefixes_bt10k/all_prefixes_bt10k:.1f}% of the prefixes for Tranco bottom 100k are covered by RPKI.')

64.2% of the prefixes for Tranco bottom 100k are covered by RPKI.


## (Section 4.3) RPKI deployment for CDN is low

### Original results
0.9% domain names on CDN are covered by RPKI.



### IYP Results

In [9]:
query_all_cdn_prefixes = """
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:ORIGINATE]-(:AS)-[:CATEGORIZED {reference_org:'BGP.Tools'}]-(t:Tag {label:'Content Delivery Network'})
RETURN COUNT(DISTINCT pfx.prefix) AS total_pfx"""

query_valid_cdn_prefixes = """
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:ORIGINATE]-(:AS)-[:CATEGORIZED {reference_org:'BGP.Tools'}]-(t:Tag {label:'Content Delivery Network'})
WITH DISTINCT pfx
MATCH (pfx)-[:CATEGORIZED]-(t:Tag)
WHERE t.label = 'RPKI Valid' OR t.label STARTS WITH  'RPKI Invalid'
RETURN COUNT(DISTINCT pfx.prefix) AS nb_valid
"""
res, _, _ = db.execute_query(query_all_cdn_prefixes, database_="neo4j");
total_cdn_prefixes = res[0][0]
res, _, _ = db.execute_query(query_valid_cdn_prefixes, database_="neo4j");
valid_cdn_prefixes = res[0][0]
print(f'{100*valid_cdn_prefixes/total_cdn_prefixes:.1f}% of the CDN prefixes are covered by RPKI.')

73.1% of the CDN prefixes are covered by RPKI.


In [10]:
# percentage of RPKI valid prefixes for each BGP.Tools tag
query_all_tags = """
MATCH (t:Tag)-[:CATEGORIZED {reference_org:'BGP.Tools'}]-(:AS)
WITH COLLECT(DISTINCT t.label) AS tags
UNWIND tags AS tag
MATCH (t:Tag {label:tag})-[:CATEGORIZED {reference_org:'BGP.Tools'}]-(:AS)-[:ORIGINATE]-(p:Prefix)
WITH tag, t, COUNT(DISTINCT p.prefix) AS total_pfx
MATCH (t:Tag {label:tag})-[:CATEGORIZED {reference_org:'BGP.Tools'}]-(a:AS)-[:ORIGINATE]-(p:Prefix)-[:CATEGORIZED]-(tp:Tag)
WHERE tp.label = 'RPKI Valid' OR tp.label STARTS WITH  'RPKI Invalid'
WITH t, total_pfx, COUNT(DISTINCT p.prefix) AS nb_valid
RETURN t.label AS tag, 100*nb_valid/total_pfx AS perc_valid  ORDER by perc_valid"""

res, _, _ = db.execute_query(query_all_tags, database_="neo4j");
for tag, perc in res:
    print(f'{perc:.1f}\t{tag}')
    

20.0	Academic
22.0	Government
27.0	T1
33.0	Business Broadband
34.0	Carrier
36.0	Mobile Data/Carrier
43.0	Home ISP
44.0	Eyeball
47.0	Tranco 10k Host
47.0	Server Hosting
52.0	ToR Services
54.0	Internet Critical Infra
60.0	Anycast
62.0	VPN Host
65.0	Validating RPKI ROV
71.0	Content Delivery Network
72.0	Content
74.0	DDoS Mitigation
77.0	Corporate/Enterprise
84.0	Satellite Internet
84.0	Personal ASN
85.0	Event


# Groupped by domain names

In [11]:
query_rpki = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding RPKI invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)
WHERE d.name = h.name
RETURN COUNT(DISTINCT d)"""
res, _, _ = db.execute_query(query_rpki, database_="neo4j");
all_domain = res[0][0]


query_rpki = """
// Resolve host names and corresponding rankings from the Tranco top 1 million list and count corresponding RPKI invalid prefixes
MATCH (:Ranking {name:'Tranco top 1M'})-[r:RANK]-(d:DomainName)-[:PART_OF]-(h:HostName)-[:RESOLVES_TO]-(i:IP)-[:PART_OF]-(pfx:Prefix)-[:CATEGORIZED]-(t:Tag)
WHERE d.name = h.name AND ( t.label = 'RPKI Valid' OR t.label STARTS WITH 'RPKI Invalid' )
RETURN COUNT(DISTINCT d)"""

res, _, _ = db.execute_query(query_rpki, database_="neo4j");
rpki_domain = res[0][0]
print(f'{100*rpki_domain/all_domain:.1f}% of the domains in Tranco are covered by RPKI.')
print(f'{all_domain}')


80.7% of the domains in Tranco are covered by RPKI.
896927
