Skip to content

Commit

Permalink
Add scripts to download and process BGP dumps
Browse files Browse the repository at this point in the history
  • Loading branch information
naumenkogs committed Dec 20, 2019
1 parent 6b87ab1 commit 9210017
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 0 deletions.
25 changes: 25 additions & 0 deletions remote_dumps/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
This set of scripts downloads, parses and aggregates BGP announcement dumps from open repositories, to be used in asmap construction.

### Pre-reqs

``./setup.sh``

### Use

0. ``./prepare.sh`` deletes old data.
1. ``./download_dumps.py`` downloads the latest RIPE dumps (the current date is recorded in the file names) to the `dumps` folder.
2. ``./quagga_parse.sh`` reads dumps from the `dumps` folder and
writes the human readable interpretation to the `paths` folder.
3. ``./quagga_aggregate.py`` goes through the interpreted dumps in ``paths`` folder, aggregates paths and assigns every IP prefix to the first element of the common suffix of the asn path.

Resulting ``prefix_asns.out`` can be fed to ``../buildmap.py``.

### Rationale

Consider the following scenario:
1.2.3.4: A -> B -> C -> X
1.2.3.4: A -> F -> C -> X

In this case, {C, X} is the common suffix, and we will map 1.2.3.4 to C, because C represents the single infrastructure required to reach that IP address.

Note that diversifying by C would implicitly diversify by X too.
22 changes: 22 additions & 0 deletions remote_dumps/download_dumps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Download the latest full BGP table dumps (bview) from the RIPE RIS collectors.

Each collector rrcNN publishes its most recent dump at
http://data.ris.ripe.net/rrcNN/latest-bview.gz.  The files are saved into
dumps/ as dump_<NN>_<date>.gz for later processing by quagga_parse.sh.
"""

import datetime
import os
import shutil
import urllib.error
import urllib.request

# Collector ids rrc01..rrc23.
# NOTE(review): range(1, 24) skips rrc00 — confirm that is intentional.
providers = range(1, 24)
date = datetime.date.today()  # only used to tag the output file names

dumps_dir = "dumps/"
os.makedirs(dumps_dir, exist_ok=True)  # tolerate a missing or pre-existing dir

for provider in providers:
    provider_id = "{:02d}".format(provider)
    link = "http://data.ris.ripe.net/rrc{0}/latest-bview.gz".format(provider_id)
    dump_name = "dump_{0}_{1}.gz".format(provider_id, date)
    print(link)
    try:
        # Stream the response straight to disk instead of buffering the whole
        # (potentially large) dump in memory, and close both handles.
        with urllib.request.urlopen(link) as dump, \
                open(dumps_dir + dump_name, 'wb') as output:
            shutil.copyfileobj(dump, output)
    except urllib.error.URLError:
        # Best effort: one dead collector should not abort the whole run.
        print('Failed to download: ' + link)
        continue
5 changes: 5 additions & 0 deletions remote_dumps/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Delete artifacts of a previous run so the pipeline starts from scratch.
# -f: do not fail (or prompt) when the directories are already empty or
# prefix_asns.out has not been produced yet, e.g. on a fresh checkout.
rm -f dumps/* paths/* prefix_asns.out
70 changes: 70 additions & 0 deletions remote_dumps/quagga_aggregate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python

import sys
import os
import re

PARSED_DUMPS_DIR = 'paths/'
RESULT_OUTPUT = 'prefix_asns.out'
first_octet = re.compile(r"^[^.|:]*")


# Remove duplicate asns in a row
# [1, 1, 2, 3, 3, 3] -> [1, 2, 3]
# Remove duplicate asns in a row
# [1, 1, 2, 3, 3, 3] -> [1, 2, 3]
def dedup(asn_path):
    """Return asn_path with runs of consecutive equal ASNs collapsed to one.

    The previous in-place scan started the comparison at index len-2 and
    therefore never compared the final pair, so a duplicate at the end of
    the path survived (e.g. [1, 1, 2, 3, 3, 3] -> [1, 2, 3, 3]).
    """
    # Keep an element only when it differs from its immediate predecessor.
    return [asn for idx, asn in enumerate(asn_path)
            if idx == 0 or asn != asn_path[idx - 1]]

def find_common_suffixes(prefix_asn_paths, common_asn_suffix):
    """For every prefix, narrow its observed AS paths to their common suffix.

    prefix_asn_paths: dict mapping prefix -> iterable of AS-path strings
        (space-separated ASNs, destination last).
    common_asn_suffix: output dict, mutated in place; maps prefix -> list of
        ASNs forming the suffix shared by the paths to that prefix.
    """
    for prefix, asn_lists in prefix_asn_paths.items():
        asn_lists = [dedup(asn_list.split(' ')) for asn_list in asn_lists] # preprocess
        # Shortest path first: the common suffix can never be longer than it.
        asn_lists.sort(key = len)
        cur_asn_suffix = asn_lists[0] # represents the common sub-path (from the end) of asns to a prefix
        for asn_list in asn_lists[1:]:
            if cur_asn_suffix == asn_list:
                continue
            if cur_asn_suffix[-1] != asn_list[-1]: # multi-homed
                # NOTE(review): on a multi-homed prefix narrowing stops but
                # the suffix accumulated so far is still recorded below —
                # confirm this is intended rather than dropping the prefix.
                break
            cur_asn_suffix_len = len(cur_asn_suffix)
            for i in range(1, cur_asn_suffix_len): # position from the end
                # First mismatch counting from the end truncates the candidate
                # suffix to the elements after the mismatch position.
                if cur_asn_suffix[len(cur_asn_suffix) - i - 1] != asn_list[len(asn_list) - i - 1]:
                    cur_asn_suffix = cur_asn_suffix[len(cur_asn_suffix) - i:]
                    break
        common_asn_suffix[prefix] = cur_asn_suffix

def process_files():
    """Aggregate all parsed dumps in PARSED_DUMPS_DIR into a dict that maps
    each announced prefix to the common ASN suffix of its AS paths.

    The files are re-scanned once per first-octet chunk so only a slice of
    the announcements is held in memory at a time.
    """
    res = dict()
    files = os.listdir(PARSED_DUMPS_DIR)
    # Matches a brace-delimited AS set such as "{123,456}".  The previous
    # pattern r'{*}' only consumed the closing brace (plus any '{' directly
    # before it), so "{123,456" garbage tokens were left in the path.
    # Compiled once here: it runs on every line of every file.
    as_set = re.compile(r'\{[^}]*\}')
    step = 40
    for i in range(1, 256, step): # process ip range chunks so that memory is not filled
        print("Working on chunk: ", i, flush=True)
        announcements = dict()
        for file_name in files:
            print('Reading file: ', file_name, flush=True)
            with open(PARSED_DUMPS_DIR + file_name, "r") as file:
                for line in file:
                    announcement_data = as_set.sub(' ', line.strip()) # removes {} sets in AS path
                    announcement_data = announcement_data.split('|')
                    prefix = announcement_data[0]
                    first_oc = re.search(first_octet, prefix).group(0)
                    # NOTE(review): the break assumes lines are sorted by
                    # prefix; a line whose leading field parses to a large
                    # number (e.g. an IPv6 prefix "2001:...") also ends the
                    # scan of this file — confirm dumps are IPv4-first.
                    if first_oc == '' or int(first_oc) > i + step: # passed current chunk
                        break
                    if int(first_oc) < i: # current chunk is ahead
                        continue
                    asns = announcement_data[1]
                    announcements.setdefault(prefix, set()).add(asns)
        find_common_suffixes(announcements, res)
    return res

def dump_result(prefix_unique_asn_suffixes):
    """Write the prefix -> ASN mapping to RESULT_OUTPUT, one line per prefix.

    Each line has the form "<prefix> AS<asn>", where <asn> is the first
    element of the prefix's common suffix.
    """
    with open(RESULT_OUTPUT, 'w+') as file:
        for prefix, unique_asn_suffix in prefix_unique_asn_suffixes.items():
            # Guard clause: skip empty suffixes or an empty leading ASN.
            # This happens very rarely. TODO debug
            if unique_asn_suffix == [] or unique_asn_suffix[0] == '':
                continue
            file.write("%s AS%s\n" % (prefix, unique_asn_suffix[0]))

# Entry point: aggregate the parsed dumps and write prefix_asns.out.
# Guarded so that importing this module for its helpers (e.g. dedup) does
# not kick off a full processing run as a side effect.
if __name__ == "__main__":
    res = process_files()
    dump_result(res)
8 changes: 8 additions & 0 deletions remote_dumps/quagga_parse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

# Parse every MRT dump in dumps/ into a human-readable path file in paths/,
# keeping fields 6 and 7 of bgpdump's machine-readable output (the announced
# prefix and the AS path — exactly what quagga_aggregate.py consumes).
# Glob instead of parsing `ls` output, and quote every expansion, so the
# loop is safe for any filename.
for mrt in dumps/*; do
    name=$(basename "$mrt")
    /bin/echo -n "processing $name... "
    /usr/local/bin/bgpdump -vm "$mrt" | cut -d '|' -f '6,7' > "paths/$name"
done

12 changes: 12 additions & 0 deletions remote_dumps/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Prepare working directories and build/install libbgpdump (used by
# quagga_parse.sh via /usr/local/bin/bgpdump, the default install prefix).

# Abort on the first failure so a failed download or build does not
# cascade into running `make install` on nothing.
set -e

# -p: succeed even when the directories already exist (re-runs).
mkdir -p dumps paths

wget http://ris.ripe.net/source/bgpdump/libbgpdump-1.6.0.tgz
tar zxvf libbgpdump-1.6.0.tgz
rm libbgpdump-1.6.0.tgz
cd libbgpdump-1.6.0
./bootstrap.sh
make install
cd ..

0 comments on commit 9210017

Please sign in to comment.