-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add scripts to download and process BGP dumps
- Loading branch information
1 parent
6b87ab1
commit 9210017
Showing
6 changed files
with
142 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
These scripts download, parse, and aggregate BGP announcement dumps from open repositories so that the result can be used in asmap construction. | ||
|
||
### Pre-reqs | ||
|
||
``./setup.sh`` | ||
|
||
### Use | ||
|
||
0. ``./prepare.sh`` deletes old data. | ||
1. ``./download_dumps.py`` downloads the latest RIPE RIS dump (``latest-bview.gz``) from each collector to the `dumps` folder; the current date is only used in the saved file names. | ||
2. ``./quagga_parse.sh`` reads dumps from the `dumps` folder and | ||
writes the human readable interpretation to the `paths` folder. | ||
3. ``./quagga_aggregate.py`` goes through the interpreted dumps in the ``paths`` folder, aggregates the AS paths seen for every IP prefix, and assigns each prefix to the first ASN of the suffix common to all of its AS paths. | ||
|
||
Resulting ``prefix_asns.out`` can be fed to ``../buildmap.py``. | ||
|
||
### Rationale | ||
|
||
Consider the following scenario: | ||
1.2.3.4: A -> B -> C -> X | ||
1.2.3.4: A -> F -> C -> X | ||
|
||
In this case, {C, X} is the common suffix, and we will map 1.2.3.4 to C, because C represents the single infrastructure required to reach that IP address. | ||
|
||
Note that diversifying by C would implicitly diversify by X too. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/usr/bin/env python | ||
|
||
import datetime
import os
import shutil
import urllib.request

# Collector numbers to fetch from; each is formatted as rrcNN in the URL below.
providers = range(1, 24)
# Only used to name the saved files: the URL always points at the latest dump.
date = datetime.date.today()

dumps_dir = "dumps/"
# Make the script usable even when the output folder has not been created yet.
os.makedirs(dumps_dir, exist_ok=True)

for provider in providers:
    provider = "{:02d}".format(provider)
    link = "http://data.ris.ripe.net/rrc{0}/latest-bview.gz".format(provider)
    dump_name = "dump_{0}_{1}.gz".format(provider, date)
    print(link)
    try:
        dump = urllib.request.urlopen(link)
    except Exception:
        # Best effort: a collector that cannot be reached is skipped.
        print('Failed to download: ' + link)
        continue
    # Stream the response to disk in chunks instead of holding the whole dump
    # in memory, and make sure both the connection and the file get closed.
    with dump, open(dumps_dir + dump_name, 'wb') as output:
        shutil.copyfileobj(dump, output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash | ||
|
||
# Remove the data left over from a previous run.
# -f: do not complain (or exit non-zero) when there is nothing to delete,
# e.g. on the first run or when the folders are already empty.
rm -f dumps/* paths/* prefix_asns.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import os | ||
import re | ||
|
||
PARSED_DUMPS_DIR = 'paths/' | ||
RESULT_OUTPUT = 'prefix_asns.out' | ||
first_octet = re.compile(r"^[^.|:]*") | ||
|
||
|
||
# Remove duplicate asns in a row
# [1, 1, 2, 3, 3, 3] -> [1, 2, 3]
def dedup(asn_path):
    """Collapse every run of consecutive identical ASNs into a single entry.

    Only adjacent repeats are removed; an ASN that reappears later in the
    path after a different one is kept.

    Args:
        asn_path: list of ASNs (any values comparable with ``!=``).

    Returns:
        A new list with consecutive duplicates removed; the input list is
        not modified.
    """
    deduped = []
    for asn in asn_path:
        # Keep an element only when it differs from the last one kept. This
        # covers every adjacent pair, including the final two elements.
        if not deduped or deduped[-1] != asn:
            deduped.append(asn)
    return deduped
|
||
def find_common_suffixes(prefix_asn_paths, common_asn_suffix):
    """Compute, for every prefix, the AS-path suffix shared by its paths.

    Args:
        prefix_asn_paths: dict mapping a prefix to a collection of AS paths,
            each path being a string of ASNs separated by single spaces.
        common_asn_suffix: dict filled in place; each prefix is mapped to a
            list of ASNs (the suffix found for that prefix).
    """
    for prefix, raw_paths in prefix_asn_paths.items():
        # Split and de-duplicate every path, then order them shortest first
        # so the starting candidate is never longer than a later path.
        paths = sorted((dedup(raw.split(' ')) for raw in raw_paths), key=len)
        suffix = paths[0]  # current candidate for the shared tail
        for path in paths[1:]:
            if path == suffix:
                continue
            if path[-1] != suffix[-1]:  # multi-homed
                # Different last ASN: stop narrowing and keep the candidate
                # as it stands at this point.
                break
            # Count how many trailing ASNs the candidate and this path share.
            shared = 0
            for ours, theirs in zip(reversed(suffix), reversed(path)):
                if ours != theirs:
                    break
                shared += 1
            suffix = suffix[len(suffix) - shared:]
        common_asn_suffix[prefix] = suffix
|
||
def process_files():
    """Aggregate the AS paths of every parsed dump into common suffixes.

    Every file in PARSED_DUMPS_DIR is expected to hold one ``prefix|as path``
    record per line. The address space is walked in chunks of ``step`` first
    octets, and all files are re-read for each chunk, so that only the
    announcements of one chunk are held in memory at a time.

    NOTE(review): the ``break`` below stops reading a file at the first
    prefix beyond the current chunk, so it relies on each file listing its
    prefixes in ascending first-octet order -- confirm for the dumps used.
    Prefixes whose first group is empty or not a decimal number (as in IPv6
    notation) end the reading of that file and are therefore not processed.

    Returns:
        dict mapping each prefix to the list produced by
        find_common_suffixes for it.
    """
    res = dict()
    files = os.listdir(PARSED_DUMPS_DIR)
    # Matches a whole "{...}" AS_SET, braces included.
    as_set = re.compile(r'\{[^}]*\}')
    step = 40
    for i in range(1, 256, step):  # process ip range chunks so that memory is not filled
        print("Working on chunk: ", i, flush=True)
        announcements = dict()
        for file_name in files:
            print('Reading file: ', file_name, flush=True)
            with open(PARSED_DUMPS_DIR + file_name, "r") as file:
                for line in file:
                    # Blank out {} sets in the AS path so their members never
                    # show up as path elements.
                    announcement_data = as_set.sub(' ', line.strip()).split('|')
                    prefix = announcement_data[0]
                    first_oc = first_octet.match(prefix).group(0)
                    # The chunk covers first octets i .. i + step - 1; the
                    # octet i + step belongs to the next chunk.
                    if not first_oc.isdecimal() or int(first_oc) >= i + step:  # passed current chunk
                        break
                    if int(first_oc) < i:  # current chunk is ahead
                        continue
                    asns = announcement_data[1]
                    announcements.setdefault(prefix, set()).add(asns)
        find_common_suffixes(announcements, res)
    return res
|
||
def dump_result(prefix_unique_asn_suffixes):
    """Write one ``<prefix> AS<asn>`` line per prefix to RESULT_OUTPUT.

    Args:
        prefix_unique_asn_suffixes: dict mapping a prefix to its list of
            ASNs; the first element of the list is the ASN written out.
    """
    with open(RESULT_OUTPUT, 'w+') as out:
        for prefix, suffix in prefix_unique_asn_suffixes.items():
            # Entries without a usable first ASN are left out of the result.
            # This happens very rarely. TODO debug
            if suffix == [] or suffix[0] == '':
                continue
            out.write("%s AS%s\n" % (prefix, suffix[0]))
|
||
# Script entry point: aggregate all parsed dumps, then write the resulting
# prefix -> ASN mapping to the output file.
res = process_files()
dump_result(res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash | ||
|
||
# Convert every dump in dumps/ into "field6|field7" lines in paths/.
# BGPDUMP may be set in the environment to use a bgpdump binary installed
# somewhere other than the default location.
BGPDUMP="${BGPDUMP:-/usr/local/bin/bgpdump}"

for mrt in dumps/*; do
    # With an empty dumps/ folder the glob stays literal; skip it.
    [ -e "$mrt" ] || continue
    OUT=$(basename "$mrt")
    /bin/echo -n "processing $OUT... "
    # Keep only fields 6 and 7 of the '|'-separated bgpdump output.
    "$BGPDUMP" -vm "$mrt" | cut -d '|' -f '6,7' > "paths/$OUT"
    echo "done"
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash | ||
|
||
# Stop at the first failing step, so that a failed download or unpack never
# leads to running bootstrap/make from the wrong directory.
set -e

# -p: re-running the setup must not fail when the folders already exist.
mkdir -p dumps paths

# Fetch, unpack, build and install bgpdump.
wget http://ris.ripe.net/source/bgpdump/libbgpdump-1.6.0.tgz
tar zxvf libbgpdump-1.6.0.tgz
rm libbgpdump-1.6.0.tgz
cd libbgpdump-1.6.0
./bootstrap.sh
make install
cd ..