### Track the extent and evolution of the use of BGP communities.  
### To this end, please use a set of routing tables from BGPStream that covers the period 2011-2021.  
### More specifically, use all the routing tables that were available in the first seven days of January of every second year, that is 2011, 2013, 2015, 2017, 2019 and 2021.  
### Then, identify how many ASes are using communities, rank ASes based on the number of prefixes that are tagged with communities, and check what tier-1 ASes use communities for by matching the communities in use against what those ASes document publicly.

https://labs.ripe.net/author/florian_streibelt/bgp-communities-a-weapon-for-the-internet-part-1/

In [None]:
import pybgpstream

# Demo cell: print every element of the "ribs" records that two collectors
# expose for a two-minute window.
dir_name = 'data/'
from_time = "2017-01-01 00:00:00"
until_time = "2017-01-01 00:02:00 UTC"
collectors = ["route-views.sg", "route-views.eqix"]
record_type = "ribs"
# The stream can optionally be narrowed with a filter string such as
# "peer 11666 and prefix more 210.180.0.0/16" (passed as `filter=...`).

stream = pybgpstream.BGPStream(
    record_type=record_type,
    collectors=collectors,
    from_time=from_time,
    until_time=until_time,
)

# Record fields are reachable straight from the element (elem.time) or
# through its parent record (elem.record.time).
# To keep the output, write str(elem) plus a newline to a file here instead
# of printing it.
for elem in stream:
    print(f"{stream.__sizeof__()!s}  {elem!s}")

In [None]:
import pybgpstream

# Build a stream of "updates" records from two collectors for a ten-minute
# window, restricted by the filter string to peer AS 11666 and the
# more-specifics of 210.180.0.0/16.
stream = pybgpstream.BGPStream(
    collectors=["route-views.sg", "route-views.eqix"],
    record_type="updates",
    from_time="2017-07-07 00:00:00",
    until_time="2017-07-07 00:10:00 UTC",
    filter="peer 11666 and prefix more 210.180.0.0/16",
)

# Filters can also be attached after construction, either with
#   stream.parse_filter_string("peer 11666 and prefix more 210.180.0.0/16")
# or through the older interface:
#   stream.add_filter("peer-asn", "11666")
#   stream.add_filter("prefix-more", "210.180.0.0/16")

# Elements can be read by iterating the stream directly (`for elem in stream`,
# with fields available as elem.time or elem.record.time). This cell uses the
# alternative: walk the records and then the elements inside each record,
# which leaves room to skip a whole record (e.g. based on rec.time,
# rec.type or rec.collector) before looking at its elements.
for rec in stream.records():
    for elem in rec:
        print(f"  Elem Type: {elem.type!s}")



In [21]:
import requests
import subprocess
import gzip
import os

In [4]:
# Download parameters for the RIS dumps hosted under `base_url`: five
# collectors, the first seven days of January, every second year 2011-2021.
base_url = 'http://data.ris.ripe.net/'
record_type = "bview"

collectors = [f"rrc{number:02d}" for number in (0, 1, 3, 4, 5)]
year_list = [str(year_number) for year_number in range(2011, 2022, 2)]
month = '01'
day_list = [f"{day_number:02d}" for day_number in range(1, 8)]
# Candidate dump times that are tried for every day.
hour_list = ['0759', '1559', '2359', '0000', '0800', '1600']

# Output locations: compressed originals, unzipped files, bgpdump text.
dir_name = 'ribs_data/'
dir_name_unzip = 'ribs_data_unzip/'
dir_name_bgpdump = 'ribs_data_bgpdump/'

def decompress_file(base_file_name, compressed_content, out_dir=None):
    """Gunzip an in-memory payload and write the result to disk.

    Args:
        base_file_name: Name of the file to create inside the output
            directory.
        compressed_content: The complete gzip-compressed payload as bytes.
        out_dir: Directory to write into. Defaults to the module-level
            ``dir_name_unzip``. The directory is created when missing.

    Raises:
        Whatever ``gzip.decompress`` raises for a payload that is not valid
        gzip data, and ``OSError`` if the output file cannot be written.
    """
    target_dir = dir_name_unzip if out_dir is None else out_dir

    # Decompress before touching the file system so that a corrupt download
    # raises without leaving an empty output file behind.
    raw_bytes = gzip.decompress(compressed_content)

    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, base_file_name), 'wb') as f_out:
        f_out.write(raw_bytes)
    

def bgpdump_file(file_name):
    """Convert an unzipped dump to text by running ``bgpdump -m`` on it.

    Reads ``dir_name_unzip + file_name`` and writes bgpdump's standard output
    to ``dir_name_bgpdump + file_name + '_bgpdump'``. The output directory is
    created when missing.

    Args:
        file_name: Name of the unzipped input file inside ``dir_name_unzip``.

    Raises:
        OSError: If the ``bgpdump`` executable or the output location is not
            usable.
        subprocess.CalledProcessError: If ``bgpdump`` exits with a non-zero
            status.
    """
    if dir_name_bgpdump:
        os.makedirs(dir_name_bgpdump, exist_ok=True)
    dump_path = dir_name_bgpdump + file_name + '_bgpdump'

    try:
        with open(dump_path, 'wb') as dump_file:
            # Hand the open file to the child process so the output is
            # streamed to disk instead of being held in memory as a whole.
            subprocess.run(
                ['bgpdump', "-m", dir_name_unzip + file_name],
                stdout=dump_file,
                check=True,
            )
    except (OSError, subprocess.CalledProcessError):
        # A failed conversion must not leave a partial file that later
        # analysis would mistake for a complete dump.
        if os.path.exists(dump_path):
            os.remove(dump_path)
        raise
    

# Try every collector/year/day/time combination. Each dump that exists is
# stored in compressed form, unzipped, and converted with bgpdump.
os.makedirs(dir_name, exist_ok=True)

for collector in collectors:
    for year in year_list:
        for day in day_list:
            for hour in hour_list:
                base_file_name = f"{record_type}.{year}{month}{day}.{hour}"
                gzip_file_name = base_file_name + '.gz'
                url = f"{base_url}{collector}/{year}.{month}/{gzip_file_name}"

                try:
                    # requests waits indefinitely unless a timeout is given:
                    # (connect timeout, read timeout) in seconds.
                    response = requests.get(url, timeout=(10, 60))
                except requests.RequestException as error:
                    # One unreachable file should not abort the whole batch.
                    print('ERR ' + collector + ' ' + base_file_name + ' ' + str(error))
                    continue

                print(f"{response.status_code} {collector} {base_file_name}")
                if response.status_code != 200:
                    continue

                content = response.content
                with open(dir_name + collector + '_' + gzip_file_name, "wb") as f:
                    f.write(content)

                save_file_name = collector + '_' + base_file_name
                decompress_file(save_file_name, content)
                bgpdump_file(save_file_name)



404 rrc00 bview.20110101.0759
404 rrc00 bview.20110101.1559
404 rrc00 bview.20110101.2359
200 rrc00 bview.20110101.0000
200 rrc00 bview.20110101.0800
200 rrc00 bview.20110101.1600


In [22]:
# Reduced configuration: one collector, one year, one day, four dump times.
# The full run would additionally cover collectors rrc01/rrc03/rrc04/rrc05,
# years 2013-2021 (every second year), days 02-07 and times 0800/1600.
# NOTE(review): the 'deneme' directories look like scratch locations for a
# trial run - confirm before relying on their contents.
base_url = 'http://data.ris.ripe.net/'
record_type = "bview"

collectors = ["rrc00"]
year_list = ['2011']
month = '01'
day_list = ['01']
hour_list = ['0759', '1559', '2359', '0000']

dir_name_unzip = 'deneme_unzip/'
dir_name_bgpdump = 'deneme/'

def decompress_file(base_file_name, compressed_content, out_dir=None):
    """Gunzip an in-memory payload and write the result to disk.

    Args:
        base_file_name: Name of the file to create inside the output
            directory.
        compressed_content: The complete gzip-compressed payload as bytes.
        out_dir: Directory to write into. Defaults to the module-level
            ``dir_name_unzip``. The directory is created when missing.

    Raises:
        Whatever ``gzip.decompress`` raises for a payload that is not valid
        gzip data, and ``OSError`` if the output file cannot be written.
    """
    target_dir = dir_name_unzip if out_dir is None else out_dir

    # Decompress before touching the file system so that a corrupt download
    # raises without leaving an empty output file behind.
    raw_bytes = gzip.decompress(compressed_content)

    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, base_file_name), 'wb') as f_out:
        f_out.write(raw_bytes)



def bgpdump_file(file_name):
    """Convert an unzipped dump with ``bgpdump -m`` and delete the input.

    Reads ``dir_name_unzip + file_name`` and writes bgpdump's standard output
    to ``dir_name_bgpdump + file_name + '_bgpdump'``. The output directory is
    created when missing. The unzipped input is removed only after bgpdump
    has finished successfully, so a failed conversion can be retried.

    Args:
        file_name: Name of the unzipped input file inside ``dir_name_unzip``.

    Raises:
        OSError: If the ``bgpdump`` executable or the output location is not
            usable, or the input file cannot be removed.
        subprocess.CalledProcessError: If ``bgpdump`` exits with a non-zero
            status.
    """
    if dir_name_bgpdump:
        os.makedirs(dir_name_bgpdump, exist_ok=True)
    source_path = dir_name_unzip + file_name
    dump_path = dir_name_bgpdump + file_name + '_bgpdump'

    try:
        with open(dump_path, 'wb') as dump_file:
            # Hand the open file to the child process so the output is
            # streamed to disk instead of being held in memory as a whole.
            subprocess.run(
                ['bgpdump', "-m", source_path],
                stdout=dump_file,
                check=True,
            )
    except (OSError, subprocess.CalledProcessError):
        # A failed conversion must not leave a partial file that later
        # analysis would mistake for a complete dump.
        if os.path.exists(dump_path):
            os.remove(dump_path)
        raise

    os.remove(source_path)


# Try every collector/year/day/time combination. Each dump that exists is
# unzipped and converted with bgpdump; the compressed original is not kept.
for collector in collectors:
    for year in year_list:
        for day in day_list:
            for hour in hour_list:
                base_file_name = f"{record_type}.{year}{month}{day}.{hour}"
                gzip_file_name = base_file_name + '.gz'
                url = f"{base_url}{collector}/{year}.{month}/{gzip_file_name}"

                try:
                    # requests waits indefinitely unless a timeout is given:
                    # (connect timeout, read timeout) in seconds.
                    response = requests.get(url, timeout=(10, 60))
                except requests.RequestException as error:
                    # One unreachable file should not abort the whole batch.
                    print('ERR ' + collector + ' ' + base_file_name + ' ' + str(error))
                    continue

                print(f"{response.status_code} {collector} {base_file_name}")
                if response.status_code != 200:
                    continue

                content = response.content
                save_file_name = collector + '_' + base_file_name
                decompress_file(save_file_name, content)
                bgpdump_file(save_file_name)
print('Done!!')


404 rrc00 bview.20110101.0759
404 rrc00 bview.20110101.1559
404 rrc00 bview.20110101.2359
200 rrc00 bview.20110101.0000
Done!!


# ANALYSIS STARTS

In [3]:
import pyspark