In [6]:
import argparse
import json
import gzip
import os
import urllib.request
import networkx as nx
from bz2 import BZ2File as bzopen
from urllib.request import urlopen
from networkx.readwrite import json_graph
from bs4 import BeautifulSoup


def create_graph(year, month):
    # Create graph from AS relationships data
    G = nx.DiGraph()
#     G = DiGraph()
    data_dir = 'as_relationships_data'
    fname = os.path.join(data_dir, f'{year}{month}01.as-rel.txt.bz2')
    with bzopen(fname, 'r') as f:
        for i, line in enumerate(f):
            line = line.decode('utf-8').strip()
            if line.startswith('#'):
                continue
            else:
                l = line.split('|')
                as1 = int(l[0])
                as2 = int(l[1])
                relationship_type = int(l[2])
                if relationship_type == -1:
                    r = 'c2p'
                elif relationship_type == 0:
                    r = 'p2p'
                else:
                    raise Exception('Invalid relationship type!')
                G.add_edge(as1, as2, label=r)
    return G


def download_as_links_files(year, month, monitor=None):
    url = f'http://data.caida.org/datasets/topology/ark/ipv6/as-links/{year}/{month}'
    html = urlopen(url)
    soup = BeautifulSoup(html, 'html.parser')
    links = []
    for link in soup.find_all('a'):
        l = link.get('href')
        if l.startswith('cycle-aslinks.l8') and year + month in l:
            if monitor is not None and monitor in l:
                links.append(l)
    data_dir = 'as_links_data'
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    out_dir = os.path.join(data_dir, year + month)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for l in links:
        full_link = os.path.join(url, l)
        out_file = os.path.join(out_dir, l)
        if os.path.exists(out_file):
            continue
        else:
            print(f'Downloading {full_link}')
            urllib.request.urlretrieve(full_link, out_file)


def download_as_relationships_file(year, month):
    l = f'{year}{month}01.as-rel.txt.bz2'
    full_link = f'http://data.caida.org/datasets/as-relationships/serial-1/{l}'
    data_dir = 'as_relationships_data'
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    out_file = os.path.join(data_dir, l)
    if not os.path.exists(out_file):
        print(f'Downloading {full_link}')
        urllib.request.urlretrieve(full_link, out_file)


In [2]:
year = '2018'
month = '12'
monitor = 'sea-us'

In [7]:
download_as_links_files(year, month, monitor)
download_as_relationships_file(year, month)

In [11]:
# Create graph from AS relationships
G = create_graph(year, month)
print(f'G # nodes: {len(G.nodes)}')
print(f'G # edges: {len(G.edges)}')

folder = os.path.join('as_links_data', year + month)
fnames = sorted(os.listdir(folder))

ipv6_nodes = set() # Set of all nodes that have adopted IPv6

for idx, fname in enumerate(fnames):
    if not fname.endswith('.gz'):
        continue
    if monitor is not None and monitor not in fname:
        continue
    print(f'Analyzing {fname}')
    with gzip.open(os.path.join(folder, fname), 'r') as f:
        for i, line in enumerate(f):
            line = line.decode('utf-8').strip()
            if line.startswith('D') or line.startswith('I'):
                line = line.split('\t')
                type_ = line[0]
                as1_list = line[1].split(',')
                as2_list = line[2].split(',')
                for as1 in as1_list:
                    as1 = int(as1)
                    if as1 not in G.nodes: # Skip if AS does not appear in AS relationships graph
                        continue
                    if as1 not in ipv6_nodes: # Newly adopted IPv6
                        timestamp = int(fname.split('.')[3])
                        G.nodes[as1]['ipv6'] = True
                        if idx == 0:
                            # Node is initially IPv6
                            G.nodes[as1]['time_adopted'] = 0
                            continue
                        else:
                            # Node adopted IPv6 at this timestamp
                            G.nodes[as1]['time_adopted'] = timestamp
                        
                        # Get num predecessors that have adopted IPv6
                        ipv6_predecessors = 0
                        for n in G.predecessors(as1):
                            if G.nodes[n].get('ipv6'):
                                ipv6_predecessors += 1
                        
                        G.nodes[as1]['ipv6_predecessors'] = ipv6_predecessors
                        num_predecessors = len(list(G.predecessors(as1)))
                        G.nodes[as1]['num_predecessors'] = num_predecessors

                        if idx != 0 and num_predecessors != 0:
#                             print(as1, G.nodes[as1])
                            print(float(ipv6_predecessors) / num_predecessors)

                        ipv6_nodes.add(as1)

#                     for as2 in as2_list:
#                         as2 = int(as2)


G # nodes: 63361
G # edges: 320978
Analyzing cycle-aslinks.l8.20181201.1543650802.sea-us.txt.gz
Analyzing cycle-aslinks.l8.20181201.1543685293.sea-us.txt.gz
1.0
0.48363636363636364
1.0
1.0
1.0
0.8636363636363636
1.0
0.7777777777777778
1.0
0.75
0.7058823529411765
0.0
1.0
1.0
0.6
0.8888888888888888
1.0
1.0
0.6363636363636364
1.0
1.0
0.6666666666666666
1.0
1.0
0.8181818181818182
0.8
0.75
1.0
0.9285714285714286
0.75
1.0
1.0
1.0
1.0
0.7692307692307693
0.5061728395061729
0.5
1.0
1.0
0.5333333333333333
1.0
0.7272727272727273
0.43333333333333335
0.6
1.0
0.8333333333333334
0.7272727272727273
1.0
1.0
1.0
0.5555555555555556
1.0
0.75
0.8
0.75
0.8571428571428571
0.8
1.0
1.0
1.0
1.0
0.5
1.0
0.7058823529411765
0.5714285714285714
0.7307692307692307
0.782608695652174
0.5
0.7058823529411765
0.5
0.6
0.5735294117647058
0.6
0.5
0.8333333333333334
0.7333333333333333
1.0
0.5122950819672131
0.75
0.625
0.725
0.9
0.8333333333333334
0.5
0.5276073619631901
1.0
0.35714285714285715
0.8
0.6666666666666666
0.7
0.6785

0.4387990762124711
0.401854714064915
0.3333333333333333
0.7777777777777778
1.0
0.8636363636363636
0.78125
0.6
0.42857142857142855
0.6575342465753424
0.5708154506437768
0.6888888888888889
0.7142857142857143
0.5882352941176471
0.7
0.40046838407494145
0.673469387755102
0.8571428571428571
0.7948717948717948
0.7441860465116279
0.4166666666666667
0.5454545454545454
0.5853658536585366
0.8125
0.3333333333333333
1.0
0.65
0.9166666666666666
0.5652173913043478
0.7391304347826086
1.0
1.0
0.5714285714285714
0.7
0.8666666666666667
0.7142857142857143
0.3673469387755102
0.4857142857142857
1.0
0.6666666666666666
0.4666666666666667
0.5588235294117647
0.6966292134831461
1.0
1.0
0.6739130434782609
0.6815642458100558
0.6666666666666666
1.0
0.6129032258064516
1.0
0.0
0.76
0.85
0.6923076923076923
0.7058823529411765
1.0
0.5
0.625
0.6666666666666666
1.0
0.6693548387096774
0.7142857142857143
0.8378378378378378
1.0
0.9
1.0
0.8
1.0
0.5555555555555556
1.0
0.68
1.0
0.41289198606271776
0.6136363636363636
0.321091290

0.8529411764705882
0.5671641791044776
1.0
0.75
1.0
1.0
1.0
0.8055555555555556
0.8
1.0
0.7758620689655172
0.8
1.0
0.8367346938775511
1.0
0.7857142857142857
1.0
1.0
0.875
1.0
0.75
0.5
0.8888888888888888
1.0
0.5
0.88
0.9230769230769231
0.9166666666666666
0.5
0.6666666666666666
Analyzing cycle-aslinks.l8.20181202.1543719660.sea-us.txt.gz
1.0
0.4444444444444444
1.0
0.7241379310344828
1.0
1.0
0.8
1.0
0.8
0.75
1.0
0.5
1.0
Analyzing cycle-aslinks.l8.20181202.1543754059.sea-us.txt.gz
1.0
0.8095238095238095
0.8333333333333334
0.4827586206896552
0.62
0.8103448275862069
0.0
0.4444444444444444
0.6666666666666666
0.9
Analyzing cycle-aslinks.l8.20181202.1543788365.sea-us.txt.gz
0.84375
0.7
1.0
0.5909090909090909
0.0
0.9166666666666666
0.8888888888888888
0.43902439024390244
0.46153846153846156
1.0
Analyzing cycle-aslinks.l8.20181203.1543822751.sea-us.txt.gz
0.8
0.9
1.0
0.6666666666666666
0.6
0.8571428571428571
0.5
1.0
0.8
0.7692307692307693
Analyzing cycle-aslinks.l8.20181203.1543857327.sea-us.txt.gz
