In [146]:
import pandas as pd
import numpy as np
import json
from random import randint
from collections import defaultdict

In [147]:
DOMESTIC = "2018-07-Domestic-Exchange-Index"
INTER = "2018-07-International-Exchange-Index"

In [148]:
domestic_data = pd.read_csv(DOMESTIC+".csv", dtype='str')
inter_data = pd.read_csv(INTER+".csv", dtype='str')
inter_data

Unnamed: 0,ASN-source,ASN,Name,Type,Bandwidth,Gb/s,Connectivity Type
0,AS38566,AS2914,NTT (SG),ISP-Inter,350,Mbps.,Transit/IPv6 Dual Stack
1,AS4651,AS45758,Triple T Internet/ Triple T Broadband,ISP-Local,500,Mbps,IPv4
2,AS4651,AS38450,GIN,ISP-Govt,1,Gbps,IPv4
3,AS4651,AS18013,B Network Solution (BNS),ISP-Local,10,Mbps,IPv4
4,AS4651,AS9931,CAT-ISP,ISP-Local,200,Gbps,IPv4
5,AS4651,AS131090,CAT ON Net,ISP-Local,210,Gbps,IPv4
6,AS4651,AS4618,INET,ISP-Local,1.8,Gbps,IPv4
7,AS4651,AS24187,KIRZ,ISP-Local,720,Mbps,IPv4
8,AS4651,AS56067,Metrabyte,ISP-Local,101,Mbps,IPv4
9,AS4651,AS38888,MILCOM,ISP-Local,600,Mbps,IPv4


In [149]:
domestic_names = list(set(domestic_data["Name"].values))
inter_names = list(set(inter_data["Name"].values))

DOMESTIC_NODES = defaultdict(set)
for item in domestic_data[["ASN", "Name"]].values:
    DOMESTIC_NODES[item[0]].add(item[1])
    
INTER_NODES = defaultdict(set)
for item in inter_data[["ASN", "Name"]].values:
    INTER_NODES[item[0]].add(item[1])
for item in inter_data["ASN-source"].values:
    if len(INTER_NODES[item]) == 0:
        INTER_NODES[item].add(item)
        inter_names.append(item)

In [150]:
DOMESTIC_EDGES = [(list(DOMESTIC_NODES[item[0]])[0], item[1]) for item in domestic_data[["ASN-source", "Name"]].values]

INTER_EDGES = [(list(INTER_NODES[item[0]])[0], item[1]) for item in inter_data[["ASN-source", "Name"]].values]

INTER_EDGES

[('NTT (TH)', 'NTT (SG)'),
 ('AS4651', 'Triple T Internet/ Triple T Broadband'),
 ('AS4651', 'GIN'),
 ('AS4651', 'B Network Solution (BNS)'),
 ('AS4651', 'CAT-ISP'),
 ('AS4651', 'CAT ON Net'),
 ('AS4651', 'INET'),
 ('AS4651', 'KIRZ'),
 ('AS4651', 'Metrabyte'),
 ('AS4651', 'MILCOM'),
 ('AS4651', 'NIPA.CLOUD'),
 ('AS4651', 'NTT (TH)'),
 ('AS4651', 'Pacific Internet'),
 ('AS4651', 'PROEN Internet'),
 ('AS4651', 'ServeNET'),
 ('AS4651', 'SIAMDATA'),
 ('AS4651', 'TOT ISP'),
 ('AS4651', 'TOT LLI'),
 ('AS4651', 'True Internet Corporation'),
 ('AS4651', 'UIH/ BEENET'),
 ('AS4651', 'ICT MOF'),
 ('AS4651', 'UniNet'),
 ('AS4651', 'AMS-IX (AN)'),
 ('AS4651', 'Any2Coresite (US)'),
 ('AS4651', 'BBIX (HK)'),
 ('AS4651', 'BBIX (SG)'),
 ('AS4651', 'CHT-I (TW)'),
 ('AS4651', 'CHT-I HiNet (TW)'),
 ('AS4651', 'DE-CIX (DE)'),
 ('AS4651', 'EdgeCast (SG)'),
 ('AS4651', 'Equinix (FR)'),
 ('AS4651', 'Equinix (SG)'),
 ('AS4651', 'Equinix San Jose (US)'),
 ('AS4651', 'Etisalat (UAE)'),
 ('AS4651', 'Facebook (SG)

In [151]:
def new_node(color, label, attributes, x, y, _id):
    node = {}
    node["color"] = color
    node["label"] = label
    node["attributes"] = attributes
    node["y"] = y
    node["x"] = x
    node["id"] = _id
    node["size"] = len([1 for pair in EDGES if label in pair]) * 5
    return node

In [152]:
def new_edge(sourceID, attributes, targetID, size):
    edge = {}
    edge["sourceID"] = sourceID
    edge["attributes"] = attributes
    edge["targetID"] = targetID
    edge["size"] = size
    return edge

In [153]:
colors = [
"#C91F37",
"#DC3023",
"#9D2933",
"#CF000F",
"#E68364",
"#F22613",
"#CF3A24",
"#C3272B",
"#8F1D21",
"#D24D57",
"#F08F907",
"#F47983",
"#DB5A6B",
"#C93756",
"#FCC9B9",
"#FFB3A7",
"#F62459",
"#F58F84",
"#875F9A",
"#5D3F6A",
"#89729E",
"#763568",
"#8D608C",
"#A87CA0",
"#5B3256",
"#BF55EC",
"#8E44AD",
"#9B59B6",
"#BE90D4",
"#4D8FAC",
"#5D8CAE",
"#22A7F0",
"#19B5FE",
"#59ABE3",
"#48929B",
"#317589",
"#89C4F4",
"#4B77BE",
"#1F4788",
"#003171",
"#044F67",
"#264348",
"#7A942E",
"#8DB255",
"#5B8930",
"#6B9362",
"#407A52",
"#006442",
"#87D37C",
"#26A65B",
"#26C281",
"#049372",
"#2ABB9B",
"#16A085",
"#36D7B7",
"#03A678",
"#4DAF7C",
"#D9B611",
"#F3C13A",
"#F7CA18",
"#E2B13C",
"#A17917",
"#F5D76E",
"#F4D03F",
"#FFA400",
"#E08A1E",
"#FFB61E",
"#FAA945",
"#FFA631",
"#FFB94E",
"#E29C45",
"#F9690E",
"#CA6924",
"#F5AB35",
"#BFBFBF",
"#F2F1EF",
"#BDC3C7",
"#ECF0F1",
"#D2D7D3",
"#757D75",
"#EEEEEE",
"#ABB7B7",
"#6C7A89",
"#95A5A6"]
colors_count = len(colors)

In [154]:
def gen_nodes(names):
    nodes = []
    for node in names:
        nodes.append(
            new_node(
                color=colors[randint(0, colors_count-1)], 
                label=node, 
                attributes={}, 
                x=randint(-5, 5), 
                y=randint(-5, 5), 
                _id=node
            )
        )
    return nodes
domestic_nodes = gen_nodes(domestic_names)
inter_nodes = gen_nodes(inter_names)
inter_nodes

[{'attributes': {},
  'color': '#F4D03F',
  'id': 'CS Loxinfo',
  'label': 'CS Loxinfo',
  'size': 5,
  'x': 0,
  'y': -3},
 {'attributes': {},
  'color': '#003171',
  'id': 'MANDA (UK)',
  'label': 'MANDA (UK)',
  'size': 0,
  'x': -2,
  'y': 2},
 {'attributes': {},
  'color': '#E2B13C',
  'id': 'Beeline (LA)',
  'label': 'Beeline (LA)',
  'size': 0,
  'x': 5,
  'y': -2},
 {'attributes': {},
  'color': '#26A65B',
  'id': 'AT&T (DE)',
  'label': 'AT&T (DE)',
  'size': 0,
  'x': 4,
  'y': 4},
 {'attributes': {},
  'color': '#F47983',
  'id': 'SOFTBANK (SG)',
  'label': 'SOFTBANK (SG)',
  'size': 0,
  'x': 4,
  'y': 4},
 {'attributes': {},
  'color': '#049372',
  'id': 'Limelight (US)',
  'label': 'Limelight (US)',
  'size': 0,
  'x': 5,
  'y': 1},
 {'attributes': {},
  'color': '#A87CA0',
  'id': 'Triple T Internet',
  'label': 'Triple T Internet',
  'size': 25,
  'x': -1,
  'y': -3},
 {'attributes': {},
  'color': '#BFBFBF',
  'id': 'Zayo (NL)',
  'label': 'Zayo (NL)',
  'size': 0,
  '

In [155]:
def gen_edges(edge_data):
    edges = []
    for item in edge_data:
        edges.append(new_edge(item[0], {}, item[1], 1))
    return edges

domestic_edges = gen_edges(DOMESTIC_EDGES)
inter_edges = gen_edges(INTER_EDGES)
len(inter_edges)

637

In [156]:
def gen_graph(nodes, edges):
    graph = {}
    graph["nodes"] = nodes
    graph["edges"] = edges
    return graph

domestic_graph = gen_graph(domestic_nodes, domestic_edges)
inter_graph = gen_graph(inter_nodes, inter_edges)

In [157]:
def dump_graph(file_name, graph):
    with open(file_name+".json", 'w') as outfile:
        json.dump(graph, outfile, indent=4, separators=(',', ': '), sort_keys=True)
        
dump_graph(DOMESTIC, domestic_graph)
dump_graph(INTER, inter_graph)