In [10]:
import pandas as pd
import numpy as np
import json
from scipy.interpolate import BPoly

In [11]:
# Load the graph description from disk.
# The encoding is given explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding and could mis-decode
# non-ASCII characters on some systems.
with open('AvocadoGraph.json', encoding='utf-8') as f:
    json_data = json.load(f)

# Show the top-level keys of the parsed document.
json_data.keys()

dict_keys(['name', 'directed', 'strict', '_draw_', 'bb', 'layout', 'splines', 'xdotversion', '_subgraph_cnt', 'objects', 'edges'])

In [12]:
# Node Processing
#
# Builds the `nodes` table with one row per entry of json_data['objects'].

node_records = []
for node in json_data['objects']:
    # 'pos' is an "x,y" string: split it once and keep the first two parts.
    pos_x, pos_y = node['pos'].split(',')[:2]
    node_records.append({
        'id': node['_gvid'],
        'iso_code': node['name'],
        'country': node['label'],
        'size': node['size'],
        'un_region': node['region'],
        'x': float(pos_x),
        'y': float(pos_y),
    })

# Building the frame from per-row records lets pandas infer a dtype for each
# column; transposing a list of column lists would leave every column as
# `object`. The explicit column list keeps the layout stable even when there
# are no nodes at all.
nodes = pd.DataFrame(
    node_records,
    columns=['id', 'iso_code', 'country', 'size', 'un_region', 'x', 'y'],
)

# Tag the rows so they stay identifiable after being combined with edge rows.
nodes['type'] = 'node'

nodes.head()

Unnamed: 0,id,iso_code,country,size,un_region,x,y,type
0,0,MX,Mexico,52.922,Americas,359.54,686.48,node
1,1,CA,Canada,6.006,Americas,400.08,891.1,node
2,2,US,USA,60.0,Americas,192.47,783.97,node
3,3,CL,Chile,6.362,Americas,195.49,550.49,node
4,4,CN,China,2.821,Asia,45.99,569.06,node


In [13]:
# Curved Edge Processing
#
# Builds the `edges` table: every edge in json_data['edges'] is sampled at
# CURVE_SAMPLES points along a curve defined by its control points, and each
# sampled point becomes one row.

CURVE_SAMPLES = 50  # number of points sampled along every edge


def _parse_xy(text):
    """Convert an 'x,y' string into an (x, y) tuple of floats."""
    parts = text.split(',')
    return float(parts[0]), float(parts[1])


def _edge_control_points(pos):
    """Return the ordered control points of an edge `pos` string as an (n, 2) array.

    Tokens are separated by single spaces. A token prefixed with 'e,' is an
    end point and is placed last; a token prefixed with 's,' is a start point
    and is placed first; all other tokens keep their relative order.

    For the directed-graph layout ('e,x,y' followed by plain points) the
    result is the plain points followed by the end point. Strings without a
    prefix are parsed as-is instead of losing their first two characters.
    """
    start, middle, end = [], [], []
    for token in pos.split(' '):
        if token.startswith('e,'):
            end.append(_parse_xy(token[2:]))
        elif token.startswith('s,'):
            start.append(_parse_xy(token[2:]))
        else:
            middle.append(_parse_xy(token))
    return np.array(start + middle + end)


# Curve parameter values in [0, 1]; identical for every edge, so computed once.
sample_ts = np.linspace(0, 1, CURVE_SAMPLES)

# Node id -> country name, built once and used for both ends of every edge.
country_by_id = nodes.set_index('id')['country'].to_dict()

edge_records = []
for edge in json_data['edges']:
    control_points = _edge_control_points(edge['pos'])

    # Bernstein-basis polynomial on the single interval [0, 1] whose
    # coefficients are the control points (one curve through all of them).
    # NOTE(review): Graphviz documents edge splines as piecewise cubic
    # Beziers; treating all points as one curve is an approximation --
    # confirm this is the intended look.
    curve = BPoly(control_points[:, None, :], [0, 1])

    # One (x, y) pair per parameter value.
    sampled = curve(sample_ts)

    edge_records.append({
        'id': edge['_gvid'],
        'un_region': edge['region'],
        'x': sampled.T[0],
        'y': sampled.T[1],
        # NaN (not None) for unknown ids, matching what Series.map would give.
        'country_source': country_by_id.get(edge['tail'], np.nan),
        'country_target': country_by_id.get(edge['head'], np.nan),
        # Position of each sampled point along its edge.
        'path': np.arange(CURVE_SAMPLES),
    })

edges = pd.DataFrame(
    edge_records,
    columns=['id', 'un_region', 'x', 'y',
             'country_source', 'country_target', 'path'],
)

# Expand the per-edge arrays so that every sampled point gets its own row;
# rows of the same edge keep that edge's index label.
edges = edges.explode(['x', 'y', 'path'])
edges['type'] = 'edge'

edges.head()

Unnamed: 0,id,un_region,x,y,country_source,country_target,path,type
0,0,Americas,376.35,758.77,Mexico,Canada,0,edge
0,0,Americas,377.305127,762.931492,Mexico,Canada,1,edge
0,0,Americas,378.222106,766.940925,Mexico,Canada,2,edge
0,0,Americas,379.102388,770.805551,Mexico,Canada,3,edge
0,0,Americas,379.947388,774.532381,Mexico,Canada,4,edge


In [14]:
# Non-Curved Edge Processing
#
# Disabled alternative to the curved-edge cell. Everything below is wrapped in
# a triple-quoted string, so none of it is executed; because that string is the
# cell's only expression, the notebook simply echoes it as the cell output.
# As written, the disabled code would rebuild `edges` from the raw control
# points of each edge, without sampling a curve through them.

'''
ids = []
source_ids = []
target_ids = []
regions = []
xs = []
ys = []

for edge in json_data['edges']:
    ids.append(edge['_gvid'])
    source_ids.append(edge['tail'])
    target_ids.append(edge['head'])
    regions.append(edge['region'])
    
    # PROCESSING THE EXTRACTED POS VARIABLE (1–6):
    # 1a - getting rid of the preceding 'e' (for a DIRECTED graph)
    # 1b - creating a control point array from a string
    array = [
        tuple([float(coord.split(',')[0]),
               float(coord.split(',')[1])])
        for coord in edge['pos'][2:].split(' ')
    ]
    
    # 2 - moving the first array point to the end (for a DIRECTED graph)
    points = np.array(array[1:] + array[:1])
    
    # 3 - extracting point xs and ys
    xs.append([p[0] for p in points])
    ys.append([p[1] for p in points])

edges = pd.DataFrame([ids, source_ids, target_ids, regions, xs, ys]).T
edges.columns = ['id', 'source_id', 'target_id', 'un_region', 'x', 'y']

edges['country_source'] = edges['source_id'].map(
    nodes.set_index('id')['country'].to_dict())
edges['country_target'] = edges['target_id'].map(
    nodes.set_index('id')['country'].to_dict())
edges = edges.drop(['source_id', 'target_id'], axis=1)
edges['path'] = [
    np.arange(len(x)) for x in edges['x']
]
edges = edges.explode(['x', 'y', 'path'])
edges['type'] = 'edge'

edges.head()
'''

"\nids = []\nsource_ids = []\ntarget_ids = []\nregions = []\nxs = []\nys = []\n\nfor edge in dirgraphcurved_dict['edges']:\n    ids.append(edge['_gvid'])\n    source_ids.append(edge['tail'])\n    target_ids.append(edge['head'])\n    regions.append(edge['region'])\n    array = [\n        tuple([float(coord.split(',')[0]),\n               float(coord.split(',')[1])])\n        for coord in edge['pos'][2:].split(' ')\n    ]\n    points = np.array(array[1:] + array[:1])\n    xs.append([p[0] for p in points])\n    ys.append([p[1] for p in points])\n\nedges = pd.DataFrame([ids, source_ids, target_ids, regions, xs, ys]).T\nedges.columns = ['id', 'source_id', 'target_id', 'un_region', 'x', 'y']\n\nedges['country_source'] = edges['source_id'].map(\n    nodes.set_index('id')['country'].to_dict())\nedges['country_target'] = edges['target_id'].map(\n    nodes.set_index('id')['country'].to_dict())\nedges = edges.drop(['source_id', 'target_id'], axis=1)\nedges['path'] = [\n    np.arange(len(x)) for x

In [15]:
# Table Concatenating
#
# Stack the node rows on top of the edge rows under a fresh 0..n-1 index,
# then write the combined table (index column included) to 'network.csv'.

data = pd.concat(objs=[nodes, edges], ignore_index=True)
data.to_csv(path_or_buf='network.csv')