In [2]:
import numpy as np
import pandas as pd
import pygraphviz as pgv
from scipy.interpolate import BPoly

In [3]:
# Load the node and edge tables; the first CSV column is used as the row index.
nodes_init, edges_init = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('node_data.csv', 'edge_data.csv')
)

In [6]:
# Treat `size` as the area of a circle and store the matching diameter:
# area = pi * (d / 2) ** 2  =>  d = 2 * sqrt(area / pi).
nodes_init['diameter'] = 2 * np.sqrt(nodes_init['size'] / np.pi)
nodes_init.head()

Unnamed: 0,country,iso_code,un_region,avocado_trade,size,diameter
0,USA,US,Americas,3215264000.0,60.0,8.740387
1,Mexico,MX,Americas,2835944000.0,52.921511,8.208639
2,Netherlands,NL,Europe,2248525000.0,41.959688,7.309223
3,Peru,PE,Americas,1474335000.0,27.512551,5.91862
4,Spain,ES,Europe,972406800.0,18.146069,4.806693


In [7]:
# Preview the first rows of the raw edge table loaded from 'edge_data.csv'.
edges_init.head()

Unnamed: 0,country_source,region_source,country_target,region_target,iso_source,iso_target
0,Mexico,Americas,Canada,Americas,MX,CA
1,USA,Americas,Canada,Americas,US,CA
2,Chile,Americas,China,Asia,CL,CN
3,Mexico,Americas,Chile,Americas,MX,CL
4,Peru,Americas,Chile,Americas,PE,CL


In [9]:
# Building the graph

# Adjacency mapping: every source ISO code -> list of its distinct target ISO
# codes. Sources and targets both keep their order of first appearance in
# `edges_init`, which fixes the order in which they are handed to Graphviz.
layout_dict = {
    source_iso: edges_init.loc[edges_init['iso_source'] == source_iso,
                               'iso_target'].unique().tolist()
    for source_iso in edges_init['iso_source'].unique().tolist()
}

# ISO code -> circle diameter computed on the node table.
diameter_dict = nodes_init.set_index('iso_code')['diameter'].to_dict()

G = pgv.AGraph(layout_dict, directed=True, strict=False, splines='curved')

# Draw every node as a fixed-size circle whose width and height both equal the
# diameter looked up for its ISO code (a node missing from `diameter_dict`
# raises KeyError).
for graph_node in G.iternodes():
    node_diameter = diameter_dict[graph_node]
    graph_node.attr['shape'] = 'circle'
    graph_node.attr['width'] = node_diameter
    graph_node.attr['height'] = node_diameter
    graph_node.attr['fixedsize'] = True
    graph_node.attr['fontsize'] = 1

# Give every edge the same `len` attribute before the layout is computed.
for graph_edge in G.iteredges():
    graph_edge.attr['len'] = 50
        
# Run the `fdp` layout engine; afterwards nodes, edges and the graph itself
# carry position attributes (`pos`, `bb`).
G.layout(prog='fdp')

# `bb` is the bounding box of the drawing as "llx,lly,urx,ury". Graphviz may
# write these numbers with a fractional part, and int('2644.5') raises
# ValueError, so each component is parsed as a float before truncating.
# The upper-right corner (urx, ury) is taken as the canvas width / height.
bbox = [float(component) for component in G.graph_attr['bb'].split(',')]
graph_width = int(bbox[2])
graph_height = int(bbox[3])

In [41]:
# New nodes

# For every laid-out graph node, record its centre (parsed from the "x,y"
# `pos` attribute) and its diameter (the `width` attribute set earlier).
node_dict = dict()

for graph_node in G.nodes():
    pos_parts = graph_node.attr['pos'].split(',')
    node_dict[graph_node] = {
        'x': float(pos_parts[0]),
        'y': float(pos_parts[1]),
        'diameter': float(graph_node.attr['width']),
    }

# Rows are keyed by graph node name (the ISO code). The index is named
# 'index' so that it turns into the 'index' column on the final reset, and
# the descriptive columns are joined in from the original node table.
nodes = (
    pd.DataFrame.from_dict(node_dict, orient='index')
    .rename_axis('index')
    .join(nodes_init.set_index('iso_code')[['country', 'un_region', 'size']])
    .reset_index()
)

# Marker column distinguishing node rows from edge rows in the combined table.
nodes['type'] = 'node'

nodes.head()

Unnamed: 0,index,x,y,diameter,country,un_region,size,type
0,MX,1588.6,1056.8,8.2083,Mexico,Americas,52.921511,node
1,CA,1769.9,99.135,2.7639,Canada,Americas,6.005643,node
2,CL,1741.3,2044.4,2.8472,Chile,Americas,6.361745,node
3,JP,1157.5,823.57,1.9444,Japan,Asia,2.986355,node
4,GB,1123.7,2426.1,2.6944,UK,Europe,5.711713,node


In [42]:
# New edges

N_CURVE_POINTS = 50  # number of samples taken along each edge curve


def _edge_control_points(pos):
    """Turn a Graphviz edge ``pos`` string into an ordered list of (x, y) tuples.

    Graphviz writes an edge spline as space-separated ``x,y`` control points,
    optionally preceded by ``e,x,y`` (arrowhead tip at the end of the edge)
    and/or ``s,x,y`` (arrowhead tip at the start of the edge). Each tip is
    moved to the matching end of the point list, so the returned points run
    from the start of the edge to its end.

    For the ``e,x,y p1 p2 ...`` strings produced for this directed graph the
    result is ``[p1, p2, ..., tip]``.
    """
    def parse_xy(text):
        parts = text.split(',')
        return float(parts[0]), float(parts[1])

    start_tip = end_tip = None
    points = []
    for token in pos.split():
        if token.startswith('e,'):
            end_tip = parse_xy(token[2:])
        elif token.startswith('s,'):
            start_tip = parse_xy(token[2:])
        else:
            points.append(parse_xy(token))

    if start_tip is not None:
        points.insert(0, start_tip)
    if end_tip is not None:
        points.append(end_tip)
    return points


ids = []
source_ids = []
target_ids = []
xs = []
ys = []

# Curve parameter values shared by every edge: evenly spaced over [0, 1].
curve_params = np.linspace(0, 1, N_CURVE_POINTS)

for edge_id, edge in enumerate(G.edges()):
    ids.append(edge_id)
    source_ids.append(edge[0])
    target_ids.append(edge[1])

    # Control points of the edge as an (n_points, 2) array.
    control_points = np.array(_edge_control_points(edge.attr['pos']))

    # All control points are used as the coefficients of one Bernstein
    # polynomial on [0, 1], i.e. the edge is treated as a single Bezier curve
    # whose degree is n_points - 1.
    curve = BPoly(control_points[:, None, :], [0, 1])

    # Evaluate the curve; the result has one (x, y) row per parameter value.
    sampled = curve(curve_params)
    xs.append(sampled[:, 0])
    ys.append(sampled[:, 1])

# Built row-wise and transposed on purpose: the columns stay object-typed,
# so integer ids / path steps are not turned into floats when the node rows
# (which lack those columns) are concatenated later.
edges = pd.DataFrame([ids, source_ids, target_ids, xs, ys]).T
edges.columns = ['id', 'source_id', 'target_id', 'x', 'y']

# Look up region / country names by ISO code from the node table. An edge
# takes the region of its source node.
nodes_by_iso = nodes.set_index('index')
region_by_iso = nodes_by_iso['un_region'].to_dict()
country_by_iso = nodes_by_iso['country'].to_dict()

edges['un_region'] = edges['source_id'].map(region_by_iso)
edges['country_source'] = edges['source_id'].map(country_by_iso)
edges['country_target'] = edges['target_id'].map(country_by_iso)
edges = edges.drop(['source_id', 'target_id'], axis=1)

# `path` numbers the sampled points of each edge (0, 1, 2, ...) so the drawing
# order survives once every point becomes its own row.
edges['path'] = [np.arange(len(curve_x)) for curve_x in edges['x']]
edges = edges.explode(['x', 'y', 'path'])
edges['type'] = 'edge'

edges.head()

Unnamed: 0,id,x,y,un_region,country_source,country_target,path,type
0,0,1657.4,769.15,Americas,Mexico,Canada,0,edge
0,0,1663.397999,743.764789,Americas,Mexico,Canada,1,edge
0,0,1669.106008,719.492733,Americas,Mexico,Canada,2,edge
0,0,1674.535645,696.273531,Americas,Mexico,Canada,3,edge
0,0,1679.69823,674.049126,Americas,Mexico,Canada,4,edge


In [43]:
# Table concatenation

# Stack the node rows on top of the edge rows and renumber the index; a column
# that exists in only one of the two frames is left empty for the other.
data = pd.concat(objs=[nodes, edges], axis=0, ignore_index=True)
data.to_csv('network_pgv.csv')

In [44]:
# Canvas width: the third component of the laid-out graph's `bb` attribute,
# converted to an integer when the graph was built.

graph_width

2644

In [45]:
# Canvas height: the fourth component of the laid-out graph's `bb` attribute,
# converted to an integer when the graph was built.

graph_height

3074

In [46]:
# Height / Width ratio

# Computed from the measured canvas size instead of retyped literals, so the
# ratio stays correct whenever the layout (and hence the bounding box) changes.
graph_height / graph_width

1.1626323751891074