In [8]:
import json
import math
import os
import re
from glob import glob
from pathlib import Path
from time import time

# import pydot
import networkx as nx
import numpy as np
from matplotlib import collections  as mc
from natsort import natsorted
from networkx.drawing.nx_pydot import graphviz_layout
from networkx.drawing.nx_pydot import read_dot
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# removed the old names, so use whichever spelling this installation provides.
_colorblind_style = (
    'seaborn-v0_8-colorblind'
    if 'seaborn-v0_8-colorblind' in plt.style.available
    else 'seaborn-colorblind'
)
plt.style.use(_colorblind_style)
from IPython.display import clear_output

# Regular expressions used by the line-based DOT parsing helpers below.
# Raw strings keep `\d` and `\[` as regex escapes; in a plain string literal they
# are invalid escape sequences that newer Python versions warn about.
nodePattern = re.compile(r'^(\d+) ')            # leading integer node id
edgePattern = re.compile(r'^(\d+) -- (\d+)')    # "<source> -- <target>"
attrsPattern = re.compile(r'\[(.+)\]')          # text between '[' and the last ']'

def isNode(line):
    """Return True when `line` contains no `--` edge operator."""
    hasEdgeOperator = line.find('--') >= 0
    return not hasEdgeOperator


def processAttrs(line):
    """Parse the bracketed attribute list of one DOT statement.

    The text matched by `attrsPattern` is split into `key=value` entries.
    Double quotes are removed from values, and values are converted to `int`,
    then `float`, when possible; everything else stays a string.

    Returns a list of `[key, value]` lists (empty when the line has no
    attribute list). Entries without an `=` are skipped.
    """
    found = attrsPattern.findall(line)
    if not found:
        return []

    # Split only on commas outside double quotes, so a value such as
    # pos="1.5,2" stays in one piece.
    entries = re.split(r',(?=(?:[^"]*"[^"]*")*[^"]*$)', found[0])

    attrs = []
    for entry in entries:
        # partition keeps any further '=' inside the value
        key, sep, value = entry.partition('=')
        if not sep:
            continue
        value = value.replace('"', '').strip()
        for cast in (int, float):
            try:
                value = cast(value)
                break
            except ValueError:
                continue
        attrs.append([key.strip(), value])
    return attrs


def processEdge(line):
    """Parse an edge statement `<source> -- <target> [attrs]` into a dict.

    The dict holds every attribute returned by `processAttrs` plus integer
    `source` and `target` entries (which win over same-named attributes).
    """
    endpoints = edgePattern.findall(line)[0][:2]
    sourceId = int(endpoints[0])
    targetId = int(endpoints[1])
    edge = dict(processAttrs(line))
    edge['source'] = sourceId
    edge['target'] = targetId
    return edge


def processNode(line):
    """Parse a node statement `<id> [attrs]` into a dict.

    The text before the first space is the integer node id; it is stored under
    `id` and wins over a same-named attribute.
    """
    firstToken = line.partition(' ')[0]
    nodeId = int(firstToken)
    node = dict(processAttrs(line))
    node['id'] = nodeId
    return node


def draw(g, pos, edges=True, labels=True, figsize=[8,8], s=2, lw=0.5):
    """Plot graph `g` with matplotlib using the node coordinates in `pos`.

    Parameters
    ----------
    g : graph whose `nodes` / `edges` are drawn
    pos : mapping node -> (x, y)
    edges : draw grey edge lines when True
    labels : write each node's `label` attribute next to it when True
    figsize : figure size passed to `plt.figure`
    s : scatter marker size
    lw : edge line width
    """
    coords = np.array([pos[node] for node in g.nodes])
    fig = plt.figure(figsize=figsize)
    ax = fig.subplots()

    # nodes sit above the edge lines (zorder)
    ax.scatter(coords[:, 0], coords[:, 1], s=s, zorder=3)

    if edges:
        segments = [[pos[u], pos[v]] for (u, v) in g.edges]
        ax.add_collection(mc.LineCollection(segments, colors='grey', linewidths=lw))
    ax.autoscale()
    ax.margins(0.1)

    if labels:
        for node in g.nodes:
            x, y = pos[node][0], pos[node][1]
            plt.text(x, y, g.nodes[node]['label'])
    plt.show()

#     plt.figure(figsize=figsize)
#     nx.draw(
#         g, 
#         pos=pos,
#         node_size=10,
#         width=0.5,
#     )




def subtree_sizes(tree, root):
    """Sizes of the subtrees hanging off each child of `root`.

    `tree` is rooted at `root` with a breadth-first search. For every child of
    `root`, the nodes reachable from that child (the child included) are counted.

    Returns a numpy array with one entry per child.
    """
    # Root the graph that was passed in, not a notebook-global one.
    rooted = nx.bfs_tree(tree, source=root)
    sizes = [len(nx.bfs_tree(rooted, child).nodes) for child in rooted.neighbors(root)]
    return np.array(sizes)


def fan(nodes, origin=[0,0], radius=1, phaseCenter=0, phaseRange=np.pi, ratio=[1,1]):
    """Place `nodes` on a circular arc, each getting an angular share given by `ratio`.

    Parameters
    ----------
    nodes : sequence of node keys
    origin : (x, y) centre of the circle
    radius : circle radius
    phaseCenter : angle (radians) at the middle of the arc
    phaseRange : total angular width (radians) of the arc
    ratio : relative weight per node; normalised to sum to 1

    Returns
    -------
    (pos, phases, ranges) : three dicts keyed by node holding the [x, y]
    position, the node's angle, and the angular width reserved for the node
    (its share of `phaseRange`, scaled by 0.95).

    An empty `nodes` returns three empty dicts.
    """
    pos = {}
    phases = {}
    ranges = {}
    n = len(nodes)
    if n == 0:
        return pos, phases, ranges
    ox, oy = origin[0], origin[1]

    ratioTotal = sum(ratio)
    ratio = [r/ratioTotal for r in ratio]

    # Order by share, then alternate the entries between the two ends of the
    # list while walking from the largest share to the smallest.
    nr = sorted(zip(nodes, ratio), key=lambda x: x[1])
    nr2 = []
    for i in range(len(nr)-1, -1, -1):
        if i % 2 == 0:
            nr2.append(nr[i])
        else:
            nr2.insert(0, nr[i])
    nodes, ratio = zip(*nr2)

    # Running sum of the shares: ratioCumSum[i] is the arc fraction before node i.
    ratioCumSum = [0]
    for r in ratio:
        ratioCumSum.append(ratioCumSum[-1] + r)

    for i in range(n):
        # each node sits in the middle of its own slice of the arc
        angle_offset = (ratioCumSum[i]+ratioCumSum[i+1])/2 * phaseRange
        angle_i = phaseCenter - phaseRange/2 + angle_offset
        pos[nodes[i]] = [ox + radius*np.cos(angle_i), oy + radius*np.sin(angle_i)]
        phases[nodes[i]] = angle_i
        ranges[nodes[i]] = ratio[i] * phaseRange * 0.95
    return pos, phases, ranges


def radial_layout(g, root=None):
    """Lay out the BFS tree of `g` on concentric circles around `root`.

    The root sits at the origin. Each BFS level is placed on a circle whose
    radius equals the level's depth. A node's children share the angular range
    reserved for that node, in proportion to their subtree sizes (see `fan`).

    Parameters
    ----------
    g : graph to lay out; only nodes reachable from `root` are placed
    root : start node; defaults to the first node of `g`

    Returns
    -------
    dict mapping node -> [x, y]
    """
    if root is None:
        # Resolve the default first: the BFS below needs a real source node.
        root = next(iter(g.nodes))
    tree = nx.bfs_tree(g, source=root)

    origin = [0,0]
    pos = {root: origin}
    phases = {root: 0}
    ranges = {root: np.pi*2}

    radius = 0
    frontier = [root]
    while len(pos) < len(tree.nodes):
        radius += 1
        nextFrontier = []
        for parent in frontier:
            children = [c for c in tree.neighbors(parent) if c not in pos]
            nextFrontier += children
            if len(children) > 0:
                subTreeSizes = [len(nx.bfs_tree(tree, c).nodes) for c in children]
                newPos, newPhases, newRanges = fan(
                    children,
                    origin,
                    radius,
                    phaseCenter=phases[parent],
                    phaseRange=ranges[parent],
                    ratio=subTreeSizes,
                )
                pos.update(newPos)
                phases.update(newPhases)
                ranges.update(newRanges)
        frontier = nextFrontier
    return pos

def normalize(node):
    """Convert the string attribute values of `node` in place.

    * `pos` given as `"x,y"` becomes `[float(x), float(y)]`.
    * Any other string loses its double quotes and is converted to `int`,
      then `float`, when possible; otherwise the unquoted string is kept.
    * Values that are not strings are left untouched, so calling the function
      again on the same mapping is a no-op: it neither fails on a list-valued
      `pos` nor truncates floats through `int()`.

    `node` is any mutable mapping of attribute name -> value. Returns None.
    """
    for prop in node:
        value = node[prop]
        if not isinstance(value, str):
            continue

        value = value.replace('"', '')
        if prop == 'pos':
            parts = value.split(',')
            node[prop] = [float(parts[0]), float(parts[1])]
            continue

        try:
            node[prop] = int(value)
        except ValueError:
            try:
                node[prop] = float(value)
            except ValueError:
                node[prop] = value


## generate a graph

### math genealogy

In [None]:
from collections import OrderedDict

In [None]:
# Source-data path for the math-genealogy graph. This cell does not read the file
# (the edge list below is hard-coded); the export cell derives the output JSON path from `fn`.
fn = './data/txt/math-genealogy/data_names_shortened.txt'

# label_to_id = {'K. Müller':66,'A. E. R. Kneschke':22,'B. Karstens':70,'K. E. Stork':35,'W. Fricke':9,'R. C. Straubel':8,'K. Ludwig':49,'W. Lauf':84,'R. Buchweitz':81,'A. K. Holzwarth':24,'O. Mayer':65,'W. von Heygendorff':2,'D. Schuldt':67,'L. Kämmerer':68,'J. Meyer':11,'H. Kessler':12,'G. H. Wannier':30,'V. Hartmann':69,'S. M. geb. Zacharias':56,'A. Hagenbach':3,'L. Neumann':16,'M. Mitchell':83,'M. D. Thomure':91,'E. Oettinger':18,'O. Volk':14,'W. Schorcht':10,'J. Ostwald':0,'M. T. geb. Deutschmann':58,'W. Landecker':89,'H. Tietz':33,'G. Schmieder':82,'K. Heun':6,'M. Krafft':17,'P. Peter':61,'A. Tafelmacher':7,'E. C. G. Stueckelberg':26,'M. Fruth':54,'K. Ebbinghaus':4,'K. J. Thomae':1,'H. Schreiter':51,'G. F. L. Frege':5,'G. Wiarda':19,'M. Cenek':92,'W. Müller':53,'F. W. M. Müller':23,'W. Merten':20,'E. Pippig':71,'P. Katilius':15,'H. Bückner':34,'K. Potthoff':78,'C. Schoen':64,'K. Mätzel':50,'D. Hudak':55,'M. Schoch':52,'H. K. O. Liebmann':13,'K. Neumann':57,'R. Hopsch':60,'G. Stiege':80,'D. R. Hofstadter':32,'D. Tzscharschuch':59,'P. Ghosh':93,'A. Thedy':21,'H. Jäckel':47,'R. Juengling':90,'V. Padervinskas':31,'A. Wolf':25,'E. C. J. Schering':5}
# label_to_id['G. F. L. Frege'] = 999
# id_to_label = {66:'K. Müller',22:'A. E. R. Kneschke',70:'B. Karstens',35:'K. E. Stork',9:'W. Fricke',8:'R. C. Straubel',49:'K. Ludwig',84:'W. Lauf',81:'R. Buchweitz',24:'A. K. Holzwarth',65:'O. Mayer',2:'W. von Heygendorff',67:'D. Schuldt',68:'L. Kämmerer',11:'J. Meyer',12:'H. Kessler',30:'G. H. Wannier',69:'V. Hartmann',56:'S. M. geb. Zacharias',3:'A. Hagenbach',16:'L. Neumann',83:'M. Mitchell',91:'M. D. Thomure',18:'E. Oettinger',14:'O. Volk',10:'W. Schorcht',0:'J. Ostwald',58:'M. T. geb. Deutschmann',89:'W. Landecker',33:'H. Tietz',82:'G. Schmieder',6:'K. Heun',17:'M. Krafft',61:'P. Peter',7:'A. Tafelmacher',26:'E. C. G. Stueckelberg',54:'M. Fruth',4:'K. Ebbinghaus',1:'K. J. Thomae',51:'H. Schreiter',5:'G. F. L. Frege',19:'G. Wiarda',92:'M. Cenek',53:'W. Müller',23:'F. W. M. Müller',20:'W. Merten',71:'E. Pippig',15:'P. Katilius',34:'H. Bückner',78:'K. Potthoff',64:'C. Schoen',50:'K. Mätzel',55:'D. Hudak',52:'M. Schoch',13:'H. K. O. Liebmann',57:'K. Neumann',60:'R. Hopsch',80:'G. Stiege',32:'D. R. Hofstadter',59:'D. Tzscharschuch',93:'P. Ghosh',21:'A. Thedy',47:'H. Jäckel',90:'R. Juengling',31:'V. Padervinskas',25:'A. Wolf',5:'E. C. J. Schering'}
# id_to_label = {v:k for k,v in label_to_id.items()}

# Edge list of the math-genealogy graph as [label, label] pairs.
my_edges = [
    ['J. Ostwald', 'K. J. Thomae'],
    ['J. Ostwald', 'W. von Heygendorff'],
    ['J. Ostwald', 'A. Hagenbach'],
    ['J. Ostwald', 'K. Ebbinghaus'],
    ['K. J. Thomae', 'E. C. J. Schering'],
    ['E. C. J. Schering', 'G. F. L. Frege'],
    ['E. C. J. Schering', 'K. Heun'],
    ['E. C. J. Schering', 'A. Tafelmacher'],
    ['K. J. Thomae', 'R. C. Straubel'],
    ['R. C. Straubel', 'W. Fricke'],
    ['R. C. Straubel', 'W. Schorcht'],
    ['R. C. Straubel', 'J. Meyer'],
    ['R. C. Straubel', 'H. Kessler'],
    ['K. J. Thomae', 'H. K. O. Liebmann'],
    ['H. K. O. Liebmann', 'O. Volk'],
    ['H. K. O. Liebmann', 'P. Katilius'],
    ['K. J. Thomae', 'L. Neumann'],
    ['L. Neumann', 'M. Krafft'],
    ['L. Neumann', 'E. Oettinger'],
    ['L. Neumann', 'G. Wiarda'],
    ['L. Neumann', 'W. Merten'],
    ['R. C. Straubel', 'A. Thedy'],
    ['G. Wiarda', 'A. E. R. Kneschke'],
    ['H. K. O. Liebmann', 'F. W. M. Müller'],
    ['F. W. M. Müller', 'A. K. Holzwarth'],
    ['F. W. M. Müller', 'A. Wolf'],
    ['A. Hagenbach', 'E. C. G. Stueckelberg'],
    ['E. C. G. Stueckelberg', 'G. H. Wannier'],
    ['P. Katilius', 'V. Padervinskas'],
    ['G. H. Wannier', 'D. R. Hofstadter'],
    ['M. Krafft', 'H. Tietz'],
    ['M. Krafft', 'H. Bückner'],
    ['M. Krafft', 'K. E. Stork'],
    ['A. E. R. Kneschke', 'H. Jäckel'],
    ['H. Jäckel', 'K. Ludwig'],
    ['H. Jäckel', 'K. Mätzel'],
    ['H. Jäckel', 'H. Schreiter'],
    ['A. E. R. Kneschke', 'M. Schoch'],
    ['A. E. R. Kneschke', 'W. Müller'],
    ['K. Ludwig', 'M. Fruth'],
    ['K. Ludwig', 'D. Hudak'],
    ['K. Ludwig', 'S. M. geb. Zacharias'],
    ['K. Ludwig', 'K. Neumann'],
    ['K. Ludwig', 'M. T. geb. Deutschmann'],
    ['W. Müller', 'D. Tzscharschuch'],
    ['W. Müller', 'R. Hopsch'],
    ['W. Müller', 'P. Peter'],
    ['K. Neumann', 'C. Schoen'],
    ['K. Neumann', 'O. Mayer'],
    ['M. Schoch', 'K. Müller'],
    ['K. Müller', 'D. Schuldt'],
    ['K. Neumann', 'L. Kämmerer'],
    ['H. Schreiter', 'V. Hartmann'],
    ['H. Schreiter', 'B. Karstens'],
    ['H. Schreiter', 'E. Pippig'],
    ['H. Tietz', 'K. Potthoff'],
    ['H. Tietz', 'R. Buchweitz'],
    ['H. Tietz', 'G. Schmieder'],
    ['H. Tietz', 'G. Stiege'],
    # this label was split across two lines, leaving the string literal unterminated
    ['D. R. Hofstadter', 'M. Mitchell'],
    ['G. Schmieder', 'W. Lauf'],
    ['M. Mitchell', 'W. Landecker'],
    ['M. Mitchell', 'R. Juengling'],
    ['M. Mitchell', 'M. D. Thomure'],
    ['M. Mitchell', 'M. Cenek'],
    ['M. Mitchell', 'P. Ghosh'],
]
# edge_distance = {0:50,1:50,2:50,3:50,4:50,5:50,6:50,7:50,8:50,9:50,10:50,11:50,12:50,13:50,14:50,15:50,16:50,17:50,18:50,19:50,20:50,21:50,22:50,23:50,24:50,25:50,26:50,27:50,28:50,29:50,30:50,31:50,32:50,33:50,34:50,35:50,36:50,37:50,38:50,39:50,40:50,41:50,42:50,43:50,44:50,45:50,46:50,47:50,48:50,49:50,50:50,51:50,52:50,53:50,54:50,55:50,56:50,57:50,58:50,59:50,60:50,61:50,62:50,63:50,64:50,65:50,66:50}


# Flatten the edge pairs into one label sequence (duplicates included).
edges = [label for pair in my_edges for label in pair]
# An ordered mapping used as an ordered set: one entry per distinct label, in first-seen order.
labels = OrderedDict((label, label) for label in edges)

# Integer ids follow the first-seen order of the labels.
label_to_id = dict(zip(labels, range(len(labels))))
id_to_label = dict(enumerate(labels))


# Undirected graph on the integer ids.
g = nx.Graph()
g.add_nodes_from(label_to_id.values())
g.add_edges_from((label_to_id[e0], label_to_id[e1]) for e0, e1 in my_edges)

# Single-level data set: every node and edge gets level 1, every edge weight 50.
for i,n in enumerate(g.nodes):
    g.nodes[n].update(level=1, index=i, id=n, label=id_to_label[n])

for e in g.edges:
    g.edges[e].update(level=1, weight=50)

# i2k: matrix index -> node key (sorted); k2i is the inverse mapping.
i2k = sorted(g.nodes)
k2i = {k:i for i,k in enumerate(i2k)}


# d[i, j]: weighted shortest-path length between nodes i2k[i] and i2k[j].
print('all_pairs_shortest_path...')
apsp = nx.all_pairs_dijkstra_path_length(g, weight='weight')
d = np.zeros([len(g.nodes),len(g.nodes)])
# the generator has no length, so give tqdm the total to get a real progress bar
for dk in tqdm(apsp, total=len(g.nodes)):
    source = k2i[dk[0]]
    target_dist = dk[1]
    d[source,:] = [target_dist[k] for k in i2k]


# hops[i, j]: number of edges on a shortest path. Use the unweighted (BFS) API
# rather than passing `weight=1`, which is not an edge-attribute name.
# print('k-hop all_pairs_shortest_path...')
apsp = nx.all_pairs_shortest_path_length(g)
hops = np.zeros([len(g.nodes),len(g.nodes)])
for dk in tqdm(apsp, total=len(g.nodes)):
    source = k2i[dk[0]]
    target_dist = dk[1]
    hops[source,:] = [target_dist[k] for k in i2k]


### Faryad's Google Topics - 500 / 5,000 nodes

In [14]:
def edges2graph(lines, i2k=None, label2i=None):
    """Build an undirected graph from lines of the form `"label a" -- "label b"`.

    Parameters
    ----------
    lines : iterable of text lines; lines without an edge statement are skipped
    i2k : optional list mapping a label index to the node key used for edges
    label2i : optional dict mapping a label to its integer index; pass the
        mapping returned for another file to give shared labels the same ids

    Returns
    -------
    (g, i2k, label2i). When `label2i` is None, both mappings are created from
    the labels in first-seen order, so ids are the same on every run.
    """
    pattern = re.compile('"(.+)" -- "(.+)"')
    # dicts serve as insertion-ordered sets, so node ids and edge order do not
    # depend on string hashing (which varies between interpreter runs)
    nodes = {}
    edges = {}
    for line in lines:
        match = pattern.search(line)
        if match is None:
            continue
        source, target = match.groups()
        nodes[source] = None
        nodes[target] = None
        edges[(source, target)] = None

    if label2i is None:
        label2i = {k:i for i,k in enumerate(nodes)}
        i2k = list(range(len(nodes)))
    elif i2k is None:
        # identity mapping, the same shape a first call would have produced
        i2k = list(range(len(label2i)))

    g = nx.Graph()
    g.add_nodes_from((label2i[k], dict(id=label2i[k], label=k)) for k in nodes)
    g.add_edges_from((i2k[label2i[s]], i2k[label2i[t]]) for s, t in edges)
    return g, i2k, label2i


# fns = natsorted(glob('./data/txt/topics_faryad_500/*.txt'))
fns = natsorted(glob('./data/txt/topics_faryad_5000/*.txt'))[:7]
max_level = len(fns)
# (file, level) pairs; levels are 1-based positions in the naturally sorted file list
fn_level_pairs = list(zip(fns, range(1, max_level+1)))

# The last file defines the graph; all of its nodes and edges start at max_level.
print(fn_level_pairs[-1])
with open(fns[-1]) as f:
    g, i2k, label2i = edges2graph(f.readlines())
level = max_level
for n in g.nodes:
    g.nodes[n]['level'] = level
for e in g.edges:
    g.edges[e].update(level=level, weight=(max_level - level + 1)*50)
        
# Walk the remaining files from the second-to-last down to the first, overwriting
# the level (and level-dependent weight) of every node/edge found in each file.
# NOTE(review): after this loop `fn` is the first (level-1) file, and the export
# cell derives its output name from `fn` - confirm that this is the intended name.
for fn, level in reversed(fn_level_pairs[:-1]):
    print(fn, level)
    with open(fn) as f:
        subgraph, _, _ = edges2graph(f.readlines(), i2k, label2i)
    for n in subgraph.nodes:
        g.nodes[n]['level'] = level
    for e in subgraph.edges:
        g.edges[e].update(level=level, weight=(max_level - level + 1)*50)

# d[i, j]: weighted shortest-path length. Rows and columns are indexed directly
# by node id, which relies on the ids being 0..n-1 as assigned by edges2graph.
print('all_pairs_shortest_path...')
apsp = nx.all_pairs_dijkstra_path_length(g, weight='weight')
d = np.zeros([len(g.nodes),len(g.nodes)])
# the generator has no length, so give tqdm the total to get a real progress bar
for dk in tqdm(apsp, total=len(g.nodes)):
    source = dk[0]
    target_dist = dk[1]
    d[source,:] = [target_dist[i] for i in range(len(g.nodes))]


# hops[i, j]: number of edges on a shortest path. Use the unweighted (BFS) API
# rather than passing `weight=1`, which is not an edge-attribute name.
# print('k-hop all_pairs_shortest_path...')
apsp = nx.all_pairs_shortest_path_length(g)
hops = np.zeros([len(g.nodes),len(g.nodes)])
for dk in tqdm(apsp, total=len(g.nodes)):
    source = dk[0]
    target_dist = dk[1]
    hops[source,:] = [target_dist[i] for i in range(len(g.nodes))]


('./data/txt/topics_faryad_5000/Graph_1600.txt', 7)
./data/txt/topics_faryad_5000/Graph_1200.txt 6
./data/txt/topics_faryad_5000/Graph_800.txt 5
./data/txt/topics_faryad_5000/Graph_500.txt 4
./data/txt/topics_faryad_5000/Graph_200.txt 3
./data/txt/topics_faryad_5000/Graph_100.txt 2
./data/txt/topics_faryad_5000/Graph_50.txt 1
all_pairs_shortest_path...


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




### Ryn's lastfm

In [None]:
## Ryn's lastfm

print('loading graph...')

fns = natsorted(glob('./data/dot/lastfm-ryn/*.dot'))
max_level = len(fns)
fn_level_pairs = zip(fns, range(1, max_level+1, 1))

## load the last file: it defines the graph
fn = fns[-1]
g = nx.Graph(read_dot(fn))  ## multi-graph to graph

# every node and edge starts at the deepest level
level = max_level
for k in g.nodes:
    attrs = g.nodes[k]
    attrs['level'] = level
    normalize(attrs)
for e in g.edges:
    g.edges[e]['weight'] = (max_level - level + 1)*50

# i2k: matrix index -> node key, ordered by the integer value of the key; k2i inverts it
i2k = sorted(g.nodes, key=int)
k2i = {key: idx for idx, key in enumerate(i2k)}

## modify node and edge attributes
# Walk the remaining files from the second-to-last down to the first, overwriting
# the level of every node and the weight of every edge found in each file.
for i, fn in enumerate(fns[:-1][::-1], 1):
    level = max_level - i
    print(fn, level)

    gi = nx.Graph(read_dot(fn))## multi-graph to graph
    for k in gi.nodes:
        # The attributes of g's nodes were normalized when the graph was loaded.
        # normalize() is not called again, because it is not safe on values that
        # have already been converted.
        g.nodes[k]['level'] = level
    for e in gi.edges:
        g.edges[e]['weight'] = (max_level - level + 1)*50

# the export cell names its output after `fn`: point it back at the last file
fn = fns[-1]


# NOTE(review): adjacency_matrix follows g.nodes order, while d and hops below
# follow i2k order - confirm before combining them.
adj = nx.adjacency_matrix(g).toarray().astype(np.float32)

# d[i, j]: weighted shortest-path length between nodes i2k[i] and i2k[j].
print('all_pairs_shortest_path...')
apsp = nx.all_pairs_dijkstra_path_length(g, weight='weight')
d = np.zeros([len(g.nodes),len(g.nodes)])
# the generator has no length, so give tqdm the total to get a real progress bar
for dk in tqdm(apsp, total=len(g.nodes)):
    k = dk[0]
    d[k2i[k],:] = [dk[1][target] for target in i2k]

# hops[i, j]: number of edges on a shortest path. Use the unweighted (BFS) API
# rather than passing `weight=1`, which is not an edge-attribute name.
print('k-hop all_pairs_shortest_path...')
apsp = nx.all_pairs_shortest_path_length(g)
hops = np.zeros([len(g.nodes),len(g.nodes)])
for dk in tqdm(apsp, total=len(g.nodes)):
    k = dk[0]
    hops[k2i[k],:] = [dk[1][target] for target in i2k]


In [None]:
# ## real data
# fn = './data/dot/topics-iqbal/Topics_Layer_1.dot'
# # fn = './data/dot/lastfm-ryn/lastfm_155nodes.dot'
# nodes = []
# edges = []
# with open(fn) as f:
#     lines = f.readlines()[:-1]
#     lines[0] = lines[0].split('{')[1].strip()
#     for line in lines:
#         if isNode(line):
#             nodes.append(processNode(line))
#         else:
#             edges.append(processEdge(line))
# n = len(nodes)
# nodes = sorted(nodes, key=lambda x:x['id'])
# i2k = [n['id'] for n in nodes]

# g = nx.Graph()
# g.add_nodes_from([n['id'] for n in nodes])
# g.add_edges_from([(e['source'], e['target']) for e in edges])

# print(len(nodes), len(edges), nx.is_tree(g))

# adj = nx.adjacency_matrix(g)
# adj = adj.toarray().astype(np.float)
# # d = adj + (1-adj)*1000
# # d *= (1-np.eye(len(nodes)))


# ## all_pairs_shortest_path
# apsp = nx.all_pairs_shortest_path_length(g)
# d = np.zeros([len(nodes),len(nodes)])
# for i,di in enumerate(tqdm(sorted(apsp))):
#     nodeId = di[0]
#     lengths = [di[1][k] for k in i2k]
#     d[i,:] = lengths
# #     print(nodeId)


## Initial Layout

In [None]:
## test graph
# g = nx.balanced_tree(5, 3)

In [None]:
t0 = time()

# Alternative initial layouts, kept for reference:
# pos0 = nx.layout.planar_layout(g, scale=40)
# pos0 = graphviz_layout(g, prog="dot", root=list(g.nodes)[0])
# pos0 = graphviz_layout(g, prog='twopi')
pos0 = graphviz_layout(g, prog='sfdp')
# work on a copy so the untouched layout stays available in pos0
pos = dict(pos0)
dt = time() - t0
print(f'{dt} sec')

# from scipy.spatial.distance import jensenshannon
# metric = []
# for i in range(len(nodes)):
#     s = subtree_sizes(g, i2k[i])
#     uniform = np.ones(len(s)) / len(s)
#     js = jensenshannon(s, uniform) 
#     metric.append(js)


# metric = d.max(axis=1)
# iBest = np.argmin(metric)
# # iBest = np.argmax(metric)
# pos = radial_layout(g, list(g.nodes)[iBest])


# draw(g, pos, s=10, lw=1, labels=False, figsize=[64,64])


In [None]:
# Plot the current layout without node labels.
draw(g, pos, s=10, lw=1, labels=False, figsize=[12,12])

In [None]:
# Plot the current layout again, this time with each node's `label` attribute as text.
draw(g, pos, s=10, lw=1, labels=True, figsize=[12,12])

## crossing removal

In [None]:
def isCrossed(e0, e1, pos=None):
    """Return whether the drawn segments of edges `e0` and `e1` intersect.

    Parameters
    ----------
    e0, e1 : (node, node) pairs
    pos : mapping node -> (x, y)

    Edges that share an end node never count as crossing. Touching segments
    count as crossing. Segments lying on one common line count only when they
    actually overlap.
    """
    p0, p1 = e0
    q0, q1 = e1
    if p0 in (q0, q1) or p1 in (q0, q1):  ## the two edges share a node
        return False

    a0, a1 = pos[p0], pos[p1]
    b0, b1 = pos[q0], pos[q1]

    s00 = signOf(b0, (a0, a1))
    s10 = signOf(b1, (a0, a1))
    s01 = signOf(a0, (b0, b1))
    s11 = signOf(a1, (b0, b1))

    # each segment must have its end points on both sides of (or on) the other's line
    if not (s00*s10 <= 0 and s01*s11 <= 0):
        return False

    if s00 == 0 and s10 == 0 and s01 == 0 and s11 == 0:
        # All four points are on one line, where the sign test cannot tell
        # overlapping from disjoint segments; their bounding boxes can.
        overlapX = max(min(a0[0], a1[0]), min(b0[0], b1[0])) <= min(max(a0[0], a1[0]), max(b0[0], b1[0]))
        overlapY = max(min(a0[1], a1[1]), min(b0[1], b1[1])) <= min(max(a0[1], a1[1]), max(b0[1], b1[1]))
        return overlapX and overlapY
    return True

    
def signOf(p, e):
    '''Side of point p with respect to the line through edge e.

    Returns +1 when p is counter-clockwise of the direction e[0] -> e[1],
    -1 when it is clockwise, and 0 when p lies on the line.
    '''
    (x0, y0), (x1, y1) = e[0], e[1]
    px, py = p

    # implicit form coefX*x + coefY*y + offset = 0 of the line through both end points
    coefX = y0 - y1
    coefY = x1 - x0
    offset = y0 * (x0 - x1) - x0 * (y0 - y1)

    return np.sign(px*coefX + py*coefY + offset)


def subtreeSize(tree, node):
    """Return `(node count, subtree)` for the BFS tree of `tree` started at `node`."""
    subtree = nx.bfs_tree(tree, node)
    size = subtree.number_of_nodes()
    return size, subtree


def shrink(subtree=None, origin=0, by=0.5, pos=None):
    """Scale the positions of all nodes of `subtree` towards `pos[origin]`, in place.

    Each node's offset from the origin position is multiplied by `by`;
    `pos` is modified and nothing is returned.
    """
    ox, oy = pos[origin]
    for k in subtree.nodes:
        x, y = pos[k]
        pos[k] = (ox + by * (x-ox), oy + by * (y-oy))

        
## find a good root: the node whose position is closest to the layout centroid
id_pos = pos.items()

ids = np.array([node for node, _ in id_pos])
x = np.array([xy for _, xy in id_pos])

centroid = x.mean(axis=0)
dist_to_centroid = np.linalg.norm(x - centroid, ord=2, axis=1)
root = ids[dist_to_centroid.argmin()]
print('root:', g.nodes[root])

tree = nx.bfs_tree(g, source=root)
scaleFactor = 0.7


def orientByTree(edge):
    """Return `edge` as (parent, child) with respect to `tree`.

    Edges of `g` are stored in arbitrary order, so the tree parent may be
    either end. Edges that are not tree edges are returned unchanged.
    """
    u, v = edge
    return (v, u) if tree.has_edge(v, u) else (u, v)


# Repeat until a full pass over all edge pairs finds no crossing. For each
# crossing pair, the smaller of the two child-side subtrees is pulled towards
# its parent by `scaleFactor`.
hasCrossing = True
while hasCrossing:
    hasCrossing = False
    for e0 in g.edges:
        for e1 in g.edges:
            if isCrossed(e0, e1, pos):
                hasCrossing = True
                parent0, child0 = orientByTree(e0)
                parent1, child1 = orientByTree(e1)
                sts0, subtree0 = subtreeSize(tree, child0)
                sts1, subtree1 = subtreeSize(tree, child1)
                if sts0 < sts1:
                    shrink(subtree=subtree0, by=scaleFactor, origin=parent0, pos=pos)
                else:
                    shrink(subtree=subtree1, by=scaleFactor, origin=parent1, pos=pos)
                clear_output(wait=True)
                # draw() already ends with plt.show()
                draw(g, pos, labels=False, s=10, lw=1, figsize=[5,5])

In [None]:
# xy = np.array( [ pos[i2k[i]]for i in range(len(nodes)) ] )
# theta = (xy[:,0] - xy[:,0].min()) / (xy[:,0].max() - xy[:,0].min()) * np.pi*1
# r = (xy[:,1] - xy[:,1].min())
# # r = -(xy[:,1] - xy[:,1].max())

# xy2 = np.c_[r*np.cos(theta), r*np.sin(theta)]
# pos2 = {i2k[i]:xy2[i] for i in range(len(nodes))}
# draw(g, pos2)

# # pos = pos2

In [None]:
# from umap import UMAP

# n_neighbors = 15

# umap = UMAP(
#     n_components=2,
#     n_neighbors=n_neighbors, 
#     min_dist=0.3,
#     metric='precomputed',
# #     n_epochs=500,
# #     negative_sample_rate=150,
# #     learning_rate=0.0001,
# #     init=np.array(list(pos.values()))
# )
# xy = umap.fit_transform(d)
# pos2 = {i2k[i]:xy[i,:2] for i in range(len(nodes))}


## New Ordering of Nodes

In [15]:
## start from the node with the highest degree (the first one on ties)
# entries are (position, (node, degree))
degree = list(enumerate(g.degree))
max_degree_node = max(degree, key=lambda item: item[1][1])
start = max_degree_node[1][0]

## random node, bfs
# start = next(iter(g.nodes.keys()))
## breadth-first order from `start`
node_order = list(nx.bfs_tree(g, source=start).nodes)

##dfs
# node_order = list(nx.dfs_preorder_nodes(g, start))

## no re-ordering
# node_order = list(g.nodes)

## to json

In [16]:
##graph to list
nodeAttrs = {k: g.nodes[k] for k in g.nodes}
edgeTriples = [[u, v, g.edges[u, v]] for u, v in g.edges]

# One record per node, in `node_order`.
# NOTE(review): node attributes are merged last, so an attribute named 'id' or
# 'index' on the node replaces the value computed here - confirm this is intended.
nodes = [
    {**{'id': node_order[i], 'index': i}, **nodeAttrs[node_order[i]]}
    for i in range(len(nodeAttrs))
]
# One record per edge: both end points plus the edge attributes.
edges = [
    {**{'source': u, 'target': v}, **attrs}
    for u, v, attrs in edgeTriples
]

In [17]:
# Display the number of nodes in the current graph.
len(g.nodes)

1601

In [18]:
# ##store the neighbours & perplexity
# (positions are not stored: the x/y assignments are disabled)
for i,node in enumerate(nodes):
    node['neighbors'] = list(g.neighbors(node['id']))
    # perplexity = number of neighbours of the i-th node in node_order
    node['perplexity'] = len(g.adj[node_order[i]])


# One "virtual edge" per unordered pair of matrix indices, carrying the weighted
# distance and the hop count; pairs with distance 0 are reported and left out.
virtual_edges = []
for i in tqdm(range(len(nodes))):
    for j in range(i+1, len(nodes)):
        if d[i,j] == 0:
            print(f'[warning] d[{i},{j}] = 0')
            continue
        e = dict(
            source=i2k[i],
            target=i2k[j],
            weight=d[i,j],
            hops=hops[i,j],
        )
        virtual_edges.append(e)
        
# Derive the output path from the input path: every 'dot' / 'txt' substring
# (directory name and extension alike) becomes 'json'.
fn_out = fn.replace('dot', 'json').replace('txt', 'json')
print(fn_out)

# Create the target directory if needed; exist_ok avoids a separate existence check.
Path(fn_out).parent.mkdir(parents=True, exist_ok=True)
with open(fn_out, 'w') as f:
    json.dump(dict(
        edges = edges,
        virtual_edges = virtual_edges,
        nodes=nodes
    ), f, indent=2)
print('done')

HBox(children=(FloatProgress(value=0.0, max=1601.0), HTML(value='')))


./data/json/topics_faryad_5000/Graph_50.json
done


In [None]:
# Display the number of nodes in the current graph.
len(g.nodes)

In [None]:
# Replace each record's id by its position in the node ordering and drop any stored coordinates.
for n in nodes:
    n['id'] = n['index']
    # x / y exist only when the position-storing lines of the export cell are enabled
    n.pop('x', None)
    n.pop('y', None)