In [1]:
from glob import glob
from natsort import natsorted
import json

def read_external(fn):
    """Parse an external layout file made of ``name = <json>`` lines.

    Every non-blank line is split at its first ``' = '``: the text on the
    left becomes the key, and the text on the right is decoded with
    ``json.loads``.

    Args:
        fn: Path of the text file to read.

    Returns:
        dict mapping each name to its decoded JSON value. If a name occurs
        more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line has no ``' = '`` separator, or its
            value is not valid JSON (``json.JSONDecodeError`` is a subclass
            of ``ValueError``).
    """
    res = {}
    with open(fn) as f:
        for line in f:
            # Blank lines (e.g. an empty line at end of file) carry no entry.
            if not line.strip():
                continue
            # Split only once: the JSON payload may itself contain ' = '.
            name, value = line.split(' = ', 1)
            res[name] = json.loads(value)
    return res


def read_ref(fn):
    """Load the reference graph stored as JSON at ``fn``.

    Args:
        fn: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.
    """
    with open(fn) as handle:
        return json.load(handle)
    
    
def align(data_ext, data_ref):
    """Copy coordinates from an external layout onto a reference graph.

    The external data is read through four of its entries:
    ``label_to_id`` (iterated for its keys and indexed for ids),
    ``alphanum_to_org`` (maps those keys to the labels used as lookup keys),
    and ``crd_x`` / ``crd_y`` (indexed by ``str(id)``).  The reference data
    supplies ``node_label``, whose order determines the order of the
    resulting coordinate lists.

    Args:
        data_ext: Mapping with the entries described above.
        data_ref: Mapping with at least a ``node_label`` sequence.

    Returns:
        ``(res_ref, records)`` where ``res_ref`` is a shallow copy of
        ``data_ref`` with ``node_x`` / ``node_y`` replaced, and ``records``
        is ``objectify(res_ref)``.

    Raises:
        KeyError: if a reference label has no coordinate in the external data
            (or any of the required entries is missing).
    """
    def coords_by_label(axis_key):
        # Build {original label: coordinate} for one axis ('crd_x' or 'crd_y').
        table = {}
        for alnum in data_ext['label_to_id']:
            org_label = data_ext['alphanum_to_org'][alnum]
            table[org_label] = data_ext[axis_key][str(data_ext['label_to_id'][alnum])]
        return table

    x_of = coords_by_label('crd_x')
    y_of = coords_by_label('crd_y')

    # Reference node order decides the order of the output coordinates.
    ref_labels = list(data_ref['node_label'])
    xs_in_ref_order = [x_of[lbl] for lbl in ref_labels]
    ys_in_ref_order = [y_of[lbl] for lbl in ref_labels]

    # Shallow copy: only the two coordinate entries are rebound.
    res_ref = data_ref.copy()
    res_ref['node_x'] = xs_in_ref_order
    res_ref['node_y'] = ys_in_ref_order

    return res_ref, objectify(res_ref)
    
    
def objectify(data,
              prefix='node_',
              terms=set(['id', 'x', 'y', 'label', 'level', 'parent', 'nodeCount', 'weight'])):
    """Turn column-style entries of ``data`` into a list of per-item dicts.

    Only keys of ``data`` that start with ``prefix`` and whose remainder is
    in ``terms`` are used.  Record ``i`` maps each such remainder (the key
    with ``prefix`` stripped) to ``data[key][i]``.

    Args:
        data: Mapping whose keys are strings and whose selected values are
            indexable sequences.
        prefix: Key prefix that marks a column to include.
        terms: Allowed key remainders after the prefix.

    Returns:
        A list of dicts, one per index of the first selected column; an empty
        list when no key of ``data`` is selected.

    Raises:
        IndexError: if a later selected column is shorter than the first one.
    """
    keys = [k for k in data if k.startswith(prefix) and k[len(prefix):] in terms]
    # Nothing selected: there is no column to take a length from.
    if not keys:
        return []
    # The first selected column determines how many records are produced.
    n_items = len(data[keys[0]])
    return [{k[len(prefix):]: data[k][i] for k in keys}
            for i in range(n_items)]


In [2]:
# Directory holding the external layout files to import.
dir_in = './data/external/in/'
# glob() returns entries in a filesystem-dependent order; sort them so the
# listing is the same on every run and machine.
sorted(glob(f'{dir_in}/*.json'))

['./data/external/in/lastfm-DELG.json',
 './data/external/in/tol-DELG.json',
 './data/external/in/topics-DELG.json']

In [None]:

# Dataset selection: each fn_ext / fn_ref pair below belongs together
# (lastfm, topics, tol). Uncomment exactly ONE pair before running this cell.
# NOTE(review): with every pair commented out, fn_ext is not defined in this
# cell, so the lines below raise NameError on a fresh kernel unless it was
# defined elsewhere — confirm which dataset is intended as the default.
# fn_ext = f'./data/external/in/lastfm-DELG.json'
# fn_ref = f'data/json/lastfm_refined/Graph_8_2587-min.json'
# fn_ext = f'./data/external/in/topics-DELG.json'
# fn_ref = f'data/json/topics_faryad_8level_linear/Graph_5000-1615834916-min.json'
# fn_ext = f'./data/external/in/tol-DELG.json'
# fn_ref = f'data/json/tol_graphs_linear/Graph_4-1615872482-min.json'

# Output paths are derived from the input path: drop the '/in/' directory
# component and swap the '.json' suffix for '-min.json' / '-nodes-0.json'.
# str.replace rewrites every occurrence of the pattern, not just the last one.
fn_out = fn_ext.replace('/in/', '/').replace('.json', '-min.json')
fn_out_nodes = fn_ext.replace('/in/', '/').replace('.json', '-nodes-0.json')
print(fn_out, fn_out_nodes)


In [None]:
# Parse the external layout and the reference graph.
data_ext = read_external(fn_ext)
data_ref = read_ref(fn_ref)

# data: the reference graph with node_x / node_y replaced;
# nodes: the per-node records built from it.
data, nodes = align(data_ext, data_ref)

# Write both results as JSON indented by two spaces.
for out_path, payload in ((fn_out, data), (fn_out_nodes, nodes)):
    with open(out_path, 'w') as f:
        json.dump(payload, f, indent=2)

---

In [None]:
def read_khaled(fn_labels, fn_coords, fn_edges):
    """Read node coordinates and labels from two line-aligned text files.

    Line ``i`` of ``fn_coords`` is paired with line ``i`` of ``fn_labels``.
    The first two whitespace-separated fields of a coordinate line are
    parsed as floats; the label line is stripped of surrounding whitespace.
    Pairing stops at the end of the shorter file.

    Args:
        fn_labels: Path of the label file (one label per line).
        fn_coords: Path of the coordinate file (at least two numeric fields
            per line).
        fn_edges: Accepted but never read by this function.

    Returns:
        Tuple ``(xs, ys, labels)`` of three equally long lists.

    Raises:
        IndexError: if a coordinate line has fewer than two fields.
        ValueError: if one of the first two fields is not a valid float.
    """
    xs, ys, labels = [], [], []
    with open(fn_labels) as f_labels, open(fn_coords) as f_coord:
        for coord_line, label_line in zip(f_coord, f_labels):
            fields = coord_line.split()
            x_val = float(fields[0])
            y_val = float(fields[1])
            xs.append(x_val)
            ys.append(y_val)
            labels.append(label_line.strip())
    return (xs, ys, labels)


def align2(data_khaled, data_ref):
    """Copy coordinates from an ``(xs, ys, labels)`` triple onto a reference graph.

    Reference labels are compared after removing every double-quote
    character from them; labels from ``data_khaled`` are used as given.

    Args:
        data_khaled: Triple ``(xs, ys, labels)`` of parallel sequences, in
            the form returned by ``read_khaled``.
        data_ref: Mapping with at least a ``node_label`` sequence of strings.

    Returns:
        ``(res_ref, records)`` where ``res_ref`` is a shallow copy of
        ``data_ref`` with ``node_x`` / ``node_y`` replaced (in the order of
        ``node_label``), and ``records`` is ``objectify(res_ref)``.

    Raises:
        KeyError: if a de-quoted reference label is not among ``labels``.
    """
    xs, ys, labels = data_khaled
    # label -> coordinate; for repeated labels the last occurrence wins.
    x_of = dict(zip(labels, xs))
    y_of = dict(zip(labels, ys))

    # Reference labels with the double quotes dropped, in node order.
    ref_labels = [raw.replace('"', '') for raw in data_ref['node_label']]

    xs_in_ref_order = [x_of[lbl] for lbl in ref_labels]
    ys_in_ref_order = [y_of[lbl] for lbl in ref_labels]

    # Shallow copy: only the two coordinate entries are rebound.
    res_ref = data_ref.copy()
    res_ref['node_x'] = xs_in_ref_order
    res_ref['node_y'] = ys_in_ref_order

    return res_ref, objectify(res_ref)

In [41]:
# Alternative datasets: to switch, comment out the active block below and
# uncomment one of these groups.
# fn_ref = f'data/json/lastfm_refined/Graph_8_2587-min.json'
# fn_labels = './data/large/lastfm/Graph_8.txt.full.labels'
# fn_coords = './data/large/lastfm/Graph_8.txt.weighted.mtxBatchTree128PARAOUT0.txt'
# fn_edges = './data/large/lastfm/Graph_8.txt.weighted.mtx'

# fn_ref = f'data/json/topics_faryad_8level_linear/Graph_5000-1615834916-min.json'
# fn_labels = './data/large/topics/Graph_5000.txt.full.labels'
# fn_coords = './data/large/topics/Graph_5000.txt.weighted.mtxBatchTree128PARAOUT0.txt'
# fn_edges = './data/large/topics/Graph_5000.txt.weighted.mtx'

# Active dataset: tol.
fn_ref = 'data/json/tol_graphs_linear/Graph_4-1615175837-min.json'
fn_labels = './data/large/tol/Graph_4.txt.full.labels'
fn_coords = './data/large/tol/Graph_4.txt.weighted.mtxBatchTree128PARAOUT0.txt'
fn_edges = './data/large/tol/Graph_4.txt.weighted.mtx'

# Load the reference graph and the (xs, ys, labels) triple, then align them.
data_ref = read_ref(fn_ref)
data_khaled = read_khaled(fn_labels, fn_coords, fn_edges)

data, nodes = align2(data_khaled, data_ref)

# Outputs go next to the label file and are named after the part of its
# file name that precedes the first '.'.
path_parts = fn_labels.split('/')
dir_out = '/'.join(path_parts[:-1])
graph_stem = path_parts[-1].split('.')[0]
fn_out = f'{dir_out}/{graph_stem}-min.json'
fn_out_nodes = f'{dir_out}/{graph_stem}-nodes-0.json'

fn_out, fn_out_nodes

('./data/large/tol/Graph_4-min.json', './data/large/tol/Graph_4-nodes-0.json')

In [42]:
# Write the aligned graph and its per-node records as JSON indented by two
# spaces, each to its own file.
for out_path, payload in ((fn_out, data), (fn_out_nodes, nodes)):
    with open(out_path, 'w') as f:
        json.dump(payload, f, indent=2)

In [None]:
# # expected output format
# with open('data/json/lastfm_refined/Graph_8_2587-nodes-0.json') as f:
#     out_nodes = json.load(f)
# print(out_nodes[0].keys())