# Convert .json.bz2 graph files to Neo4j import .csv format

In [1]:
import bz2
import json
import pandas as pd

In [2]:
def get_edge_abbrev(edge, kind_to_abbrev):
    """Build the abbreviation string for a single edge.

    The result is the concatenation, in order, of: the abbreviation of the
    first element of ``edge['source_id']``, an optional ``'<'``, the
    abbreviation of ``edge['kind']``, an optional ``'>'``, and the
    abbreviation of the first element of ``edge['target_id']``.

    Parameters
    ----------
    edge : mapping
        Must provide the keys ``'source_id'``, ``'target_id'``, ``'kind'``
        and ``'direction'``. The two ``*_id`` values are indexed at
        position 0 (presumably the node kind -- confirm against the JSON
        schema).
    kind_to_abbrev : mapping
        Maps both the ``*_id[0]`` values and the edge kind to their
        abbreviation strings.

    Returns
    -------
    str
        The assembled abbreviation. ``'<'`` is inserted only when the
        direction is ``'backward'`` and ``'>'`` only when it is
        ``'forward'``; any other direction adds no marker.
    """
    source = edge['source_id']
    target = edge['target_id']

    # The pieces are evaluated left to right, so lookups happen in the
    # same order as the characters appear in the final abbreviation.
    return ''.join([
        kind_to_abbrev[source[0]],
        '<' if edge['direction'] == 'backward' else '',
        kind_to_abbrev[edge['kind']],
        '>' if edge['direction'] == 'forward' else '',
        kind_to_abbrev[target[0]],
    ])

In [3]:
def convert_to_neo(json_graph):
    """Turn a loaded JSON graph into a three-column edge table.

    Parameters
    ----------
    json_graph : mapping
        Must provide ``'edges'`` (an iterable of edge mappings) and, when
        there is at least one edge, ``'kind_to_abbrev'``. Each edge is read
        for ``'source_id'``, ``'target_id'`` and ``'kind'``, and is also
        passed to ``get_edge_abbrev``.

    Returns
    -------
    pandas.DataFrame
        One row per edge with the columns ``:START_ID`` (element 1 of the
        edge's ``source_id``), ``:END_ID`` (element 1 of its ``target_id``)
        and ``:TYPE`` (the edge kind, an underscore, and the edge
        abbreviation).
    """
    start_ids = []
    end_ids = []
    edge_types = []

    for edge in json_graph['edges']:
        start_ids.append(edge['source_id'][1])
        end_ids.append(edge['target_id'][1])
        # The type label is "<kind>_<abbreviation>".
        edge_types.append(
            edge['kind'] + '_' + get_edge_abbrev(edge, json_graph['kind_to_abbrev'])
        )

    # Key order fixes the column order of the resulting frame.
    return pd.DataFrame({
        ':START_ID': start_ids,
        ':END_ID': end_ids,
        ':TYPE': edge_types,
    })

In [4]:
# Convert hetnet_perm-1 through hetnet_perm-5, one file at a time.
for i in range(1, 6):
    json_path = 'hetnet_perm-{}.json.bz2'.format(i)
    csv_path = 'hetnet_perm-{}.csv'.format(i)

    # Text mode ('rt') lets bz2 decompress and decode so json reads str.
    with bz2.open(json_path, mode='rt') as fin:
        graph = json.load(fin)

    # index=False keeps the DataFrame's row index out of the CSV.
    neo_graph = convert_to_neo(graph)
    neo_graph.to_csv(csv_path, index=False)