In [1]:
import networkx as nx
import sys
import os
from bs4 import BeautifulSoup as Soup

In [2]:
from networkx import release
print(release.version)

1.10


In [43]:
def create_graph(edges):
    g = nx.Graph()
    for edge in edges:
        g.add_edge(*edge)
    return g

g0 = create_graph([(0, 1), (0, 2), (0, 3), (0, 4), (4, 5), (5, 6), \
                   (6, 7), (7, 5)])
g1 = create_graph([(0, 1), (2, 3)])
nx.set_node_attributes(g0, 'atom_type', {0: 'carbon', 1: 'oxygen'})
nx.set_node_attributes(g1, 'atom_type', {0: 'oxygen', 1: 'nitrogen'})


nx.set_node_attributes(g0, 'strawberry', {0: -0.01, 1: 3.61423, 2: 1423})


nx.set_edge_attributes(g0, 'some_label_1', {(0, 1): -1, (0, 2): -5, (7, \
                                                                     5): 2.0})

nx.set_edge_attributes(g0, 'some_label_2', {(0, 1): 1, (0, 2): 5, (7, 5): 2})

nx.set_edge_attributes(g0, 'some_label_3', {(0, 1): True, (0, 2): False})

nx.set_edge_attributes(g0, 'some_label_4', {(0, 1): True, (0, 2): -0.9, \
                                            (4, 5): 'xxx', (7, 5): '', \
                                            (5, 6): None})

nx.set_edge_attributes(g0, 'some_label_5', {(0, 1): True, (0, 2): None})

nx.set_edge_attributes(g0, 'some_label_6', {(0, 1): True, (0, 2): [False]})


#add some new attributes to g1
nx.set_node_attributes(g1, 'gas', {0: 'oxygen', 2:'hydrigen' ,3: 'nitrogen'})
nx.set_node_attributes(g1, 'float_number', {0: 3.3, 2:2.2 ,3: 4.4})
nx.set_node_attributes(g1, 'int_number', {0: 0, 2:2 ,3: 3})
nx.set_node_attributes(g1, 'bool_value', {0: True, 2:True ,3: False})

nx.set_edge_attributes(g1, 'some_label_int', {(0, 1): 1, (2, 3): 5})
nx.set_edge_attributes(g1, 'some_label_float', {(0, 1): 1.1, (2, 3): 5.5})
nx.set_edge_attributes(g1, 'some_label_bool', {(0, 1): True, (2, 3): False})
nx.set_edge_attributes(g1, 'some_label_string', {(0, 1): "hahaha"})


# TODO
# convert_to_gxl(g0, 'g0.gxl')
# convert_to_gxl(g1, 'g1.gxl')


In [44]:
def convert_to_gxl(G, graph_id, graph_edgeids, graph_edgemode, filename):
    #create a new soup to write to gxl file
    tagText = '''
                 <!DOCTYPE gxl SYSTEM "http://www.gupro.de/GXL/gxl-1.0.dtd">
                 <gxl>
                 <graph id="" edgeids="" edgemode="">
                 </graph></gxl>'''
    soup = Soup(tagText, "xml")
    graph = soup.findAll('graph')[0]
    graph.attrs['id'] = graph_id
    graph.attrs['edgeids'] = graph_edgeids
    graph.attrs['edgemode'] = graph_edgemode
    # nodes
    error_sen = ""
    nodeAttrName_type = {}
    for nodeID in G.nodes():
        for k, v in G.node[nodeID].items(): #G.node[nodeID] is attr key-value dict
            if (k in nodeAttrName_type) and (nodeAttrName_type[k] == None):
                nodeAttrName_type[k] == None #do nothing
            elif (k in nodeAttrName_type) and (type(v) != nodeAttrName_type[k]) : 
                nodeAttrName_type[k] = None # None -- this attribute will be deleted forever for every node
                error_sen = "Error! Inconsistent data type -- "+"nodeID: "+str(nodeID)+"; Attribute name: "+k+"\n"+\
                            "\t\t   Should be "+str(nodeAttrName_type[k])+", but "+str(type(v))+" is found"+"\n"+\
                            "\t\t   Graph "+graph_id+": deleted inconsistent attribute "+k+"!"
                print(error_sen)
            elif k not in nodeAttrName_type:
                if isinstance(v, str) or isinstance(v, bool) or isinstance(v, int) or isinstance(v, float):
                    nodeAttrName_type[k] = type(v)
                else:
                    nodeAttrName_type[k] = None
                    error_sen = "Error! Wrong data type -- "+"nodeID: "+str(nodeID)+"; Attribute name: "+k+"\n"+\
                                "\t\t   Should be int/float/string/bool, but "+str(type(v))+" is found"+"\n"+\
                                "\t\t   Graph "+graph_id+": deleted invalid attribute "+k+"!"
                    print(error_sen)
            else: #valid and consistent
                nodeAttrName_type[k] = type(v) #do nothing
    for nodeID in G.nodes():
        node_tag = soup.new_tag("node", id=nodeID)
        for k, v in G.node[nodeID].items(): #G.node[nodeID] is attr key-value dict
            if nodeAttrName_type[k] == None :
                continue
            attr_type = ''
            if isinstance(v, str):
                attr_type = 'string'
            elif isinstance(v, bool):
                attr_type = 'bool'
            elif isinstance(v, int):
                attr_type = 'int'
            elif isinstance(v, float):
                attr_type = 'float'
            attr_tag = soup.new_tag("attr")
            attr_tag.attrs['name'] = k
            type_tag = soup.new_tag(attr_type)
            type_tag.string = str(v)
            attr_tag.append(type_tag)
            node_tag.append(attr_tag)
        graph.append(node_tag)
    # edges
    edgeAttrName_type = {}
    for edge in G.edges():
        for k, v in G[edge[0]][edge[1]].items(): #G[edge[0]][edge[1]] is attr key-value dict
            if (k in edgeAttrName_type) and (edgeAttrName_type[k] == None):
                edgeAttrName_type[k] == None #do nothing
            elif (k in edgeAttrName_type) and (type(v) != edgeAttrName_type[k]) :
                edgeAttrName_type[k] = None
                error_sen = "Error! Inconsistent data type -- "+"start_id: "+str(edge[0])+"; end_id: "+str(edge[1])+"; Attribute name: "+k+"\n"+\
                      "\t\t   Should be "+str(edgeAttrName_type[k])+", but "+str(type(v))+" is found"+"\n"+\
                      "\t\t   Graph "+graph_id+": deleted inconsistent attribute "+k+"!"
                print(error_sen)
            elif k not in edgeAttrName_type:
                if isinstance(v, str) or isinstance(v, bool) or isinstance(v, int) or isinstance(v, float):
                    edgeAttrName_type[k] = type(v)
                else:
                    edgeAttrName_type[k] = None
                    error_sen = "Error! Wrong data type -- "+"start_id: "+str(edge[0])+"; end_id: "+str(edge[1])+"; Attribute name: "+k+"\n"+\
                                "\t\t   Should be int/float/string/bool, but "+str(type(v))+" is found"+"\n"+\
                                "\t\t   Graph "+graph_id+": deleted invalid attribute "+k+"!"
                    print(error_sen)
            else: #valid and consistent
                edgeAttrName_type[k] = type(v) #do nothing
    for edge in G.edges():
        edge_tag = soup.new_tag("edge", to=edge[1])
        edge_tag.attrs['from'] = edge[0]
        for k, v in G[edge[0]][edge[1]].items(): #G[edge[0]][edge[1]] is attr key-value dict
            if edgeAttrName_type[k] == None :
                continue
            attr_type = ''
            if isinstance(v, str):
                attr_type = "string"
            elif isinstance(v, bool):
                attr_type = "bool"
            elif isinstance(v, int):
                attr_type = "int"
            elif isinstance(v, float):
                attr_type = "float"
            attr_tag = soup.new_tag("attr")
            attr_tag.attrs['name'] = k
            type_tag = soup.new_tag(attr_type)
            type_tag.string = str(v)
            attr_tag.append(type_tag)
            edge_tag.append(attr_tag)
        graph.append(edge_tag)
    #save to gxl file
    print("Only keep the consistent and valid attributes in networkx graph "+graph_id+", converting ......" )
    if not os.path.exists('./mygxl'):
        os.makedirs('./mygxl')
    with open('./mygxl/'+filename, 'w') as f:
        for line in soup.prettify():
            f.write(str(line)) 
    print("Done!")

In [45]:
graph_id_default = 'unkown'
graph_edgeids_default = 'false'
graph_edgemode_default = 'undirected'

#???networkx make smaller id as source, bigger as target
convert_to_gxl(g0, "g0", graph_edgeids_default, graph_edgemode_default,'g0.gxl')

Error! Inconsistent data type -- nodeID: 2; Attribute name: strawberry
		   Should be None, but <class 'int'> is found
		   Graph g0: deleted inconsistent attribute strawberry!
Error! Inconsistent data type -- start_id: 0; end_id: 2; Attribute name: some_label_6
		   Should be None, but <class 'list'> is found
		   Graph g0: deleted inconsistent attribute some_label_6!
Error! Inconsistent data type -- start_id: 0; end_id: 2; Attribute name: some_label_5
		   Should be None, but <class 'NoneType'> is found
		   Graph g0: deleted inconsistent attribute some_label_5!
Error! Inconsistent data type -- start_id: 0; end_id: 2; Attribute name: some_label_4
		   Should be None, but <class 'float'> is found
		   Graph g0: deleted inconsistent attribute some_label_4!
Error! Inconsistent data type -- start_id: 5; end_id: 7; Attribute name: some_label_1
		   Should be None, but <class 'float'> is found
		   Graph g0: deleted inconsistent attribute some_label_1!
Only keep the consistent and valid at

In [42]:
convert_to_gxl(g1, "g1", graph_edgeids_default, graph_edgemode_default,'g1.gxl')

Only keep the consistent and valid attributes in networkx graph g1, converting ......
Done!
