forked from biothings/biothings_schema.py
-
Notifications
You must be signed in to change notification settings - Fork 2
/
schemaorg_2_json_schema.py
60 lines (39 loc) · 1.46 KB
/
schemaorg_2_json_schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import json
from schema_explorer import SchemaExplorer
import pprint as pp
from graphviz import Source
import networkx as nx
def load_schemaorg_model(model_path, output_filename="schema.org.model.pdf", view=True):
    """Load a schema into a ``SchemaExplorer`` and render its full graph.

    Args:
        model_path: Location of the schema to load; passed unchanged to
            ``SchemaExplorer.load_schema``.
        output_filename: File name handed to the graph's ``render`` call.
            The default is the name that was previously hard-coded here.
        view: Forwarded to ``render``. The default ``True`` keeps the
            previous behaviour; pass ``False`` for headless or batch runs
            where no viewer should be requested.

    Returns:
        The ``SchemaExplorer`` instance with the schema loaded.
    """
    # instantiate schema explorer
    se = SchemaExplorer()
    se.load_schema(model_path)

    # visualize loaded schema
    full_schema = se.full_schema_graph()
    full_schema.engine = "fdp"
    # NOTE(review): if full_schema is a graphviz object, render() appends the
    # output format's extension to this name, so the default may end up as
    # "schema.org.model.pdf.pdf" -- confirm before changing the default.
    full_schema.render(filename=output_filename, view=view)
    return se
def get_children(se, parent):
    """Return the child classes of ``parent`` known to the schema explorer.

    Args:
        se: Object exposing ``find_child_classes``.
        parent: Name of the class whose children are requested.

    Returns:
        Whatever ``se.find_child_classes(parent)`` returns, unmodified.
    """
    children = se.find_child_classes(parent)
    return children
def dump_schema_graph(se, path):
    """Write the explorer's networkx schema graph to ``path`` in GML format.

    Node attributes that are undefined (``None``, empty strings or
    containers, or the literal string ``"null"``) are replaced in place with
    ``"Not defined"`` before writing. Numeric and boolean values such as
    ``0`` or ``False`` are kept as they are: they are real values, not
    missing ones.

    Args:
        se: Object exposing ``get_nx_schema()``, which supplies the graph.
        path: Destination passed to ``networkx.write_gml``.
    """
    G = se.get_nx_schema()

    # ensure node properties are not NULL
    for _, attributes in G.nodes(data=True):
        # Only existing keys are reassigned, so the dict does not change size
        # while it is being iterated.
        for attribute, value in attributes.items():
            # bool is a subclass of int, so False is covered by this check too.
            is_number = isinstance(value, (int, float))
            if (not value and not is_number) or value == "null":
                attributes[attribute] = "Not defined"

    # write out the networkx graph in GML format
    # R igraph should be able to read that
    nx.write_gml(G, path)
if __name__ == '__main__':
    # Input schema and the location the exported graph is written to.
    model_path = './data/csbcContext.jsonld'
    serialized_model_path = "./model.gml"

    print("Loading model...")
    se = load_schemaorg_model(model_path)

    # Look up the child classes of one schema class; the result is not used
    # further in this script.
    class_children = get_children(se, "ResourceType")

    # Serialize the networkx graph to a GML file (the ".gml" path above),
    # intended to be read from the R igraph package.
    dump_schema_graph(se, serialized_model_path)
    print("Graph_saved as " + serialized_model_path)