Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pipeline/src/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,9 @@ def __init__(self, id=None, **properties):

def save(self, file_path, indent=2):
"""
Save this object to a file in JSON-LD format
Save this object to a file in JSON-LD format.

It is recommended to use the extension ".jsonld".
"""
with open(file_path, "w") as output_file:
json.dump(self.to_jsonld(), output_file, indent=indent)
Expand Down
33 changes: 23 additions & 10 deletions pipeline/src/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
The collection can be saved to and loaded from disk, in JSON-LD format.
"""

from glob import glob
import json
import os
from .registry import lookup_type
Expand Down Expand Up @@ -69,7 +70,7 @@ def _sort_nodes_by_id(self):
sorted_nodes = dict(sorted(self.nodes.items()))
self.nodes = sorted_nodes

def save(self, path, individual_files=False, include_empty_properties=False):
def save(self, path, individual_files=False, include_empty_properties=False, group_by_schema=False):
"""
Save the node collection to disk in JSON-LD format.

Expand All @@ -78,10 +79,18 @@ def save(self, path, individual_files=False, include_empty_properties=False):

path (str):
either a file or a directory into which the metadata will be written.
It is recommended to use the extension ".jsonld".
individual_files (bool):
if False (default), save the entire collection into a single file.
if True, `path` must be a directory, and each node is saved into a
separate file within that directory.
include_empty_properties (bool):
if False (default), do not include properties with value None.
if True, include all properties.
group_by_schema (bool):
Only applies if `individual_files` is True.
If False (default), save all files in a single directory.
If True, save into subdirectories according to the schema name.

Returns
-------
Expand Down Expand Up @@ -137,7 +146,12 @@ def save(self, path, individual_files=False, include_empty_properties=False):
else:
assert node.id.startswith("_:")
file_identifier = node.id[2:]
file_path = os.path.join(path, f"{file_identifier}.jsonld")
if group_by_schema:
dir_path = os.path.join(path, node.__class__.__name__)
os.makedirs(dir_path, exist_ok=True)
file_path = os.path.join(dir_path, f"{file_identifier}.jsonld")
else:
file_path = os.path.join(path, f"{file_identifier}.jsonld")
with open(file_path, "w") as fp:
data = node.to_jsonld(embed_linked_nodes=False, include_empty_properties=include_empty_properties)
json.dump(data, fp, indent=2)
Expand All @@ -150,9 +164,9 @@ def load(self, *paths, version=DEFAULT_VERSION):

`*paths` may contain either:

1) a single directory, in which case
all JSON-LD files all the top level of this directory will be loaded
(but without descending into subdirectories)
1) a single directory, in which case all JSON-LD files in this directory
and any non-hidden subdirectories will be loaded
(where hidden subdirectories are those whose name starts with ".").

2) one or more JSON-LD files, which will all be loaded.

Expand All @@ -161,11 +175,10 @@ def load(self, *paths, version=DEFAULT_VERSION):
"""
if len(paths) == 1 and os.path.isdir(paths[0]):
data_dir = paths[0]
json_paths = [
os.path.join(data_dir, item)
for item in os.listdir(data_dir)
if os.path.splitext(item)[1] in (".json", ".jsonld")
]
json_paths = (
glob(f"{data_dir}/**/*.jsonld", recursive=True)
+ glob(f"{data_dir}/**/*.json", recursive=True)
)
else:
json_paths = paths

Expand Down
20 changes: 20 additions & 0 deletions pipeline/tests/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,26 @@ def test_round_trip_multi_file():
assert p == np


def test_round_trip_multi_file_group_by_schema():
    """Round-trip a collection through individual files grouped by schema.

    Saves a collection with ``individual_files=True`` and
    ``group_by_schema=True``, loads it back from the same directory, and
    checks that the reloaded node serialises to the same JSON-LD as the
    original.
    """
    # Start from a clean output directory so stale files cannot be loaded.
    shutil.rmtree(test_output_dir, ignore_errors=True)
    person = build_fake_node(omcore.Person)
    collection = Collection(person)
    collection.save(test_output_dir, individual_files=True, include_empty_properties=False, group_by_schema=True)
    new_collection = Collection()
    new_collection.load(test_output_dir)

    assert len(collection) == len(new_collection)

    # Select the *reloaded* node. Comparing `person` with itself would make
    # the final assertion pass regardless of what was written or read.
    new_person = next((node for node in new_collection if node.id == person.id), None)
    assert new_person is not None, f"node {person.id} not found in reloaded collection"

    original_data = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=True)
    reloaded_data = new_person.to_jsonld(include_empty_properties=False, embed_linked_nodes=True)
    assert original_data == reloaded_data


def test_collection_sort_by_id():
person = omcore.Person(given_name="A", family_name="Professor", id="_:004")
uni1 = omcore.Organization(full_name="University of This Place", id="_:002")
Expand Down