diff --git a/build.py b/build.py index 39446ee..b5e9586 100644 --- a/build.py +++ b/build.py @@ -18,59 +18,59 @@ git_pusher = GitPusher() versions = [] -################################## -# A: gather schema specifications -################################## +##################################################### +# Gather schema specifications and instance libraries +##################################################### # Step 1 - clone central repository in main branch to get the latest sources schema_loader = SchemaLoader() +instance_loader = InstanceLoader() -for schema_version in schema_loader.get_schema_versions(): +for version in schema_loader.get_schema_versions(): # Step 2 - building static structure - copy_static_structures(schema_version) + copy_static_structures(version) # Step 3 - find all involved schemas for the current version - schemas = schema_loader.find_schemas(schema_version) - relative_path_by_schema = schema_loader.get_relative_path_for_schemas(schemas, schema_version) - - for schema in schemas: - # Step 4 - build documentation for version specific schema - SchemaDocBuilder(schema, schema_loader.schemas_sources, relative_path_by_schema).build() - - # Step 5 - building toctrees (please note that the implementation requires the schemas to be built already) - create_toc_tree_page(schema_version, ["schema_specifications"]) - - # Step 6 - checkout version branch in relative "upload" directory (provided by GitHub pipeline), copy files and push - if schema_version not in versions: - versions.append(schema_version) - -###################### -# B: gather instances -###################### - -# Step 1 - clone instances repository in main branch to get the latest sources -instance_loader = InstanceLoader() - -for instance_version in instance_loader.get_instance_versions(): - # Step 2 - find all involved schemas for the current version - instances_paths = instance_loader.find_instances(instance_version) - - # Step 3 - gather version specific instance 
libraries for documentation build - instances = InstancesDocBuilder(instances_paths, instance_version) - - # Step 4 - build documentation for version specific schema - instances.build() - - # Step 5 - building toctrees (please note that the implementation requires the schemas to be built already) - create_toc_tree_page(instance_version, ["instance_libraries"]) + absolute_paths_for_schemas = schema_loader.find_schemas(version) + relative_paths_for_schema_docu = schema_loader.get_relative_paths_for_schema_docu(absolute_paths_for_schemas, version) + + # Step 4 - find all involved instances for the current version + relative_paths_for_instancelib_docu = None + instancelib_docu_path_for_schemas = None + if version in instance_loader.get_instance_versions(): + absolute_paths_for_instances = instance_loader.find_instances(version) + relative_paths_for_instancelib_docu, instancelib_docu_path_for_schemas = instance_loader.get_relative_paths_and_schemas_for_instancelib_docu(absolute_paths_for_instances, version) + + # Step 5 - build documentation page for each version specific schema + for absolute_path_schema in absolute_paths_for_schemas: + # Step 5.1 - retrieve schema name from absolute path + schema_name = schema_loader.get_schema_name(absolute_path_schema) + # Step 5.2 - retrieve path to instance library path if available for schema + if instancelib_docu_path_for_schemas and schema_name in instancelib_docu_path_for_schemas: + instancelib_docu_path_for_schema = instancelib_docu_path_for_schemas[schema_name] + else: + instancelib_docu_path_for_schema = None + # Step 5.3 - build schema documentation page + SchemaDocBuilder(absolute_path_schema, schema_name, version, relative_paths_for_schema_docu, instancelib_docu_path_for_schema).build() + + # Step 6 - build toctrees (please note that the implementation requires the schemas to be built already) + create_toc_tree_page(version, ["schema_specifications"]) + + # Step 7 - build instance library documentation page for each 
def __init__(self, relative_paths_for_instancelib_docu:Dict, version:str, instancelib_docu_path_for_schemas:Dict, relative_paths_for_schema_docu:Dict):
    """Store the lookup tables needed to render the instance library pages.

    Args:
        relative_paths_for_instancelib_docu: maps the relative documentation
            path of a library page to the absolute paths of its instance files
            (iterated that way by ``build``).
        version: version label used in target paths and in generated links.
        instancelib_docu_path_for_schemas: maps a schema name to the relative
            documentation path of its instance library. ``None`` is accepted
            and treated as "no libraries".
        relative_paths_for_schema_docu: maps a schema name to the relative
            path of its schema documentation page.
    """
    self.version = version
    self.relative_paths_for_instancelib_docu = relative_paths_for_instancelib_docu
    self.instancelib_docu_path_for_schemas = instancelib_docu_path_for_schemas
    # Inverse lookup (library docu path -> schema name). The build script
    # initialises the forward mapping with None for versions without
    # instances, so fall back to an empty dict instead of failing on .items().
    self.schema_for_instancelib_docu_path = {
        docu_path: schema_name
        for schema_name, docu_path in (instancelib_docu_path_for_schemas or {}).items()
    }
    self.relative_paths_for_schema_docu = relative_paths_for_schema_docu
    self.readthedocs_url = "https://openminds-documentation.readthedocs.io/en/"


def _target_file_without_extension(self, instancelib_docu_relative_path:str) -> str:
    """Return ``<version>/docs/instance_libraries/<relative path>`` (no file extension)."""
    return os.path.join(self.version, "docs", "instance_libraries", instancelib_docu_relative_path)


def _capitalze_term(self, term:str):
    """Upper-case the first character of ``term`` and keep the rest unchanged.

    Unlike ``str.capitalize`` this preserves camelCase ("brainAtlases" ->
    "BrainAtlases"). An empty term is returned as is instead of raising
    ``IndexError``. The misspelled method name is kept because callers use it.
    """
    return term[:1].upper() + term[1:]


def _create_library_title(self, instancelib_docu_relative_path:str):
    """Build the page title for a library from its relative documentation path.

    ``"licenses"`` becomes ``"Licenses library"``; a path with a sub-directory
    such as ``"terminologies/species"`` becomes
    ``"Terminologies: Species library"``.
    """
    path_parts = instancelib_docu_relative_path.split('/')
    library_name = self._capitalze_term(path_parts[-1].replace('.jsonld', ''))
    if len(path_parts) == 1:
        return f"{library_name} library"
    # only the first path component is used as the prefix
    subdir_name = self._capitalze_term(path_parts[0])
    return f"{subdir_name}: {library_name} library"
def _build_plurals(self, term: str) -> str:
    """Return a simple English plural of ``term``.

    ``...s`` -> ``...ses``, consonant + ``y`` -> ``...ies``, otherwise append
    ``s``. A ``y`` that follows a vowel keeps its ``y`` ("key" -> "keys").
    """
    if term.endswith("s"):
        return f"{term}es"
    # tuple membership on purpose: "" (one-letter term) must not count as a vowel
    if term.endswith("y") and term[-2:-1].lower() not in ("a", "e", "i", "o", "u"):
        return f"{term[:-1]}ies"
    return f"{term}s"


def _build_single_instance_link(self, instanceReference:Dict) -> str:
    """Return an RST hyperlink to the library entry of a referenced instance.

    The instance type is read from the fifth ``/``-separated component of the
    reference's ``@id`` and the instance name from the last one; the type
    decides on which library page the instance is documented.

    Raises:
        KeyError: if the reference has no ``@id``.
        IndexError: if the ``@id`` has fewer than five ``/``-separated parts.
    """
    # URLs must always be joined with "/"; os.path.join follows the host OS.
    import posixpath

    instance_id = instanceReference["@id"]
    id_parts = instance_id.split("/")
    instance_type = id_parts[4]
    instance_name = id_parts[-1]
    # anchor of the instance heading on the target page
    instance_heading = instance_name.casefold()
    instance_type_plural = self._build_plurals(instance_type)
    libraries_url = posixpath.join(self.readthedocs_url, self.version, "instance_libraries")

    if instance_type in ("brainAtlas", "contentType", "commonCoordinateSpace", "license"):
        # one page per type, directly below "instance_libraries"
        link_dir = libraries_url
        page_heading = instance_type_plural
    elif instance_type in ("parcellationEntity", "brainAtlasVersion", "commonCoordinateSpaceVersion"):
        # page named after the first "_"-separated part of the instance name
        link_dir = posixpath.join(libraries_url, instance_type_plural)
        page_heading = instance_heading.split("_")[0]
    elif instance_type == "parcellationEntityVersion":
        # page named after the first two "_"-separated parts of the instance name
        link_dir = posixpath.join(libraries_url, instance_type_plural)
        page_heading = "_".join(instance_heading.split("_")[:2])
    else:
        # every other type is linked below "terminologies"
        link_dir = posixpath.join(libraries_url, "terminologies")
        page_heading = instance_type

    link = posixpath.join(link_dir, f"{page_heading}.html#{instance_heading}")
    return f"`{instance_name} <{link}>`_"


def _build_multi_instance_links(self, instanceReferenceList:List) -> str:
    """Return the RST hyperlinks of all references, joined by ``", "``."""
    return ", ".join(
        self._build_single_instance_link(instanceReference)
        for instanceReference in instanceReferenceList
    )
def _build_instance_library_link(self, instanceReferenceList:List[Dict]) -> str:
    """Return an RST hyperlink to the library page that documents the referenced instances.

    Only the first reference is inspected: all references in the list are
    assumed to have the same type and to be documented on the same page.

    Raises:
        IndexError: if the list is empty or the first ``@id`` has fewer than
            five ``/``-separated parts.
        KeyError: if the first reference has no ``@id``.
    """
    # URLs must always be joined with "/"; os.path.join follows the host OS.
    import posixpath

    example_id_parts = instanceReferenceList[0]["@id"].split("/")
    instance_example_type = example_id_parts[4]
    instance_example_id_basename = example_id_parts[-1]
    instance_type_plural = self._build_plurals(instance_example_type)
    libraries_url = posixpath.join(self.readthedocs_url, self.version, "instance_libraries")

    # NOTE(review): the page name keeps the case of the instance id here, while
    # _build_single_instance_link casefolds it — confirm which form matches the
    # generated file names.
    if instance_example_type in ("brainAtlas", "contentType", "commonCoordinateSpace", "license"):
        # one page per type, directly below "instance_libraries"
        link_dir = libraries_url
        page_heading = instance_type_plural
    elif instance_example_type in ("parcellationEntity", "brainAtlasVersion", "commonCoordinateSpaceVersion"):
        # page named after the first "_"-separated part of the instance name
        link_dir = posixpath.join(libraries_url, instance_type_plural)
        page_heading = instance_example_id_basename.split("_")[0]
    elif instance_example_type == "parcellationEntityVersion":
        # page named after the first two "_"-separated parts of the instance name
        link_dir = posixpath.join(libraries_url, instance_type_plural)
        page_heading = "_".join(instance_example_id_basename.split("_")[:2])
    else:
        # every other type is linked below "terminologies"
        link_dir = posixpath.join(libraries_url, "terminologies")
        page_heading = instance_example_type

    link = posixpath.join(link_dir, f"{page_heading}.html")
    return f"`cf. respective {instance_type_plural} <{link}>`_"
doc.heading(f"{title}", char="#", overline=True) doc.newline() - doc.content(f"Related schema specification: `{title_CamelCase} <{schema_link}>`_") + doc.content(f"Related schema specification: `{schema_name} <{schema_link}>`_") doc.newline() doc.content("------------") doc.newline() doc.content("------------") doc.newline() - for instance_id in sorted(instances_ids): - instance_data = self.instances_collection[instance_id] - subtitle = instance_id.split("/")[-1] + for instance_path in sorted(instances_absolute_paths): + with open(instance_path, "r") as instance_f: + instance_data = json.load(instance_f) + subtitle = instance_path.split("/")[-1].replace('.jsonld', '') doc.heading(subtitle, char="-") doc.newline() doc.directive(name="admonition", arg="metadata sheet") field_list_indent = 3 - doc.content(":class: dropdown", indent=field_list_indent) doc.newline() for prop, value in sorted(instance_data.items()): if type(value) == str: @@ -122,233 +135,127 @@ def _build_simple_instances(self, docpath:str, instances_ids:List, schemadir:str sorted_value_list = sorted(value) doc.field(name=prop, value=", ".join(sorted_value_list), indent=field_list_indent) if all(isinstance(item, Dict) for item in value): - doc.field(name=prop, value=self._build_multi_term_links(value, prop), indent=field_list_indent) + doc.field(name=prop, value=self._build_multi_instance_links(value), indent=field_list_indent) doc.newline() - doc.content(f"`BACK TO TOP <{title_CamelCase}_>`_") + doc.content(f"`BACK TO TOP <{title}_>`_") doc.newline() doc.content("------------") doc.newline() - def _build_atlas_terminology(self, data:List) -> List: - atlas_terminology = [] - for entity_ref in data: - entity_data = self.instances_collection[entity_ref["@id"]] - parents = entity_data.get('hasParent', []) - if not parents: - atlas_terminology.append(entity_data) - else: - for parent_ref in parents: - parent_item = self.instances_collection.get(parent_ref["@id"]) - if parent_item: - 
def _build_simple_sands_instances(self, instancelib_docu_relative_path:str, instances_absolute_paths:List[str]):
    """Write the RST library page for instances rendered with the simple SANDS layout.

    Args:
        instancelib_docu_relative_path: relative documentation path of the page.
        instances_absolute_paths: paths of the JSON-LD instance files to list.
    """
    self._write_sands_library_page(instancelib_docu_relative_path, instances_absolute_paths, self._write_simple_sands_field)


def _build_complex_sands_instances(self, instancelib_docu_relative_path:str, instances_absolute_paths:List[str]):
    """Write the RST library page for instances rendered with the complex SANDS layout.

    Args:
        instancelib_docu_relative_path: relative documentation path of the page.
        instances_absolute_paths: paths of the JSON-LD instance files to list.
    """
    self._write_sands_library_page(instancelib_docu_relative_path, instances_absolute_paths, self._write_complex_sands_field)


def _write_sands_library_page(self, instancelib_docu_relative_path:str, instances_absolute_paths:List[str], write_field):
    """Write one library page: page header plus one "metadata sheet" per instance file.

    ``write_field(doc, prop, value, indent)`` renders a single property of an
    instance; everything else on the page is shared by both SANDS layouts.

    Raises:
        KeyError: if no schema is registered for the first path component, or
            no schema documentation path is known for that schema.
    """
    # URLs must always be joined with "/"; os.path.join follows the host OS.
    import posixpath

    target_file = os.path.join("target", f"{self._target_file_without_extension(instancelib_docu_relative_path)}.rst")
    os.makedirs(os.path.dirname(target_file), exist_ok=True)
    # the schema lookup is keyed by the first path component only
    library_root = instancelib_docu_relative_path.split("/")[0]
    schema_name = self.schema_for_instancelib_docu_path[library_root]
    schema_path = self.relative_paths_for_schema_docu[schema_name]
    schema_link = posixpath.join(self.readthedocs_url, self.version, "schema_specifications", f"{schema_path}.html")
    field_list_indent = 3
    # explicit UTF-8: the platform default encoding is locale dependent
    with open(target_file, "w", encoding="utf-8") as output_file:
        doc = RstCloth(output_file, line_width=100000)
        title = self._create_library_title(instancelib_docu_relative_path)
        doc.heading(title, char="#", overline=True)
        doc.newline()
        doc.content(f"Related schema specification: `{schema_name} <{schema_link}>`_")
        doc.newline()
        doc.content("------------")
        doc.newline()
        doc.content("------------")
        doc.newline()
        for instance_path in sorted(instances_absolute_paths):
            with open(instance_path, "r", encoding="utf-8") as instance_f:
                instance_data = json.load(instance_f)
            subtitle = instance_path.split("/")[-1].replace('.jsonld', '')
            doc.heading(subtitle, char="-")
            doc.newline()
            doc.directive(name="admonition", arg="metadata sheet")
            doc.newline()
            for prop, value in sorted(instance_data.items()):
                write_field(doc, prop, value, field_list_indent)
            doc.newline()
            doc.content(f"`BACK TO TOP <{title}_>`_")
            doc.newline()
            doc.content("------------")
            doc.newline()


def _write_simple_sands_field(self, doc, prop:str, value, field_list_indent:int):
    """Render one property of a simple SANDS instance as at most one RST field."""
    if isinstance(value, (str, int, float)):
        doc.field(name=prop, value=str(value), indent=field_list_indent)
    elif isinstance(value, dict) and "@id" in value:
        doc.field(name=prop, value=self._build_single_instance_link(value), indent=field_list_indent)
    elif isinstance(value, list):
        only_references = all(isinstance(item, dict) for item in value)
        if prop == "hasAnnotation" and only_references:
            doc.field(name=prop, value="COMING SOON", indent=field_list_indent)
        elif value and all(isinstance(item, (str, int, float)) for item in value):
            sorted_value_list = sorted(str(item) for item in value)
            doc.field(name=prop, value=", ".join(sorted_value_list), indent=field_list_indent)
        elif only_references:
            # an empty list ends up here and yields a single empty field
            # (it used to satisfy both all() checks and was written twice)
            doc.field(name=prop, value=self._build_multi_instance_links(value), indent=field_list_indent)


def _write_complex_sands_field(self, doc, prop:str, value, field_list_indent:int):
    """Render one property of a complex SANDS instance as at most one RST field.

    Dictionary values are only rendered for ``digitalIdentifier``,
    ``usedSpecies`` and ``hasTerminology``; lists of references only for
    ``hasVersion``. Other such properties are skipped.
    """
    if isinstance(value, (str, int, float)):
        doc.field(name=prop, value=str(value), indent=field_list_indent)
    elif isinstance(value, dict):
        if prop == "digitalIdentifier":
            doc.field(name=prop, value=value["@id"], indent=field_list_indent)
        elif prop == "usedSpecies":
            doc.field(name=prop, value=self._build_single_instance_link(value), indent=field_list_indent)
        elif prop == "hasTerminology":
            # .get(): a terminology without "hasEntity" is "not defined", not an error
            parcellation_entities = value.get("hasEntity")
            if parcellation_entities:
                doc.field(name=prop, value=self._build_instance_library_link(parcellation_entities), indent=field_list_indent)
            else:
                doc.field(name=prop, value="NOT DEFINED YET", indent=field_list_indent)
    elif isinstance(value, list):
        if prop == "hasVersion" and all(isinstance(item, dict) for item in value):
            # checked first so that an empty list yields exactly one field
            if value:
                doc.field(name=prop, value=self._build_instance_library_link(value), indent=field_list_indent)
            else:
                doc.field(name=prop, value="NOT DEFINED YET", indent=field_list_indent)
        elif all(isinstance(item, (str, int, float)) for item in value):
            sorted_value_list = sorted(str(item) for item in value)
            doc.field(name=prop, value=", ".join(sorted_value_list), indent=field_list_indent)
def build(self):
    """Write one RST page per instance library, picking the builder by library type.

    The library type is the first ``/``-separated component of the relative
    documentation path. Libraries of any other type are skipped.
    """
    simple_libraries = ("contentTypes", "licenses", "terminologies")
    complex_sands_libraries = ("brainAtlases", "commonCoordinateSpaces", "brainAtlasVersions", "commonCoordinateSpaceVersions")
    simple_sands_libraries = ("parcellationEntities", "parcellationEntityVersions")

    for instancelib_docu_relative_path, instances_absolute_paths in self.relative_paths_for_instancelib_docu.items():
        library_type = instancelib_docu_relative_path.split("/")[0]
        if library_type in simple_libraries:
            # content types, licenses and each terminology
            self._build_simple_instances(instancelib_docu_relative_path, instances_absolute_paths)
        elif library_type in complex_sands_libraries:
            # research products and their versions
            self._build_complex_sands_instances(instancelib_docu_relative_path, instances_absolute_paths)
        elif library_type in simple_sands_libraries:
            # parcellation entities and their versions
            self._build_simple_sands_instances(instancelib_docu_relative_path, instances_absolute_paths)
"docs", "schema_specifications", self.schema_relative_path) def build(self): target_file = os.path.join("target", f"{self._target_file_without_extension()}.rst") os.makedirs(os.path.dirname(target_file), exist_ok=True) with open(target_file, "w") as output_file: doc = RstCloth(output_file, line_width=100000) - schema_name = self._schema_payload["name"] - schema_name_camelCase = "".join([schema_name[0].lower(), schema_name[1:]]) - doc.heading(schema_name, char="#", overline=True) + doc.heading(self.schema_name, char="#", overline=True) doc.newline() doc.field(name="Semantic name", value=self._schema_payload["_type"]) doc.newline() + doc.field(name="Display as", value=self._schema_payload["label"]) + doc.newline() if "description" in self._schema_payload and self._schema_payload["description"]: doc.content(self._schema_payload["description"]) doc.newline() @@ -38,15 +41,9 @@ def build(self): doc.field(name="Semantic equivalents", value=semantic_equivalent) doc.newline() doc.newline() - if "controlledTerms" in self._schema_payload["_type"]: - library_subdir = f"terminologies/{schema_name_camelCase}.html" - library_link = os.path.join(self.readthedocs_url, self.version, "instance_libraries", library_subdir) - doc.content(f"For this schema openMINDS provides a `library of instances <{library_link}>`_.") - doc.newline() - if schema_name in ["License", "ContentType"]: - library_subdir = f"{schema_name_camelCase}s.html" - library_link = os.path.join(self.readthedocs_url, self.version, "instance_libraries", library_subdir) - doc.content(f"For this schema openMINDS provides a `library of instances <{library_link}>`_.") + if self.instancelib_docu_path_for_schema: + library_link = os.path.join(self.readthedocs_url, self.version, "instance_libraries", self.instancelib_docu_path_for_schema) + doc.content(f"For this schema openMINDS provides a `library of instances <{library_link}.html>`_.") doc.newline() doc.content("------------") doc.newline() @@ -85,7 +82,7 @@ def 
build(self): doc.content(value_specs[1], indent=multiline_indent) doc.field(name="instructions", value=p_info["_instruction"], indent=field_list_indent) doc.newline() - doc.content(f"`BACK TO TOP <{schema_name}_>`_") + doc.content(f"`BACK TO TOP <{self.schema_name}_>`_") doc.newline() doc.content("------------") doc.newline() @@ -143,8 +140,8 @@ def _define_target_objects(self, object_list) -> str: object_name_list = [] for object in object_list: object_name = object.split('/')[-1] - if object_name in self.relative_path_by_schema: - object_html_path = f"{self.readthedocs_url}{self.version}/schema_specifications/{self.relative_path_by_schema[object_name]}.html" + if object_name in self.relative_paths_for_schema_docu: + object_html_path = f"{self.readthedocs_url}{self.version}/schema_specifications/{self.relative_paths_for_schema_docu[object_name]}.html" object_name_list.append(f"`{object_name} <{object_html_path}>`_") else: object_name_list.append(f"{object_name} \[TYPE_ERROR\]") diff --git a/pipeline/utils.py b/pipeline/utils.py index 6773696..b0696fd 100644 --- a/pipeline/utils.py +++ b/pipeline/utils.py @@ -1,7 +1,8 @@ +import json import glob import os import shutil -from typing import List, Dict +from typing import List, Dict, Tuple from git import Repo, GitCommandError @@ -33,14 +34,17 @@ def get_schema_versions(self) -> List[str]: def find_schemas(self, version:str) -> List[str]: return glob.glob(os.path.join(self.schemas_sources, version, f'**/*.schema.omi.json'), recursive=True) - def get_relative_path_for_schemas(self, schemas:List[str], version:str) -> Dict: - relative_path_by_schema = {} - for schema in schemas: - schema_name = os.path.basename(schema).replace('.schema.omi.json', '') - schema_name = ''.join(s[0].upper() + s[1:] for s in schema_name.split()) - relative_schema_path = os.path.relpath(schema, start=os.path.join(self.schemas_sources, version)).replace('.schema.omi.json', '') - relative_path_by_schema[schema_name] = relative_schema_path - 
return relative_path_by_schema + def get_schema_name(self, absolute_path_schema:str, extension=".schema.omi.json") -> str: + schema_name = os.path.basename(absolute_path_schema).replace(extension, '') + return f"{schema_name[0].upper()}{schema_name[1:]}" + + def get_relative_paths_for_schema_docu(self, absolute_paths_for_schemas:List[str], version:str) -> Dict: + relative_paths_for_schema_docu = {} + for absolute_path_schema in absolute_paths_for_schemas: + schema_name = self.get_schema_name(absolute_path_schema) + relative_schema_path = os.path.relpath(absolute_path_schema, start=os.path.join(self.schemas_sources, version)).replace('.schema.omi.json', '') + relative_paths_for_schema_docu[schema_name] = relative_schema_path + return relative_paths_for_schema_docu class InstanceLoader(object): @@ -54,14 +58,38 @@ def get_instance_versions(self) -> List[str]: def find_instances(self, version:str) -> List[str]: return glob.glob(os.path.join(self.instances_sources, version, f'**/*.jsonld'), recursive=True) - def get_relative_path_for_instances(self, instances:List[str], version:str) -> Dict: - relative_path_by_instance = {} - for instance in instances: - instance_name = os.path.basename(instance).replace('.jsonld', '') - instance_name = ''.join(s[0].upper() + s[1:] for s in instance_name.split()) - relative_instance_path = os.path.relpath(instance, start=os.path.join(self.instances_sources, version)).replace('.jsonld', '') - relative_path_by_instance[instance_name] = relative_instance_path - return relative_path_by_instance + def get_relative_paths_and_schemas_for_instancelib_docu(self, absolute_paths_for_instances:List[str], version:str) -> Tuple[Dict,Dict]: + relative_paths_for_instancelib_docu = {} + instancelib_docu_path_for_schemas = {} + for absolute_path_instance in absolute_paths_for_instances: + # define relative path of instance library docu (with list to absolute paths of all related instances) + relative_instance_path = 
os.path.relpath(absolute_path_instance, start=os.path.join(self.instances_sources, version)).replace('.jsonld', '') + relative_instancelib_path = "/".join(relative_instance_path.split("/")[:-1]) + if relative_instancelib_path in relative_paths_for_instancelib_docu: + relative_paths_for_instancelib_docu[relative_instancelib_path].append(absolute_path_instance) + else: + relative_paths_for_instancelib_docu[relative_instancelib_path] = [absolute_path_instance] + # associate relative instance library docu path to schema type + with open(absolute_path_instance, "r") as instance_f: + instance_payload = json.load(instance_f) + instance_schema = instance_payload["@type"].split("/")[-1] + if instance_schema in ["BrainAtlasVersion", "CommonCoordinateSpaceVersion", "ParcellationEntity", "ParcellationEntityVersion"]: + instancelib_docu_path_for_schemas[instance_schema] = "/".join(relative_instance_path.split("/")[:-2]) + elif instance_schema == "Technique": #FIXME + if relative_instance_path.split("/")[-2] == "analysisTechnique": + instancelib_docu_path_for_schemas["AnalysisTechnique"] = relative_instancelib_path + else: + instancelib_docu_path_for_schemas[instance_schema] = relative_instancelib_path + else: + instancelib_docu_path_for_schemas[instance_schema] = relative_instancelib_path + return relative_paths_for_instancelib_docu, instancelib_docu_path_for_schemas + + def get_path_for_instances(self, instance_paths:List[str], version:str) -> Dict: + path_by_instance = {} + for instance_path in instance_paths: + instance_filename = os.path.basename(instance_path).replace('.jsonld', '') + path_by_instance[instance_filename] = instance_path + return path_by_instance class GitPusher(object):