In [25]:
import copy
import pprint

import pandas as pd
from linkml_runtime import SchemaView
from linkml_runtime.dumpers import yaml_dumper

In [26]:
import linkml

In [27]:
# Location of the NMDC schema YAML, given relative to the current working directory.
nmdc_schema_file = "../src/schema/nmdc.yaml"

In [28]:
# Build the SchemaView that every later cell queries.
nmdc_schema_view = SchemaView(nmdc_schema_file)

In [None]:
# Presumably folds the imported schemas into the view's own schema — TODO confirm.
# NOTE(review): this cell carries no execution count in the saved notebook, so it
# is unclear whether the outputs below were produced with or without it.
nmdc_schema_view.merge_imports()

In [29]:
# Mapping of schema elements; consumed by make_elements_sheet, which iterates it with .items().
nmdc_schema_elements = nmdc_schema_view.all_elements()

In [35]:
def make_elements_sheet(schema_elements):
    """Tabulate schema elements, one row per element.

    Args:
        schema_elements: Mapping whose values expose ``name`` and
            ``from_schema`` attributes and whose class exposes ``class_name``.
            The mapping's keys are ignored.

    Returns:
        pandas.DataFrame with the columns ``element``, ``type`` and
        ``from_schema``, rows in the mapping's iteration order. An empty
        mapping yields an empty frame that still carries those three columns.
    """
    columns = ["element", "type", "from_schema"]
    rows = [
        {
            "element": element.name,
            "type": type(element).class_name,
            "from_schema": element.from_schema,
        }
        for element in schema_elements.values()
    ]
    # Passing `columns` keeps the header stable even when there are no rows.
    return pd.DataFrame(rows, columns=columns)

In [36]:
# One row per schema element: name, element type, and originating schema.
elements_sheet = make_elements_sheet(nmdc_schema_elements)

In [37]:
# Bare expression so the notebook renders the DataFrame as the cell output.
elements_sheet

Unnamed: 0,element,type,from_schema
0,database,class_definition,https://microbiomedata/schema
1,data object,class_definition,https://microbiomedata/schema
2,biosample,class_definition,https://microbiomedata/schema
3,study,class_definition,https://microbiomedata/schema
4,biosample processing,class_definition,https://microbiomedata/schema
...,...,...,...
984,investigation,subset_definition,https://microbiomedata/schema/core
985,mixs extension,subset_definition,https://microbiomedata/schema/core
986,nucleic acid sequence source,subset_definition,https://microbiomedata/schema/core
987,sequencing,subset_definition,https://microbiomedata/schema/core


In [40]:
nmdc_schema = nmdc_schema_view.schema

# Work on a deep copy. `nmdc_schema` is the same object the SchemaView holds, so
# deleting entries from it directly would strip classes/slots/enums/subsets from
# the view itself and make this cell non-idempotent.
schema_attributes = copy.deepcopy(nmdc_schema)

# Drop the element collections so only schema-level metadata remains.
for element_collection in ('classes', 'slots', 'enums', 'subsets'):
    del schema_attributes[element_collection]



In [41]:
# Show what is left of the schema after the element collections were removed, as YAML text.
print(yaml_dumper.dumps(schema_attributes))

name: NMDC
description: "Schema for National Microbiome Data Collaborative (NMDC).\nThis schema\
  \ is organized into distinct modules:\n\n * a set of core types for representing\
  \ data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation\
  \ schema\n * the NMDC schema itself"
title: NMDC Schema
id: https://microbiomedata/schema
version: 4.0.0
imports:
- annotation
- bioscales
- core
- external_identifiers
- linkml:types
- mixs
- portal/emsl
- portal/jgi_metagenomics
- portal/jgi_metatranscriptomics
- portal/mixs_inspired
- portal/sample_id
- prov
- workflow_execution_activity
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  GOLD:
    prefix_prefix: GOLD
    prefix_reference: https://identifiers.org/gold/
  MIXS:
    prefix_prefix: MIXS
    prefix_reference: https://w3id.org/gensc/
  MS:
    prefix_prefix: MS
    prefix_reference: http://purl.obolibrary.org/obo/MS_
  NCIT:
    prefix_prefix: NCIT
    prefix_reference: http://purl.oboli

In [42]:
# YAML dump of just the prefix declarations.
print(yaml_dumper.dumps(schema_attributes.prefixes))

GOLD:
  prefix_prefix: GOLD
  prefix_reference: https://identifiers.org/gold/
MIXS:
  prefix_prefix: MIXS
  prefix_reference: https://w3id.org/gensc/
MS:
  prefix_prefix: MS
  prefix_reference: http://purl.obolibrary.org/obo/MS_
NCIT:
  prefix_prefix: NCIT
  prefix_reference: http://purl.obolibrary.org/obo/NCIT_
OBI:
  prefix_prefix: OBI
  prefix_reference: http://purl.obolibrary.org/obo/OBI_
biolink:
  prefix_prefix: biolink
  prefix_reference: https://w3id.org/biolink/vocab/
dcterms:
  prefix_prefix: dcterms
  prefix_reference: http://purl.org/dc/terms/
igsn:
  prefix_prefix: igsn
  prefix_reference: https://app.geosamples.org/sample/igsn/
img.taxon:
  prefix_prefix: img.taxon
  prefix_reference: http://img.jgi.doe.gov/cgi-bin/w/main.cgi?section=TaxonDetail&taxon_oid=
linkml:
  prefix_prefix: linkml
  prefix_reference: https://w3id.org/linkml/
nmdc:
  prefix_prefix: nmdc
  prefix_reference: https://microbiomedata/meta/
qud:
  prefix_prefix: qud
  prefix_reference: http://qudt.org/1.1

In [44]:
# Collect one record per declared prefix, echoing each pair as it is read.
prefix_list = []

for prefix in nmdc_schema_view.schema.prefixes.values():
    label, reference = prefix['prefix_prefix'], prefix['prefix_reference']
    print(f"{label} {reference}")
    prefix_list.append({"prefix_prefix": label, "prefix_reference": reference})

# Tabulate the records; the bare name below renders the frame as the cell output.
prefix_sheet = pd.DataFrame(prefix_list)

prefix_sheet

GOLD https://identifiers.org/gold/
MIXS https://w3id.org/gensc/
MS http://purl.obolibrary.org/obo/MS_
NCIT http://purl.obolibrary.org/obo/NCIT_
OBI http://purl.obolibrary.org/obo/OBI_
biolink https://w3id.org/biolink/vocab/
dcterms http://purl.org/dc/terms/
igsn https://app.geosamples.org/sample/igsn/
img.taxon http://img.jgi.doe.gov/cgi-bin/w/main.cgi?section=TaxonDetail&taxon_oid=
linkml https://w3id.org/linkml/
nmdc https://microbiomedata/meta/
qud http://qudt.org/1.1/schema/qudt#
rdfs http://www.w3.org/2000/01/rdf-schema#
wgs http://www.w3.org/2003/01/geo/wgs84_pos
COG http://example.com/
EC http://example.com/
ISA http://example.com/
MetaCyc http://example.com/
MetaNetX http://example.com/
RetroRules http://example.com/
UniProtKB http://example.com/
gtpo http://example.com/
insdc.srs http://example.com/
mgnify http://example.com/
nmdc_anno https://microbiomedata/schema/annotation/
nmdc_mixs https://microbiomedata/schema/mixs/
nmdc_wfea https://microbiomedata/schema/workflow_execut

Unnamed: 0,prefix_prefix,prefix_reference
0,GOLD,https://identifiers.org/gold/
1,MIXS,https://w3id.org/gensc/
2,MS,http://purl.obolibrary.org/obo/MS_
3,NCIT,http://purl.obolibrary.org/obo/NCIT_
4,OBI,http://purl.obolibrary.org/obo/OBI_
5,biolink,https://w3id.org/biolink/vocab/
6,dcterms,http://purl.org/dc/terms/
7,igsn,https://app.geosamples.org/sample/igsn/
8,img.taxon,http://img.jgi.doe.gov/cgi-bin/w/main.cgi?sect...
9,linkml,https://w3id.org/linkml/


In [45]:
# LinkML metamodel (meta.yaml), read from the `main` branch of linkml-model on GitHub.
# NOTE(review): the URL is not pinned to a tag or commit, so the fetched content
# may change between runs — confirm whether pinning is wanted.
meta_url="https://raw.githubusercontent.com/linkml/linkml-model/main/linkml_model/model/schema/meta.yaml"
meta_view = SchemaView(meta_url)
# Look up the metamodel's `slot_definition` class; presumably "induced" means
# inherited attributes are included — TODO confirm against the SchemaView docs.
slot_definition = meta_view.induced_class("slot_definition")
# Attribute mapping of that class; its keys are listed in the next cell.
slot_slots = slot_definition.attributes


In [48]:
# List the attribute names available on slot_definition.
list(slot_slots.keys())

['singular_name',
 'domain',
 'slot_uri',
 'multivalued',
 'inherited',
 'readonly',
 'ifabsent',
 'list_elements_unique',
 'list_elements_ordered',
 'shared',
 'key',
 'identifier',
 'designates_type',
 'alias',
 'owner',
 'domain_of',
 'subproperty_of',
 'symmetric',
 'reflexive',
 'locally_reflexive',
 'irreflexive',
 'asymmetric',
 'transitive',
 'inverse',
 'is_class_field',
 'transitive_form_of',
 'reflexive_transitive_form_of',
 'role',
 'is_usage_slot',
 'usage_slot_name',
 'relational_role',
 'slot_group',
 'is_grouping_slot',
 'path_rule',
 'disjoint_with',
 'children_are_mutually_disjoint',
 'union_of',
 'range',
 'range_expression',
 'enum_range',
 'required',
 'recommended',
 'inlined',
 'inlined_as_list',
 'minimum_value',
 'maximum_value',
 'pattern',
 'structured_pattern',
 'unit',
 'implicit_prefix',
 'equals_string',
 'equals_string_in',
 'equals_number',
 'equals_expression',
 'minimum_cardinality',
 'maximum_cardinality',
 'has_member',
 'all_members',
 'none_of',
 