# Test Schema View
This notebook tests how to use the LinkML SchemaView utility to interrogate the NMDC schema.

In [1]:
from linkml_runtime.utils.schemaview import SchemaView

In [2]:
view = SchemaView('../src/schema/nmdc.yaml')

In [3]:
view.imports_closure()

['NMDC',
 'workflow_execution_activity',
 'external_identifiers',
 'core',
 'prov',
 'basic_slots',
 'linkml:types',
 'portal/sample_id',
 'portal/mixs_inspired',
 'portal/jgi_metatranscriptomics',
 'portal/jgi_metagenomics',
 'portal/emsl',
 'mixs',
 'bioscales',
 'annotation']

### Get list of classes (limit to 5)

In [4]:
list(view.all_class())[0:5]

  list(view.all_class())[0:5]


['database', 'data object', 'biosample', 'study', 'biosample processing']

### Get list of all slots (limit to 5)

In [5]:
list(view.all_slot())[0:5]

  list(view.all_slot())[0:5]


['canary',
 'ess dive datasets',
 'has credit associations',
 'study image',
 'relevant protocols']

### Check if slot is multivalued

In [6]:
view.get_slot('study set').multivalued

True

### Determine range of slot

In [7]:
view.get_slot('has input').range

'named thing'

### Determine range as specified in the slot_usage

In [8]:
view.get_class('biosample processing').slot_usage['has input'].range

'biosample'

### If the slot is not in slot_usage, an error will be thrown, so use a function to determine the slot range.

In [9]:
def get_class_slot_range(view, class_name, slot_name):
    """Return the range of a slot in the context of a class.

    A range set in the class's ``slot_usage`` takes precedence. When the
    class has no ``slot_usage`` entry for the slot, or the entry does not
    set a range, the range of the schema-level slot definition is used.

    :param view: object providing ``get_class(name)`` and ``get_slot(name)``
        (a ``SchemaView`` in this notebook)
    :param class_name: name of the class used as context; it must be known
        to ``view``
    :param slot_name: name of the slot whose range is wanted
    :return: the range, or ``None`` when the slot is unknown or no range is
        set for it
    """
    slot_usage = view.get_class(class_name).slot_usage
    if slot_name in slot_usage:
        usage_range = slot_usage[slot_name].range
        # A slot_usage entry may override other properties only and leave
        # the range unset; fall through to the slot definition in that case.
        if usage_range is not None:
            return usage_range

    slot = view.get_slot(slot_name)
    # get_slot returns nothing for an unknown slot, so guard before
    # reading .range instead of raising AttributeError.
    return slot.range if slot is not None else None

print(get_class_slot_range(view, 'biosample processing', 'has input'))
print(get_class_slot_range(view, 'biosample processing', 'has output'))
        

biosample
named thing


### Test for non-existent slots

In [10]:
view.get_slot('foo') ## nothing returned

In [11]:
'foo' in view.get_class('biosample processing').slots

False

### Use induced_slot method to find range

In [12]:
 help(SchemaView.induced_slot)

Help on _lru_cache_wrapper in module linkml_runtime.utils.schemaview:

induced_slot(self, slot_name: Union[linkml_runtime.linkml_model.meta.SlotDefinitionName, str], class_name: Union[linkml_runtime.linkml_model.meta.ClassDefinitionName, str] = None, imports=True) -> linkml_runtime.linkml_model.meta.SlotDefinition
    Given a slot, in the context of a particular class, yield a dynamic SlotDefinition that
    has all properties materialized.
    
    This makes use of schema slots, such as attributes, slot_usage. It also uses ancestor relationships
    to infer missing values
    
    :param slot_name: slot to be queries
    :param class_name: class used as context
    :param imports: include imports closure
    :return: dynamic slot constructed by inference



In [13]:
view.induced_slot('has input', 'biosample processing').range

'biosample'

In [14]:
view.induced_slot('has input', 'omics processing').range

'biosample'

### SchemaView won't return the name of a class as it is named in the Python module. Inspecting the module does.

In [15]:
import inspect
from nmdc_schema import nmdc
from nmdc_schema.nmdc import *

In [16]:
# Map each Python class name found in the nmdc module to the schema class
# name stored in its `class_name` attribute; classes without that
# attribute are skipped.
python_name_dict = {
    py_name: cls.class_name
    for py_name, cls in inspect.getmembers(nmdc, inspect.isclass)
    if hasattr(cls, 'class_name')
}

# Show the first five (python name, schema name) pairs
print(list(python_name_dict.items())[:5])

[('Activity', 'activity'), ('Agent', 'agent'), ('AttributeValue', 'attribute value'), ('Biosample', 'biosample'), ('BiosampleProcessing', 'biosample processing')]


### Use dict of class names to get info

In [17]:
# A bare name such as 'Study' is handled as well; the CURIE form is used here
# to exercise the prefix stripping.
class_name = 'nmdc:Study'
# Keep everything after the first ':' (the whole string when there is none)
x_name = class_name.split(":", 1)[-1]
# Translate the Python class name to the schema class name, then look it up
schema_class_name = python_name_dict[x_name]
x = view.get_class(schema_class_name)

In [18]:
x.name

'study'

In [19]:
x.slots

['ecosystem',
 'ecosystem_category',
 'ecosystem_type',
 'ecosystem_subtype',
 'specific_ecosystem',
 'principal investigator',
 'doi',
 'title',
 'alternative titles',
 'alternative descriptions',
 'alternative names',
 'abstract',
 'objective',
 'websites',
 'publications',
 'ess dive datasets',
 'type',
 'relevant protocols',
 'funding sources',
 'INSDC bioproject identifiers',
 'INSDC SRA ENA study identifiers',
 'GOLD study identifiers',
 'MGnify project identifiers',
 'has credit associations',
 'study image']

In [20]:
s = view.get_slot('websites')

In [21]:
s.description

'A list of websites that are assocatiated with the entity.'

In [22]:
list(x.slot_usage) # slot usage is a dict

['doi']

In [23]:
s = view.induced_slot('doi', x.name)

In [24]:
s.description # <-- should have the description: The dataset citation for this study

### If necessary, here is how you retrieve a class using a string.

In [25]:
# Looks up the class from its dotted name given as a string.
# NOTE(review): eval executes arbitrary code, so only use it with trusted
# strings; getattr(nmdc, 'Biosample') is a safer way to do the same lookup.
eval('nmdc.Biosample')

nmdc_schema.nmdc.Biosample

In [26]:
eval('Biosample')

nmdc_schema.nmdc.Biosample

In [27]:
inspect.isclass(eval('Biosample'))

True

In [28]:
x = eval('BiosampleProcessing')

In [29]:
x.class_name

'biosample processing'

### Example of how to get the permissible values for enums

In [67]:
file_enums = view.get_enum('file type enum').permissible_values # returns a dict

#### get a permissible value

In [68]:
tigr = file_enums['TIGRFam Annotation GFF']
tigr

PermissibleValue(text='TIGRFam Annotation GFF', description='GFF3 format file with TIGRfam', meaning=None, is_a=None, mixins=[], extensions={}, annotations={'file_name_pattern': Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})}, alt_descriptions={}, title=None, deprecated=None, todos=[], notes=[], comments=[], examples=[], in_subset=[], from_schema=None, imported_from=None, see_also=[], deprecated_element_has_exact_replacement=None, deprecated_element_has_possible_replacement=None)

In [61]:
tigr.description

'GFF3 format file with TIGRfam'

#### get the annotations for the permissible value
Note: trying to get the value of an annotation that doesn't exist will throw an error.

In [63]:
tigr.annotations

{'file_name_pattern': Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})}

In [64]:
tigr.annotations.get('file_name_pattern', None)

Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})

In [66]:
tigr.annotations.get('file_name_pattern', None).value

'[GOLD-AP]_tigrfam.gff'