# Test Schema View
Notebook that tests how to use the linkml schema view utility for interrogating the NMDC Schema.

In [1]:
from linkml_runtime.utils.schemaview import SchemaView
from nmdc_schema.get_nmdc_view import ViewGetter

In [2]:
vg = ViewGetter()

In [3]:
view = vg.get_view()

### Get list of 5 classes

In [4]:
list(view.all_classes())[0:5]

['NucleotideSequencing',
 'MassSpectrometry',
 'Configuration',
 'MassSpectrometryConfiguration',
 'ChromatographyConfiguration']

### Get list of 5 slots

In [5]:
list(view.all_slots())[0:5]

['polarity_mode',
 'mass_spectrum_collection_modes',
 'ionization_source',
 'mass_analyzers',
 'resolution_categories']

### Check if slot is mulitvalued

In [6]:
view.get_slot('study_set').multivalued

### Determine range of slot

In [7]:
view.get_slot('has_input').range

'NamedThing'

### Determine range as specified in the slot_usage

In [8]:
view.get_class('MaterialProcessing').slot_usage['has_input'].range

### If slot is not in slot_usage, an error will be throw. So, use function to determine slot range.

In [9]:
def get_class_slot_range(view_obj, class_name, slot_name):
    if slot_name in view_obj.get_class(class_name).slot_usage:
        return view_obj.get_class(class_name).slot_usage[slot_name].range
    else:
        return view_obj.get_slot(slot_name).range

print(get_class_slot_range(view, 'MaterialProcessing', 'has_input'))
        

None


### Test for non-existent slots

In [10]:
view.get_slot('foo') ## nothing returned

In [11]:
'foo' in view.get_class('MaterialProcessing').slots

False

### Use induced_slot method to find range

In [12]:
help(SchemaView.induced_slot)

Help on _lru_cache_wrapper in module linkml_runtime.utils.schemaview:

induced_slot(self, slot_name: Union[linkml_runtime.linkml_model.meta.SlotDefinitionName, str], class_name: Union[linkml_runtime.linkml_model.meta.ClassDefinitionName, str] = None, imports=True, mangle_name=False) -> linkml_runtime.linkml_model.meta.SlotDefinition
    Given a slot, in the context of a particular class, yield a dynamic SlotDefinition that
    has all properties materialized.
    
    This makes use of schema slots, such as attributes, slot_usage. It also uses ancestor relationships
    to infer missing values, for inheritable slots
    
    :param slot_name: slot to be queries
    :param class_name: class used as context
    :param imports: include imports closure
    :return: dynamic slot constructed by inference



In [13]:
view.induced_slot('has_input', 'MaterialProcessing').range

'NamedThing'

### SchemaView won't return the name of the class as it is named in the module. This does.

In [14]:
import inspect
from nmdc_schema import nmdc


In [15]:
python_name_dict = {}

for name, member in inspect.getmembers(nmdc):
    if inspect.isclass(member) and hasattr(member, 'class_name'):
        python_name_dict[name] = member.class_name
        
print(list(python_name_dict.items())[0:5])

[('AttributeValue', 'AttributeValue'), ('Biosample', 'Biosample'), ('CalibrationInformation', 'CalibrationInformation'), ('ChemicalConversionProcess', 'ChemicalConversionProcess'), ('ChemicalEntity', 'ChemicalEntity')]


### Use dict of class names to get info

In [16]:
class_name = 'nmdc:Study' # test for curie
x_name = class_name if -1 == class_name.find(":") else class_name.partition(":")[-1]
x = view.get_class(python_name_dict[x_name])

In [17]:
x.name

'Study'

In [18]:
x.slots

['emsl_project_identifiers',
 'gnps_task_identifiers',
 'gold_study_identifiers',
 'insdc_bioproject_identifiers',
 'jgi_portal_study_identifiers',
 'mgnify_project_identifiers',
 'neon_study_identifiers',
 'related_identifiers',
 'alternative_descriptions',
 'alternative_names',
 'alternative_titles',
 'ecosystem',
 'ecosystem_category',
 'ecosystem_subtype',
 'ecosystem_type',
 'specific_ecosystem',
 'associated_dois',
 'funding_sources',
 'has_credit_associations',
 'homepage_website',
 'notes',
 'objective',
 'part_of',
 'principal_investigator',
 'protocol_link',
 'study_category',
 'study_image',
 'title',
 'websites']

In [19]:
s = view.get_slot('websites')

In [20]:
s.description

'A list of websites that are associated with the entity.'

In [21]:
list(x.slot_usage) # slot usage is a dict

['id',
 'name',
 'websites',
 'homepage_website',
 'description',
 'notes',
 'alternative_identifiers',
 'alternative_names',
 'related_identifiers',
 'insdc_bioproject_identifiers',
 'part_of',
 'protocol_link']

In [22]:
s = view.induced_slot('associated_dois', x.name)

In [23]:
s.description

'A list of DOIs associated with a resource, such as a list of DOIS associated with a Study.'

### If necessary, here is how you retrieve a class using a string.

In [24]:
eval('nmdc.Biosample')

nmdc_schema.nmdc.Biosample

In [25]:
inspect.isclass(eval('nmdc.Biosample'))

True

### Example of how to get the permissible values for enums

In [26]:
file_enums = view.get_enum('FileTypeEnum').permissible_values # returns a dict

#### get a permissible value

In [27]:
tigr = file_enums['TIGRFam Annotation GFF']
tigr

PermissibleValue(text='TIGRFam Annotation GFF', description='GFF3 format file with TIGRfam', meaning=None, unit=None, is_a=None, mixins=[], extensions={}, annotations={'file_name_pattern': Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})}, alt_descriptions={}, title=None, deprecated=None, todos=[], notes=[], comments=[], examples=[], in_subset=[], from_schema=None, imported_from=None, source=None, in_language=None, see_also=[], deprecated_element_has_exact_replacement=None, deprecated_element_has_possible_replacement=None, aliases=[], structured_aliases={}, mappings=[], exact_mappings=[], close_mappings=[], related_mappings=[], narrow_mappings=[], broad_mappings=[], created_by=None, contributors=[], created_on=None, last_updated_on=None, modified_by=None, status=None, rank=None, categories=[], keywords=[])

In [28]:
tigr.description

'GFF3 format file with TIGRfam'

#### get the annotations for the permissible value
note: trying to get the value from an annotation that doesn't exist will throw an error

In [29]:
tigr.annotations

{'file_name_pattern': Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})}

In [30]:
tigr.annotations.get('file_name_pattern', None)

Annotation(tag='file_name_pattern', value='[GOLD-AP]_tigrfam.gff', extensions={}, annotations={})

In [31]:
tigr.annotations.get('file_name_pattern', None).value

'[GOLD-AP]_tigrfam.gff'