# Python code to parse nexus base class nxdl files to python dict and create owl ontology from dict
# Note this is not the ontology to use, just an illustration

# To do:
# NeXus application definitions - create and doc
# find all base classes in application definitions and process with original set, adding new terms to class dictionary.
# example skos:broader


In [1]:
# Get a list of NeXus base class urls from github.
#
# Result: base_class_url, a list with the raw download url of every
# '*.nxdl.xml' file in the 'base_classes' folder of nexusformat/definitions.

#pip install pygithub
import os

from github import Github

# SECURITY: never commit a personal access token. The token is read from the
# GITHUB_TOKEN environment variable; if it is unset, the public repository is
# accessed anonymously (subject to a lower API rate limit). Any token that was
# previously hard-coded in this notebook should be revoked.
token = os.environ.get('GITHUB_TOKEN')

g = Github(token)
repo = g.get_repo('nexusformat/definitions')

# Filter on the file name itself rather than on the repr of the ContentFile
# object, so that entries without a '.' in their name cannot raise IndexError.
base_class_url = [
    file.download_url
    for file in repo.get_contents("base_classes")
    if file.name.endswith('.nxdl.xml')
]
    


In [2]:
# Parse the NeXus base class NXDL files (fetched by url) into a python dictionary.
#
# Result layout:
#   classDict[className] = {'xml_file', 'extends', 'classDoc', 'fields'}
#   classDict[className]['fields'][long_name] = {'fieldName', 'units', 'type', 'fieldDoc'}
# where long_name is className + join_string + field name.

import xml.dom.minidom
import xml.parsers.expat
import os
import yaml
import urllib
import urllib.request   # 'import urllib' alone does not guarantee the request submodule is loaded
import http.client
import time

_maxTries = 10 # try to parse file this many times before giving up

# Errors that are worth retrying: network/HTTP failures and malformed XML
# (e.g. from a truncated download).
_PARSE_ERRORS = (OSError, http.client.HTTPException, xml.parsers.expat.ExpatError)


#folder_pattern = '/dls/science/users/spc93/misc_nexus_data/nexus_classes/baseclasses/NX*.xml'

join_string = ':'      #string added between joined names

#def get_file_list_from_folder(folder_pattern):
#    file_list = []
#    ff = os.popen('ls ' + folder_pattern)
#    file_list += ff.read().splitlines()
#    return(file_list)


#file_list = get_file_list_from_folder(folder_pattern)


def _doc_text(node):
    """Return the text of the first <doc> element below *node*, newlines removed.

    Returns '' when there is no <doc> element or it has no leading text node.
    """
    try:
        return node.getElementsByTagName('doc')[0].firstChild.nodeValue.replace('\n', '')
    except (IndexError, AttributeError):
        return ''


classDict = {}

#for file in file_list:
for file in base_class_url:
    print(file)

    # Reset for every file so that a failed download can never fall through
    # and silently reuse the DOM of the previously parsed file.
    dom1 = None
    for attempt in range(1, _maxTries + 1):
        try:
            # The context manager closes the HTTP response once parsing is done.
            with urllib.request.urlopen(file) as response:
                dom1 = xml.dom.minidom.parse(response)
            break
        except _PARSE_ERRORS as err:
            print('=== Problem parsing %s (attempt %i of %i): %s' % (file, attempt, _maxTries, err))
            if attempt < _maxTries:
                time.sleep(1)

    if dom1 is None:
        print('=== Giving up on %s after %i tries' % (file, _maxTries))
        continue

    defn = dom1.getElementsByTagName('definition')[0]

    className = defn.getAttribute('name') #class name from name attribute in definition

    fields = {}
    for field in defn.getElementsByTagName('field'):
        field_name = field.getAttribute('name')
        long_name = className + join_string + field_name
        fields[long_name] = {
            'fieldName': field_name,
            'units': field.getAttribute('units'),
            'type': field.getAttribute('type') or 'NX_CHAR',   # default if not specified
            'fieldDoc': _doc_text(field),
        }

    # each class is a dictionary
    classDict[className] = {
        'xml_file': file,
        #category = defn.getAttribute('category')
        'extends': defn.getAttribute('extends'),
        # first <doc> element in the document
        'classDoc': _doc_text(dom1),
        'fields': fields,
    }


print(yaml.dump(classDict))



https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXaperture.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXattenuator.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXbeam.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXbeam_stop.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXbending_magnet.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXcapillary.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXcite.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXcollection.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXcollimator.nxdl.xml
https://raw.githubusercontent.com/nexusformat/definitions/master/base_classes/NXcrystal.nxdl.xml
https://raw.git

In [3]:
# Build an illustrative OWL ontology from classDict (the dictionary of parsed
# NeXus base classes) and save it.
#
# Expects classDict[name] to provide 'classDoc', 'xml_file' and 'fields', and
# each entry of 'fields' to provide 'fieldDoc'.

#pip install owlready2

from owlready2 import *
#onto_path.append("/dls/science/users/spc93/ontology")
import types

onto = get_ontology('file:///home/spc93/ontology/NeXus_onto2.owl')


with onto:

    ## Classes (generic)

    class NeXus(Thing):
        comment = 'NeXus concept bla bla'

    class NeXusField(NeXus):
        comment = 'NeXus field. Unique names are created by prepending the NeXus class name to the NeXus field name'

    class NXobject(NeXus):
        comment = 'NXobject'

    class NeXusBaseClass(NXobject):
        comment = 'NeXusBaseClass'

    class NeXusDefinition(NXobject):
        comment = 'NeXus Application Definition'

    class NeXusUnitCategory(NeXus):
        comment = 'NeXus Unit Category'

    class NeXusUnit(NeXusUnitCategory):
        comment = 'NeXus Unit'

    ## properties (generic)
    ## 'Domain >> Range' is the owlready2 shorthand for declaring a property
    ## together with its domain and range.

    class hasField(NXobject >> NeXusField):
        comment = 'NXobject has NeXus field relationship'

    ## is there a shorthand form with inverse?
    ## how to assert the reverse inverse? By reasoner?

    class isFieldOf(ObjectProperty):
        domain           = [NeXusField]
        range            = [NXobject]
        inverse_property = hasField

    class hasUnit(NeXusField >> NeXusUnit):
        comment = 'NeXus Field has Unit relationship'

    class citesGroup(NXobject >> NeXusBaseClass):
        comment = 'NXobject cites base class relationship'

    class hasFieldName(NeXusField >> str):
        comment = 'NeXus field name of NeXus field. NeXus field names are unique only within the NeXus class in which they are defined.'

    class NXDLurl(AnnotationProperty):
        pass

    ## relationships (generic)

    ## from NXDL files

    for nxBaseClass, _class_info in classDict.items():

        if nxBaseClass == 'NXobject':    # NXobject can't be subclass of NXobject
            continue

        # One ontology class per NeXus base class, created dynamically by name.
        _nx_class = types.new_class(nxBaseClass, (NeXusBaseClass,))
        if _class_info['classDoc']:      # don't add empty comment annotations
            _nx_class.comment.append(_class_info['classDoc'])
        _nx_class.NXDLurl.append(_class_info['xml_file'])

        for nxField, _field_info in _class_info['fields'].items():
            # One ontology class per field, named '<class><join_string><field>'.
            _nx_field = types.new_class(nxField, (NeXusField,))
            if _field_info['fieldDoc']:  # don't add empty comment annotations
                _nx_field.comment.append(_field_info['fieldDoc'])
            # Existential restriction: the base class hasField some <field class>.
            _nx_class.is_a.append(hasField.some(_nx_field))

onto.save()




In [6]:
#############  test code for application definitions

# Get a list of NeXus application definition urls from github.
#
# Result: application_url, a list with the raw download url of every
# '*.nxdl.xml' file in the 'applications' folder of nexusformat/definitions.

#pip install pygithub
import os

from github import Github

# SECURITY: never commit a personal access token. The token is read from the
# GITHUB_TOKEN environment variable; if it is unset, the public repository is
# accessed anonymously (subject to a lower API rate limit). Any token that was
# previously hard-coded in this notebook should be revoked.
token = os.environ.get('GITHUB_TOKEN')

g = Github(token)
repo = g.get_repo('nexusformat/definitions')

# Filter on the file name itself; this needs no blanket try/except to cope
# with entries whose name contains no '.'.
application_url = [
    file.download_url
    for file in repo.get_contents("applications")
    if file.name.endswith('.nxdl.xml')
]

In [7]:
application_url

['https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXarchive.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXarpes.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXcanSAS.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXdirecttof.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXfluo.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXindirecttof.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXiqproc.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXlauetof.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXmonopd.nxdl.xml',
 'https://raw.githubusercontent.com/nexusformat/definitions/master/applications/NXmx.nxdl.xml

In [23]:
# Parse one NeXus application definition via url and print the 'type'
# attribute of every <group> element it contains.

import xml.dom.minidom
import os
import yaml
import urllib
import urllib.request   # 'import urllib' alone does not guarantee the request submodule is loaded
import time


# Index 4 is just an arbitrary example entry of application_url.
# The context manager closes the HTTP response once parsing is done.
with urllib.request.urlopen(application_url[4]) as response:
    dom1 = xml.dom.minidom.parse(response)

group = dom1.getElementsByTagName('group')

for grp in group:
    print(grp.getAttribute('type'))
    print()

# Ctrl+/ to comment/uncomment


# ''

# _maxTries = 10 # try to parse file this many times before giving up


# #folder_pattern = '/dls/science/users/spc93/misc_nexus_data/nexus_classes/baseclasses/NX*.xml'

# join_string = ':'      #string added between joined names


# classDict = {}

# #for file in file_list:
# for file in base_class_url:
#     print(file)

#     #dom1 = xml.dom.minidom.parse(file)
#     #dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file))
#     for i in range(_maxTries):
#         try:
#             dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file))
#             break
#         except:
#             print('=== Problem parsing %s; try %i times then give up' % (file, _maxTries))
#             time.sleep(1)
    
#     defn = dom1.getElementsByTagName('definition')[0]
    
#     className = defn.getAttribute('name') #class name from name attribute in definition
#     classDict[className] = {} # each class is a dictionary
#     classDict[className]['xml_file'] = file

#     #category = defn.getAttribute('category')
#     classDict[className]['extends'] = defn.getAttribute('extends')
#     classDict[className]['classDoc'] = dom1.getElementsByTagName('doc')[0].firstChild.nodeValue.replace('\n','')
    
    
#     classDict[className]['fields'] = {}
    
#     for field in defn.getElementsByTagName('field'):
#         field_name = field.getAttribute('name')
#         long_name = className + join_string + field_name
#         classDict[className]['fields'][long_name] = {}
        
#         classDict[className]['fields'][long_name]['fieldName'] = field_name
#         classDict[className]['fields'][long_name]['units'] = field.getAttribute('units')
        
#         _type = field.getAttribute('type')
#         if _type == '':
#             _type = 'NX_CHAR'   # default if not specified

#         classDict[className]['fields'][long_name]['type'] = _type
        
#         try:
#             field_doc = field.getElementsByTagName('doc')[0].firstChild.nodeValue.replace('\n','')
#         except:
#             field_doc = ''
#         classDict[className]['fields'][long_name]['fieldDoc'] = field_doc
    

# print(yaml.dump(classDict))
# ''''


NXentry

NXinstrument

NXsource

NXmonochromator

NXdetector

NXsample

NXmonitor

NXdata



In [15]:
# Interactive inspection of the parsed document and of `group`; the notebook
# shows only the value of the last expression.
dom1
group

<DOM Element: group at 0x7f00e1674af0>

In [21]:
# 'name' attribute of the element currently bound to `grp`
# (presumably the last <group> visited by the loop over `group`).
grp.getAttribute('name')

'data'

In [22]:
# 'type' attribute of the element currently bound to `grp`
# (presumably the last <group> visited by the loop over `group`).
grp.getAttribute('type')

'NXdata'