In [12]:
!pip list | grep oak

oaklib                        0.5.8


In [13]:
from oaklib import get_adapter
from oaklib.datamodels.vocabulary import IS_A
adapter = get_adapter('pronto:hp-base.obo')
print(adapter.label("HP:0000001"))

All


In [40]:
from xml.etree.ElementTree import Element, SubElement, tostring, fromstring
from xml.dom import minidom

def assemble_xliff_file(translation_units):
    xliff = Element('xliff', version="1.2")
    file = SubElement(xliff, 'file', {'original': "HPO_classes", 'source-language': "en-US"})
    body = SubElement(file, 'body')

    for unit in translation_units:
        unit_xml = unit.toxml()
        unit_element = fromstring(unit_xml)
        body.append(unit_element)

    raw_string = tostring(xliff, 'utf-8')
    reparsed = minidom.parseString(raw_string)

    return reparsed.toprettyxml(indent="  ")
        

def assemble_xliff_translation_unit(identifier, id_normalised, label, element, value):
    if not label:
        label = "no label"
    trans_unit = Element('trans-unit', id=f"{id_normalised}_{element}")
    source = SubElement(trans_unit, 'source', {'xml:lang': 'en'})
    source.text = value
    note = SubElement(trans_unit, 'note')
    note.text = f"definition of {identifier} ({label})"
    raw = tostring(trans_unit, 'utf-8')
    reparsed = minidom.parseString(raw)
    return reparsed

def generate_translation_units(identifier, label, definition, synonyms):
    elements = list()
    
    id_normalised = identifier.replace(":","_")
    
    if label:
        reparsed_label = assemble_xliff_translation_unit(identifier, id_normalised, label, "label", label)
        elements.append(reparsed_label)

    if definition:
        reparsed_def = assemble_xliff_translation_unit(identifier, id_normalised, label, "definition", definition)
        elements.append(reparsed_def)
        
    if synonyms:
        synonyms_normalised = " ".join([f"#{synonym}" for synonym in synonyms])
        reparsed_synonyms = assemble_xliff_translation_unit(identifier, id_normalised, label, "synonym", synonyms_normalised)
        elements.append(reparsed_synonyms)
    return elements


translation_units = list()

for hp in adapter.descendants("HP:0011015", predicates=[IS_A]):
    label = adapter.label(hp)
    definition = adapter.definition(hp)
    alias_map = adapter.entity_alias_map(hp)
    synonyms = [v for p, vl in alias_map.items() if 'oio:hasExactSynonym'==p for v in vl]
    xml = generate_translation_units(hp, label, definition, synonyms)
    translation_units.extend(xml)

xliff = assemble_xliff_file(translation_units)
print(xliff)

<?xml version="1.0" ?>
<xliff version="1.2">
  <file original="HPO_classes" source-language="en-US">
    <body>
      <trans-unit id="HP_0004914_label">
        <source xml:lang="en">Recurrent infantile hypoglycemia</source>
        <note>definition of HP:0004914 (Recurrent infantile hypoglycemia)</note>
      </trans-unit>
      <trans-unit id="HP_0004914_definition">
        <source xml:lang="en">Recurrent episodes of decreased concentration of glucose in the blood occurring during the infantile period.</source>
        <note>definition of HP:0004914 (Recurrent infantile hypoglycemia)</note>
      </trans-unit>
      <trans-unit id="HP_0004914_synonym">
        <source xml:lang="en">#Episodic infantile hypoglycemia #Recurrent low blood sugar in infant</source>
        <note>definition of HP:0004914 (Recurrent infantile hypoglycemia)</note>
      </trans-unit>
      <trans-unit id="HP_0012051_label">
        <source xml:lang="en">Reactive hypoglycemia</source>
        <note>definition