In [None]:
# default_exp structure

# structure

> Basic functionality related to the structure of the (XML) data

In [None]:
# export
import re

import pandas as pd

## Nested tags

When a *tag* is nested inside another *tag*, the string below is used to assemble the name of the resulting *column* from the names of the original tags.

In [None]:
# export
# String placed between tag names when a column name is assembled from nested tags.
nested_tags_separator = " - "

For instance, a *tag* `<month>` inside a *tag* `<date>` will yield a *column* named

In [None]:
# Same column name, spelled out as an explicit concatenation around the separator.
'month' + nested_tags_separator + 'date'

'month - date'

A high-level function to do exactly that

In [None]:
# export
def assemble_name(tags: list) -> str:
    """
    Build the name of a field/column in the DataFrame from a path of nested tags.

    Entries for which `pd.notna` is false (e.g. `NaN`) and empty strings are
    dropped; the remaining tags are joined, in their original order, with
    `nested_tags_separator`.

    **Parameters**

    - tags: list

        List of tags.

    **Returns**

    - out: str

        A suitable name.

    """

    # The missing-value check runs first, so only non-missing entries are compared with ''.
    kept = [tag for tag in tags if pd.notna(tag) and tag != '']

    return nested_tags_separator.join(kept)

In [None]:
# Two regular tags: both are kept and joined with the separator.
assemble_name(['foo', 'fa'])

'foo - fa'

`NaN`s are ignored...

In [None]:
# float('nan') is used as the NaN value because numpy is not imported in this
# notebook (`np` would raise a NameError on a fresh kernel).
assemble_name(['foo', float('nan')])

'foo'

...and so are empty strings

In [None]:
# float('nan') is used as the NaN value because numpy is not imported in this
# notebook (`np` would raise a NameError on a fresh kernel).
assemble_name(['foo', float('nan'), ''])

'foo'

In [None]:
# hide
# Run nbdev's notebook2script on this notebook; the cell output reports the conversion.
# NOTE(review): presumably this writes the `# export` cells to the module named by
# `# default_exp` — confirm against the nbdev version in use.
import nbdev.export
nbdev.export.notebook2script('05_structure.ipynb')

Converted 05_structure.ipynb.
