### Attribute Inventory to Metadata v01
Potential future work:
- Load template (MRCOG logo, Contact info, Use limitations)
- Load enumerated domain


In [12]:
# Configuration cell: set the four paths below, then run this cell.
# Paste full file path, not just layer name

# Target feature class for metadata
target = r'\\c15\data\GISWork\DataInventory\2021\DASZs\DASZs.gdb\SE_DASZ_2016_2040_Trend'

# Folder to save XML metadata file
workingfolder = r'\\c15\data\GISWork\DataInventory\2021\DASZs'

# Data inventory with information to load into metadata
inventory = r'\\c15\data\GISWork\DataInventory\2021\DASZs\SE_DASZ_2016_2040_Trend_20211215.xlsx'

# Path to MRCOG metadata template file
mrcog_template = r''

from datetime import datetime
start_time = datetime.now()

# Named fields keep each label next to the value it describes: the metadata is
# built FOR the target, FROM the inventory workbook, IN the working folder.
# (Positional fields previously printed the folder under "From:" and the
# workbook under "In:".)
banner = '''
{start}
Ready to create metadata...
For: {target}
From: {inventory}
In: {folder}
'''.format(
    start=start_time,
    target=target,
    inventory=inventory,
    folder=workingfolder,
)
print(banner)

Run next cell.


In [13]:
# Export cell: optionally seed the target's metadata from the template,
# synchronize it with the feature class, and export it to an XML file in the
# working folder. Reads `target`, `workingfolder`, `mrcog_template` and
# `datetime` from the configuration cell.
print('Running...')

# arcpy is used below; import it explicitly so this cell does not depend on the
# host application having pre-loaded it into the kernel.
import arcpy
import xml.etree.ElementTree as ET
from os import path
import openpyxl
from string import ascii_uppercase

md = arcpy.metadata.Metadata(target)

# Copy the template onto the target only when a template path was actually
# supplied. With the path left blank there is no template to copy, so the
# target's existing metadata is left as it is.
template_md = None
if mrcog_template:
    template_md = arcpy.metadata.Metadata(mrcog_template)
    md.copy(template_md)
    md.save()

# Refresh the metadata from the item itself before exporting.
md.synchronize('ALWAYS')

# The XML file is named after the feature class (last component of `target`).
fcname = path.split(target)[1]
workxml = path.join(workingfolder, f'{fcname}.xml')
md.exportMetadata(workxml, 'FGDC_CSDGM', 'REMOVE_ALL_SENSITIVE_INFO')

cell_time = datetime.now()

print('{}. Run next cell.'.format(cell_time))

Running...
Run next cell.


In [14]:
# Inventory cell: read the data-inventory workbook and write its definitions,
# sources and domains into the exported metadata XML (`workxml`).
# Reads `inventory`, `workxml`, `openpyxl`, `ET` and `datetime` from earlier cells.
print('Running...')

# Imported here because this cell is its only user: maps a 1-based column
# number to its spreadsheet letter(s), including columns past 'Z'.
from openpyxl.utils import get_column_letter

wb = openpyxl.load_workbook(filename=inventory)
ws = wb['Sheet']

# Zero-based column index -> column letter, for every used column of the sheet.
columnDict = {col: get_column_letter(col + 1) for col in range(ws.max_column)}

# The header row is the first row whose column A holds 'Field Name'. The search
# is bounded by the last used row so a missing header raises instead of
# looping forever.
headRow = 0
for row in range(1, ws.max_row + 1):
    if ws[f'A{row}'].value == 'Field Name':
        headRow = row
        break
if headRow == 0:
    raise ValueError(
        "No 'Field Name' header found in column A of sheet 'Sheet' in {}".format(inventory)
    )

# Header captions in column order; used to find each column by its caption.
schema = [ws[f'{columnDict[col]}{headRow}'].value for col in range(ws.max_column)]

# Column letters of the inventory columns that feed the metadata.
# schema.index raises ValueError if a caption is missing from the header row.
f = columnDict[schema.index('Field Name')]
d = columnDict[schema.index('Definition')]
s = columnDict[schema.index('Def. Source')]
dom = columnDict[schema.index('Domain Type')]
mn = columnDict[schema.index('Minimum')]
mx = columnDict[schema.index('Maximum')]
u = columnDict[schema.index('Range Units')]
n = columnDict[schema.index('Domain Notes')]

a = headRow + 1      # first data row
b = ws.max_row + 1   # one past the last data row


def _by_field(letter):
    """Map each field name (column `f`) to the value in column `letter` over the data rows."""
    return {ws[f'{f}{i}'].value: ws[f'{letter}{i}'].value for i in range(a, b)}


def _as_text(value):
    """Return `value` as element text; None stays None so the element is written empty.

    ElementTree refuses to serialize non-string text, so numeric cells must be
    converted before they are assigned to an element.
    """
    return None if value is None else str(value)


defs = _by_field(d)
sources = _by_field(s)
doms = _by_field(dom)
units = _by_field(u)
notes = _by_field(n)

# Only rows where neither bound is 'n/a' get a [minimum, maximum] entry.
minmax = {
    name: [low, high]
    for name, low, high in (
        (ws[f'{f}{i}'].value, ws[f'{mn}{i}'].value, ws[f'{mx}{i}'].value)
        for i in range(a, b)
    )
    if 'n/a' not in [low, high]
}


tree = ET.parse(workxml)
root = tree.getroot()

# Simple text children of <attr>: tag name and the lookup that supplies its text.
firstlevel = [
    ['attrdef', defs],
    ['attrdefs', sources],
]

for attr in root.iter('attr'):
    # Look children up by tag rather than by position, so attributes that lack
    # a definition or source are still handled correctly.
    label = attr.findtext('attrlabl')
    if label is None or label not in defs:
        continue

    # Add definition / definition source only where the XML does not have one yet.
    tags = [child.tag for child in attr]
    for tag, values in firstlevel:
        value = values[label]
        if tag not in tags and value and value != 'n/a':
            ET.SubElement(attr, tag).text = _as_text(value)

    # Leave a domain that is already documented in the XML untouched.
    domain = attr.find('attrdomv')
    if domain is not None and len(domain):
        continue

    domain_type = doms[label]
    if domain_type == 'Range' and label not in minmax:
        # Minimum or Maximum is 'n/a' in the inventory: a range cannot be written.
        print("Skipped range domain for '{}': Minimum/Maximum is 'n/a'.".format(label))
        continue

    if domain is None:
        if not domain_type or domain_type == 'n/a':
            continue
        domain = ET.SubElement(attr, 'attrdomv')

    if domain_type == 'Range':
        rdom = ET.SubElement(domain, 'rdom')
        ET.SubElement(rdom, 'rdommin').text = str(minmax[label][0])
        ET.SubElement(rdom, 'rdommax').text = str(minmax[label][1])
        ET.SubElement(rdom, 'attrunit').text = _as_text(units[label])
    elif domain_type == 'Unrepresentable':
        ET.SubElement(domain, 'udom').text = _as_text(notes[label])
    elif domain_type == 'Enumerated':
        # Placeholder only: enumerated values are not loaded from the inventory.
        ET.SubElement(domain, 'edom')
    elif domain_type == 'Codeset':
        # Placeholder only: codeset details are not loaded from the inventory.
        ET.SubElement(domain, 'codesetd')

tree.write(workxml)

cell_time = datetime.now()

print('{}. Complete.'.format(cell_time))

Running...
Complete.


### Map of attributes (attr) in metadata xml:

Index:  Tag:        Text:<br>
0       attrlabl    (field name)<br>
1       attrdef     (definition)<br>
2       attrdefs    (source)<br>
3       attrdomv    <br>

#### Map of attrdomv (attr\[3\])
0   rdom<br>
0,0     rdommin<br>
0,1     rdommax<br>
0,2     attrunit<br>
<br>
OR<br>
<br>
0   udom    Text:(description)<br>
<br>
OR<br>
<br>
0   edom<br>
0,0     edomv   Text:(value)<br>
0,1     edomvd  Text:(value definition)<br>
0,2     edomvds Text:(val def source)<br>
1   edom (structure repeats for each enumerated value)<br>

#### Codeset domain (codesetd) structure: TBD