In [142]:
# Script to build term list pages using Markdown.
# Steve Baskauf 2020-06-28
# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG Github site

import re
import requests   # best library to manage HTTP transactions
import csv        # library to read/write/parse CSV files
import json       # library to convert JSON to Python data structures
import pandas as pd

# -----------------
# Configuration section
# -----------------

# !!!! NOTE !!!!
# There is not currently an example of a complex vocabulary that has the column headers
# used in the sample files. In order to test this script, it uses the Audubon Core files,
# which have headers that differ from the samples. So throughout the code, there are
# pairs of lines where the default header names are commented out and the Audubon Core
# headers are not. To build a page using the sample files, you will need to reverse the
# commenting of these pairs.

# Base URL for raw files on the branch of the rs.tdwg.org repo that has been pushed to GitHub.
githubBaseUri = "https://raw.githubusercontent.com/tdwg/rs.tdwg.org/practice/"

# Static Markdown documents merged around the generated content, and the output page.
headerFileName = "termlist-header.md"
footerFileName = "termlist-footer.md"
outFileName = "../docs/ac.md"

# Term lists to be included in the document. Each entry is matched against the
# 'database' column of term-lists/term-lists.csv and is also used to build the
# paths of the per-list CSV files.
termLists = [
    "audubon",
    "exif-for-ac",
    "xmp-for-ac",
    "dwc-for-ac",
    "dc-for-ac",
    "dcterms-for-ac",
]
#termLists = ['pathway']

# NOTE! There may be problems unless every term list is of the same vocabulary type,
# since the number of columns will differ. However, there probably aren't any
# circumstances where mixed types will be used to generate the same page.
# 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy
vocab_type = 1

# Terms in large vocabularies like Darwin and Audubon Cores may be organized into
# categories using tdwgutility_organizedInClass. If so, those categories can be used
# to group terms in the generated term list document.
organized_in_categories = True

# The four lists below are parallel: position N in each describes the same category.
# If organized in categories, display_order must contain the IRIs that are values of
# tdwgutility_organizedInClass. If not organized into categories, the value is
# irrelevant; there just needs to be one item in the list.
display_order = [
    'http://rs.tdwg.org/dwc/terms/attributes/Management',
    'http://rs.tdwg.org/dwc/terms/attributes/Attribution',
    'http://purl.org/dc/terms/Agent',
    'http://rs.tdwg.org/dwc/terms/attributes/ContentCoverage',
    'http://purl.org/dc/terms/Location',
    'http://purl.org/dc/terms/PeriodOfTime',
    'http://rs.tdwg.org/dwc/terms/attributes/TaxonomicCoverage',
    'http://rs.tdwg.org/dwc/terms/attributes/ResourceCreation',
    'http://rs.tdwg.org/dwc/terms/attributes/RelatedResources',
    'http://rs.tdwg.org/ac/terms/ServiceAccessPoint',
]

# Section labels for the categories in the page.
display_label = [
    'Management Vocabulary',
    'Attribution Vocabulary',
    'Agents Vocabulary',
    'Content Coverage Vocabulary',
    'Geography Vocabulary',
    'Temporal Coverage Vocabulary',
    'Taxonomic Coverage Vocabulary',
    'Resource Creation Vocabulary',
    'Related Resources Vocabulary',
    'Service Access Point Vocabulary',
]

# Markdown comments inserted after the section label of each category ('' for none).
display_comments = [
    '',
    '',
    '',
    '',
    'Note that [dwc:locality](http://rs.tdwg.org/dwc/terms/locality) may be used, but as applied to media this term may be ambiguous as to whether it applies to the location depicted or the location at which the media was created. When disambiguating information is available, it is better to use the terms Location Shown and Location Created. The latter is in the Resource Creation Vocabulary.\n\nLocation Created and Location Shown are separated in the current version of IPTC, and the Metadata Working Group ([Metadata Working Group Guidelines for Handling Image Metadata, Version 2.0, November 2010](https://web.archive.org/web/20180919181934/http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf)) also recommends this. We follow this below in order to support the expected future increase of automatic GPS-based coordinate recording. As a special case, the AC group recommends to change the semantics of Location Shown in the case of biodiversity specimens, where the original location may differ from the current location at which the specimen is held in a collection. In this case, Location Shown should exclusively refer to the location where a specimen was originally collected (gathering or sampling location). Use Location Created to express the location where the resource was created (a specimen was digitized).\n\n',
    '',
    '',
    '',
    '',
    'These terms are representation-dependent metadata, referring to specific digital representations of a resource (e.g., a specific resolution, quality, or format). They are used within whatever a particular AC implementation assigns to the value of `ac:hasServiceAccessPoint`, whose label is simply "Service Access Point." Note that it is possible for an implementation to use syntactic conventions that avoid direct use of `ac:hasServiceAccessPoint`, as illustrated in the final example in the section [Multiplicity/Cardinality in the Audubon Core Structure document](structure.md#3-multiplicity-and-cardinality).\n\n',
]

# Fragment identifiers for the sections associated with the categories.
display_id = [
    'Management_Vocabulary',
    'Attribution_Vocabulary',
    'Agents_Vocabulary',
    'Content_Coverage_Vocabulary',
    'Geography_Vocabulary',
    'Temporal_Coverage_Vocabulary',
    'Taxonomic_Coverage_Vocabulary',
    'Resource_Creation_Vocabulary',
    'Related_Resources_Vocabulary',
    'Service_Access_Point_Vocabulary',
]

#display_order = ['']
#display_label = ['Vocabulary'] # these are the section labels for the categories in the page
#display_comments = [''] # these are the comments about the category to be appended following the section labels
#display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories

# ---------------
# Function definitions
# ---------------

# replace URL with link
#
def createLinks(text):
    """Wrap every http(s) URL found in ``text`` in an HTML anchor element.

    A URL starts at ``http://`` or ``https://`` and runs until the first
    whitespace character, comma, semicolon, closing parenthesis or double
    quote. If the matched URL ends with a period, that period is treated as
    sentence punctuation: it is left out of the link and re-emitted after the
    closing ``</a>``.

    Parameters:
        text: the string to scan.

    Returns:
        ``text`` with each URL replaced by ``<a href="URL">URL</a>``.
    """
    def repl(match):
        url = match.group(1)
        trailing = ''
        if url.endswith('.'):
            # A final period almost certainly ends the sentence, not the URL.
            url = url[:-1]
            trailing = '.'
        return '<a href="' + url + '">' + url + '</a>' + trailing

    # Raw string: `\s` and `\)` are regex escapes, not Python string escapes.
    # Written as a plain literal they are invalid escape sequences and make
    # recent Python versions emit a warning.
    pattern = r'(https?://[^\s,;\)"]*)'
    return re.sub(pattern, repl, text)

In [143]:
# Build one metadata dictionary per configured term list, holding the term list's
# database name, preferred namespace prefix, preferred namespace URI and list IRI.
term_lists_info = []

frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)
for termList in termLists:
    term_list_dict = {'database': termList}
    # Only rows whose 'database' value names this term list contribute. If several
    # rows match, the last one wins. If none match, the dictionary holds only the
    # 'database' key.
    for index,row in frame[frame['database'] == termList].iterrows():
        term_list_dict['pref_ns_prefix'] = row['vann_preferredNamespacePrefix']
        term_list_dict['pref_ns_uri'] = row['vann_preferredNamespaceUri']
        term_list_dict['list_iri'] = row['list']
    term_lists_info.append(term_list_dict)
print(term_lists_info)

[{'database': 'audubon', 'pref_ns_prefix': 'ac', 'pref_ns_uri': 'http://rs.tdwg.org/ac/terms/', 'list_iri': 'http://rs.tdwg.org/ac/terms/'}, {'database': 'exif-for-ac', 'pref_ns_prefix': 'exif', 'pref_ns_uri': 'http://ns.adobe.com/exif/1.0/', 'list_iri': 'http://rs.tdwg.org/ac/exif/'}, {'database': 'xmp-for-ac', 'pref_ns_prefix': 'xmp', 'pref_ns_uri': 'http://ns.adobe.com/xap/1.0/', 'list_iri': 'http://rs.tdwg.org/ac/xmp/'}, {'database': 'dwc-for-ac', 'pref_ns_prefix': 'dwc', 'pref_ns_uri': 'http://rs.tdwg.org/dwc/terms/', 'list_iri': 'http://rs.tdwg.org/ac/dwc/'}, {'database': 'dc-for-ac', 'pref_ns_prefix': 'dc', 'pref_ns_uri': 'http://purl.org/dc/elements/1.1/', 'list_iri': 'http://rs.tdwg.org/ac/dc/'}, {'database': 'dcterms-for-ac', 'pref_ns_prefix': 'dcterms', 'pref_ns_uri': 'http://purl.org/dc/terms/', 'list_iri': 'http://rs.tdwg.org/ac/dcterms/'}]


In [144]:
# Build a single dataframe (terms_df) with one row per term from every configured
# term list, then produce copies sorted by label and by term local name.

# Create column list
column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'rdfs_comment', 'skos_scopeNote', 'dcterms_description', 'term_modified', 'term_deprecated', 'rdf_type']
#column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'term_modified', 'term_deprecated', 'type']
if vocab_type == 2:
    column_list += ['controlled_value_string']
elif vocab_type == 3:
    column_list += ['controlled_value_string', 'skos_broader']
if organized_in_categories:
    column_list.append('tdwgutility_organizedInClass')
column_list.append('version_iri')

# Create list of lists metadata table
table_list = []
for term_list in term_lists_info:
    # retrieve versions metadata for term list
    versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'
    versions_df = pd.read_csv(versions_url, na_filter=False)

    # Map each term local name to the IRI of its recommended version, built once per
    # term list instead of rescanning the versions table for every term.
    # Borrowed terms really don't have implemented versions. They may be lacking values
    # for version_status, in which case they get no entry and their version IRI is omitted.
    recommended_versions = {}
    for vindex, vrow in versions_df.iterrows():
        if vrow['version_status'] == 'recommended':
            version_iri = vrow['version']
            # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI
            # endswith() also copes with an empty version string, which indexing the last character would not.
            if version_iri.endswith('#'):
                version_iri = ''
            # A later recommended row for the same local name replaces an earlier one.
            recommended_versions[vrow['term_localName']] = version_iri

    # retrieve current term metadata for term list
    data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'
    frame = pd.read_csv(data_url, na_filter=False)
    for index,row in frame.iterrows():
        row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['rdfs_comment'], row['skos_scopeNote'], row['dcterms_description'], row['term_modified'], row['term_deprecated'], row['rdf_type']]
        #row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['term_modified'], row['term_deprecated'], row['type']]
        if vocab_type == 2:
            row_list += [row['controlled_value_string']]
        elif vocab_type == 3:
            if row['skos_broader'] =='':
                row_list += [row['controlled_value_string'], '']
            else:
                # the broader concept is recorded as a CURIE in this term list's namespace
                row_list += [row['controlled_value_string'], term_list['pref_ns_prefix'] + ':' + row['skos_broader']]
        if organized_in_categories:
            row_list.append(row['tdwgutility_organizedInClass'])

        # Terms without a recommended version get an empty version IRI.
        row_list.append(recommended_versions.get(row['term_localName'], ''))

        table_list.append(row_list)

# Turn list of lists into dataframe
terms_df = pd.DataFrame(table_list, columns = column_list)

# NOTE(review): sort_values compares strings case-sensitively, so capitalized local
# names sort ahead of lowercase ones. Confirm whether a lowercase ordering is wanted.
terms_sorted_by_label = terms_df.sort_values(by='label')
terms_sorted_by_localname = terms_df.sort_values(by='term_localName')
terms_sorted_by_label

Unnamed: 0,pref_ns_prefix,pref_ns_uri,term_localName,label,rdfs_comment,skos_scopeNote,dcterms_description,term_modified,term_deprecated,rdf_type,tdwgutility_organizedInClass,version_iri
0,ac,http://rs.tdwg.org/ac/terms/,accessURI,Access URI,A URI that uniquely identifies a service that ...,If this resource can be acquired by an http re...,"Value might point to something offline, such a...",2020-01-27,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/ac/terms/ServiceAccessPoint,http://rs.tdwg.org/ac/terms/version/accessURI-...
1,ac,http://rs.tdwg.org/ac/terms/,associatedObservationReference,Associated Observation Reference,A reference to an observation associated with ...,,,2020-01-27,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/dwc/terms/attributes/Relate...,http://rs.tdwg.org/ac/terms/version/associated...
2,ac,http://rs.tdwg.org/ac/terms/,associatedSpecimenReference,Associated Specimen Reference,A reference to a specimen associated with this...,,"Supports finding a specimen resource, where ad...",2020-01-27,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/dwc/terms/attributes/Relate...,http://rs.tdwg.org/ac/terms/version/associated...
3,ac,http://rs.tdwg.org/ac/terms/,attributionLinkURL,Attribution Link URL,"The URL where information about ownership, att...",,This URL may be used in creating a clickable l...,2020-01-27,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/dwc/terms/attributes/Attrib...,http://rs.tdwg.org/ac/terms/version/attributio...
4,ac,http://rs.tdwg.org/ac/terms/,attributionLogoURL,Attribution URL,The URL of the icon or logo image to appear in...,,Entering this URL into a browser should only r...,2020-01-27,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/dwc/terms/attributes/Attrib...,http://rs.tdwg.org/ac/terms/version/attributio...
...,...,...,...,...,...,...,...,...,...,...,...,...
104,dwc,http://rs.tdwg.org/dwc/terms/,verbatimLatitude,Verbatim Latitude,The verbatim original latitude of the Location...,,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://purl.org/dc/terms/Location,
105,dwc,http://rs.tdwg.org/dwc/terms/,verbatimLocality,Verbatim Locality,The original textual description of the place.,,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://purl.org/dc/terms/Location,
106,dwc,http://rs.tdwg.org/dwc/terms/,verbatimLongitude,Verbatim Longitude,The verbatim original longitude of the Locatio...,,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://purl.org/dc/terms/Location,
107,dwc,http://rs.tdwg.org/dwc/terms/,verbatimSRS,Verbatim SRS,"The ellipsoid, geodetic datum, or spatial refe...",,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://purl.org/dc/terms/Location,


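Run the following cell to generate an index of terms sorted alphabetically by term local name. Note that the sort is case-sensitive, so capitalized local names (e.g. `xmp:MetadataDate`) are listed before lowercase ones. Omit this index if the terms have opaque local names.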

In [145]:
# generate the index of terms grouped by category, listing the term CURIEs in the
# order of terms_sorted_by_localname

text = '### 3.1 Index By Term Name\n\n'
text += '(See also [3.2 Index By Label](#32-index-by-label))\n\n'
for category in range(0,len(display_order)):
    text += '**' + display_label[category] + '**\n'
    text += '\n'
    if organized_in_categories:
        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]
    else:
        filtered_table = terms_sorted_by_localname
    # Renumber the rows in both branches (returns a new frame, so the sorted source
    # table is never modified).
    filtered_table = filtered_table.reset_index(drop=True)

    # Collect one Markdown link per term, then join them. Joining puts ' |' after
    # every entry except the last without depending on the row index, which was
    # wrong when the table was not organized in categories.
    entries = []
    for row_index,row in filtered_table.iterrows():
        curie = row['pref_ns_prefix'] + ":" + row['term_localName']
        curie_anchor = curie.replace(':','_')
        entries.append('[' + curie + '](#' + curie_anchor + ')')
    if entries:
        text += ' |\n'.join(entries) + '\n'
    text += '\n'
index_by_name = text

print(index_by_name)

### 3.1 Index By Term Name

(See also [3.2 Index By Label](#32-index-by-label))

**Management Vocabulary**

[xmp:MetadataDate](#xmp_MetadataDate) |
[xmp:Rating](#xmp_Rating) |
[dcterms:available](#dcterms_available) |
[ac:commenter](#ac_commenter) |
[ac:commenterLiteral](#ac_commenterLiteral) |
[ac:comments](#ac_comments) |
[ac:hasServiceAccessPoint](#ac_hasServiceAccessPoint) |
[dcterms:identifier](#dcterms_identifier) |
[ac:metadataLanguage](#ac_metadataLanguage) |
[ac:metadataLanguageLiteral](#ac_metadataLanguageLiteral) |
[dcterms:modified](#dcterms_modified) |
[ac:providerManagedID](#ac_providerManagedID) |
[ac:reviewer](#ac_reviewer) |
[ac:reviewerComments](#ac_reviewerComments) |
[ac:reviewerLiteral](#ac_reviewerLiteral) |
[ac:subtype](#ac_subtype) |
[ac:subtypeLiteral](#ac_subtypeLiteral) |
[dcterms:title](#dcterms_title) |
[dc:type](#dc_type) |
[dcterms:type](#dcterms_type)

**Attribution Vocabulary**

[ac:attributionLinkURL](#ac_attributionLinkURL) |
[ac:attributionLogoURL](#

Run the following cell to generate an index by term label

In [146]:
# generate the index of terms grouped by category and sorted by term label

# Fallback start of the section, used when the two lines below are commented out.
text = '\n\n'

# Comment out the following two lines if there is no index by local names
text = '### 3.2 Index By Label\n\n'
text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\n\n'
for category in range(0,len(display_order)):
    if organized_in_categories:
        text += '**' + display_label[category] + '**\n'
        text += '\n'
        filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass']==display_order[category]]
    else:
        filtered_table = terms_sorted_by_label
    # Renumber the rows in both branches so that row_index is a position
    # (returns a new frame, so the sorted source table is never modified).
    filtered_table = filtered_table.reset_index(drop=True)

    # Keep only the first term of each run of identical labels (the table is sorted
    # by label, so duplicates are adjacent), then join the links. Joining puts ' |'
    # after every entry except the last, however many duplicates end the category.
    entries = []
    previous_label = None
    for row_index,row in filtered_table.iterrows():
        if row_index == 0 or row['label'] != previous_label:
            curie_anchor = row['pref_ns_prefix'] + "_" + row['term_localName']
            entries.append('[' + row['label'] + '](#' + curie_anchor + ')')
        previous_label = row['label']
    if entries:
        text += ' |\n'.join(entries) + '\n'
    text += '\n'
index_by_label = text

print(index_by_label)

### 3.2 Index By Label

(See also [3.1 Index By Term Name](#31-index-by-term-name))

**Management Vocabulary**

[Commenter](#ac_commenter) |
[Comments](#ac_comments) |
[Date Available](#dcterms_available) |
[Identifier](#dcterms_identifier) |
[Metadata Date](#xmp_MetadataDate) |
[Metadata Language](#ac_metadataLanguage) |
[Modified](#dcterms_modified) |
[Provider-managed ID](#ac_providerManagedID) |
[Rating](#xmp_Rating) |
[Reviewer](#ac_reviewer) |
[Reviewer Comments](#ac_reviewerComments) |
[Service Access Point](#ac_hasServiceAccessPoint) |
[Subtype](#ac_subtype) |
[Title](#dcterms_title) |
[Type](#dc_type)

**Attribution Vocabulary**

[Attribution Link URL](#ac_attributionLinkURL) |
[Attribution URL](#ac_attributionLogoURL) |
[Copyright Statement](#dcterms_rights) |
[Funding](#ac_fundingAttribution) |
[License Logo URL](#ac_licenseLogoURL) |
[Published Source](#dcterms_source)

**Agents Vocabulary**

[Creator](#dcterms_creator) |
[Metadata Creator](#ac_metadataCreatorLiteral) |
[Me

In [153]:
# Generate the "4 Vocabulary" section: one HTML table per term, with terms grouped by
# category when organized_in_categories is set.

# NOTE(review): the decisions table is read from the master branch, whereas the other
# tables are read from githubBaseUri -- confirm this is intentional.
decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)

def _html_link(url):
    """Return an HTML anchor whose target and visible text are both ``url``."""
    return '<a href="' + url + '">' + url + '</a>'

def _table_row(name, value):
    """Return one complete two-cell ``<tr>`` element of a term table."""
    return '\t\t<tr>\n\t\t\t<td>' + name + '</td>\n\t\t\t<td>' + value + '</td>\n\t\t</tr>\n'

# Short display names for the rdf:type IRIs; any other IRI is displayed as-is.
rdf_type_names = {
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property': 'Property',
    'http://www.w3.org/2000/01/rdf-schema#Class': 'Class',
    'http://www.w3.org/2004/02/skos/core#Concept': 'Concept',
}

# generate the Markdown for the terms table
text = '## 4 Vocabulary\n'
for category in range(0,len(display_order)):
    if organized_in_categories:
        text += '### 4.' + str(category + 1) + ' ' + display_label[category] + '\n'
        text += '\n'
        text += display_comments[category] # insert the comments for the category, if any.
        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass']==display_order[category]]
        filtered_table = filtered_table.reset_index(drop=True)
    else:
        filtered_table = terms_sorted_by_localname

    for row_index,row in filtered_table.iterrows():
        curie = row['pref_ns_prefix'] + ":" + row['term_localName']
        curieAnchor = curie.replace(':','_')
        uri = row['pref_ns_uri'] + row['term_localName']

        text += '<table>\n'
        text += '\t<thead>\n'
        text += '\t\t<tr>\n'
        text += '\t\t\t<th colspan="2"><a id="' + curieAnchor + '"></a>Term Name  ' + curie + '</th>\n'
        text += '\t\t</tr>\n'
        text += '\t</thead>\n'
        text += '\t<tbody>\n'

        text += _table_row('Term IRI', _html_link(uri))
        # This row previously lacked its opening <tr>, which produced malformed HTML.
        text += _table_row('Modified', row['term_modified'])

        if row['version_iri'] != '':
            text += _table_row('Term version IRI', _html_link(row['version_iri']))

        text += _table_row('Label', row['label'])

        if row['term_deprecated'] != '':
            text += _table_row('', '<strong>This term is deprecated and should no longer be used.</strong>')

        text += _table_row('Definition', row['rdfs_comment'])
        #text += _table_row('Definition', row['definition'])

        if row['skos_scopeNote'] != '':
        #if row['usage'] != '':
            text += _table_row('Usage', createLinks(row['skos_scopeNote']))
            #text += _table_row('Usage', createLinks(row['usage']))

        if row['dcterms_description'] != '':
        #if row['notes'] != '':
            text += _table_row('Notes', createLinks(row['dcterms_description']))
            #text += _table_row('Notes', createLinks(row['notes']))

        if vocab_type == 2 or vocab_type ==3: # controlled vocabulary
            text += _table_row('Controlled value', row['controlled_value_string'])

        if vocab_type == 3 and row['skos_broader'] != '': # controlled vocabulary with skos:broader relationships
            curieAnchor = row['skos_broader'].replace(':','_')
            text += _table_row('Has broader concept', '<a href="#' + curieAnchor + '">' + row['skos_broader'] + '</a>')

        # An unrecognized type IRI is shown unchanged; this should rarely happen.
        text += _table_row('Type', rdf_type_names.get(row['rdf_type'], row['rdf_type']))
        #text += _table_row('Type', rdf_type_names.get(row['type'], row['type']))

        # Look up decisions related to this term
        related_decisions = decisions_df[decisions_df['linked_affected_resource'] == uri]
        for drow_index,drow in related_decisions.iterrows():
            text += _table_row('Executive Committee decision', _html_link('http://rs.tdwg.org/decisions/' + drow['decision_localName']))

        text += '\t</tbody>\n'
        text += '</table>\n'
        text += '\n'
    text += '\n'
term_table = text

print(term_table)

## 4 Vocabulary
### 4.1 Management Vocabulary

<table>
	<thead>
		<tr>
			<th colspan="2"><a id="xmp_MetadataDate"></a>Term Name  xmp:MetadataDate</th>
		</tr>
	</thead>
	<tbody>
		<tr>
			<td>Term IRI</td>
			<td><a href="http://ns.adobe.com/xap/1.0/MetadataDate">http://ns.adobe.com/xap/1.0/MetadataDate</a></td>
		</tr>
			<td>Modified</td>
			<td>2020-01-27</td>
		</tr>
		<tr>
			<td>Label</td>
			<td>Metadata Date</td>
		</tr>
		<tr>
			<td>Definition</td>
			<td>The date and time that any metadata for this resource was last changed. It should be the same as or more recent than xmp:ModifyDate.</td>
		</tr>
		<tr>
			<td>Usage</td>
			<td>Point in time recording when the last modification to metadata (not necessarily the media object itself) occurred. The date and time MUST comply with the World Wide Web Consortium (W3C) datetime practice, <a href="https://www.w3.org/TR/NOTE-datetime">https://www.w3.org/TR/NOTE-datetime</a>, which requires that date and time representation correspond

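Modify the following cell to select which of the indices (by term name, by label, or both) are included ahead of the term tables in the generated page.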

In [154]:
# Assemble the generated part of the page: the chosen indices followed by the term tables.
# To leave out the index by term name, use the commented-out line instead.
#text = index_by_label + term_table
text = ''.join([index_by_name, index_by_label, term_table])

In [155]:
# read in header and footer, merge with terms table, and output

# The with-statements close each file even if reading or writing raises.
with open(headerFileName, 'rt', encoding='utf-8') as headerObject:
    header = headerObject.read()

with open(footerFileName, 'rt', encoding='utf-8') as footerObject:
    footer = footerObject.read()

# The page is the static header, the generated text, then the static footer.
output = header + text + footer
with open(outFileName, 'wt', encoding='utf-8') as outputObject:
    outputObject.write(output)

print('done')

done
