In [1]:
# import packages
import re
import requests
import csv
import yaml
import json
import pandas as pd
import markdown


In [2]:
# load config
# Read inside a `with` block so the file handle is closed as soon as parsing
# finishes, even if the YAML is malformed and yaml.load raises.
# NOTE(review): FullLoader can construct some Python objects; yaml.safe_load
# would be enough if config.yaml only holds plain data — confirm before switching.
with open('config.yaml', newline='') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)
config

{'headerFileName': 'termlist-header.md',
 'footerFileName': 'termlist-footer.md',
 'outFileName': '../master/README.md',
 'termLists': [{'filename': 'tcs',
   'vann_preferredNamespacePrefix': 'tcs',
   'vann_preferredNamespaceUri': 'http://rs.tdwg.org/tcs/terms/'},
  {'filename': 'dwc-for-tcs',
   'vann_preferredNamespacePrefix': 'dwc',
   'vann_preferredNamespaceUri': 'http://rs.tdwg.org/dwc/terms/'}],
 'vocab_type': 1,
 'organized_in_categories': True,
 'categories': [{'namespace': 'http://rs.tdwg.org/tcs/terms/TaxonConcept',
   'label': 'Taxon Concept',
   'comments': '',
   'display_id': 'taxonConcept'},
  {'namespace': 'http://rs.tdwg.org/tcs/terms/TaxonRelationship',
   'label': 'Taxon Relationship',
   'comments': '',
   'display_id': 'taxonRelationship'},
  {'namespace': 'http://rs.tdwg.org/tcs/terms/TaxonName',
   'label': 'Taxon Name',
   'comments': '',
   'display_id': 'taxonName'},
  {'namespace': 'http://rs.tdwg.org/tcs/terms/NomenclaturalType',
   'label': 'Nomenclatural

In [3]:
# create data frame with terms
def yaml_to_df(filename):
    """Load ``../master/<filename>.yaml`` and return its contents as a DataFrame.

    Parameters
    ----------
    filename : str
        Base name of the YAML file, without directory or ``.yaml`` extension.

    Returns
    -------
    pandas.DataFrame
        The parsed YAML data passed through ``pd.DataFrame.from_dict``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    path = '../master/{filename}.yaml'.format(filename = filename)
    # `with` guarantees the handle is released even when parsing fails.
    with open(path, newline='') as f:
        data = yaml.load(f, Loader=yaml.FullLoader)
    return pd.DataFrame.from_dict(data)

# Load every configured term list, tag its rows with the list's namespace URI
# and prefix, then stack all lists into a single frame.
# The loop variable is deliberately not called `list`, which would shadow the
# builtin for every later cell in the kernel.
frames = []
for term_list in config['termLists']:
    df = yaml_to_df(term_list['filename'])
    df['namespace'] = term_list['vann_preferredNamespaceUri']
    df['namespaceAlias'] = term_list['vann_preferredNamespacePrefix']
    frames.append(df)

# One concat at the end instead of re-concatenating on every iteration.
merged_df = pd.concat(frames)

merged_df

Unnamed: 0,localName,label,definition,usage,notes,examples,type,organizedInClass,required,repeatable,namespace,namespaceAlias
0,TaxonConcept,Taxon Concept,"The underlying meaning, or referential extensi...",,,,http://www.w3.org/2000/01/rdf-schema#Class,http://rs.tdwg.org/tcs/terms/TaxonConcept,,,http://rs.tdwg.org/tcs/terms/,tcs
1,taxonConceptCategory,Taxon Concept Category,The category of Taxon Concept,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,False,http://rs.tdwg.org/tcs/terms/,tcs
2,taxonName,Taxon Name,The Taxon Name for this Taxonomic Name Usage,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,False,http://rs.tdwg.org/tcs/terms/,tcs
3,accordingTo,According To,"Reference to the source of this concept, which...",,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,False,http://rs.tdwg.org/tcs/terms/,tcs
4,accordingToString,According To String,String representation of accordingTo,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,False,http://rs.tdwg.org/tcs/terms/,tcs
5,parent,Parent,"The direct, most proximate higher-rank parent ...",,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,False,http://rs.tdwg.org/tcs/terms/,tcs
6,synonym,Synonym,Name considered to apply to the same taxon as ...,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,True,http://rs.tdwg.org/tcs/terms/,tcs
7,vernacularName,Vernacular Name,"Common or vernacular name, used as an alternat...",,A name is only a vernacular name if it is used...,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,True,http://rs.tdwg.org/tcs/terms/,tcs
8,characterCircumscription,Character Circumscription,A set of taxonomic descriptions used to define...,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,?,http://rs.tdwg.org/tcs/terms/,tcs
9,specimenCircumscription,Specimen Circumscription,A set of specimens that are used to define the...,,,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,http://rs.tdwg.org/tcs/terms/TaxonConcept,False,?,http://rs.tdwg.org/tcs/terms/,tcs


In [4]:
# create index of terms

def _term_link(row):
    """Format one term row as a Markdown link to its anchor.

    The link text is ``<namespaceAlias>:<localName>`` and the target is
    ``#<namespaceAlias>_<localName>``.
    """
    return '[{alias}:{name}](#{alias}_{name})'.format(
        alias = row['namespaceAlias'], name = row['localName'])


def create_index(config, merged_df):
    """Build the Markdown "Index of terms" section.

    The section starts with a ``**classes**`` line linking every term whose
    ``type`` contains ``Class``, followed by one line per entry in
    ``config['categories']`` linking the terms whose ``organizedInClass``
    equals the category namespace and whose ``type`` contains ``Property``.

    Parameters
    ----------
    config : dict
        Must provide ``categories``, a list of dicts with ``label`` and
        ``namespace`` keys.
    merged_df : pandas.DataFrame
        Needs the columns ``type``, ``organizedInClass``, ``namespaceAlias``
        and ``localName``.

    Returns
    -------
    str
        Markdown text; links on a line are separated by `` | ``.
    """
    term_types = merged_df['type']
    # na=False: a term without a type would otherwise put NaN into the mask,
    # and pandas refuses to index with a mask that contains missing values.
    is_class = term_types.str.contains('Class', regex=False, na=False)
    is_property = term_types.str.contains('Property', regex=False, na=False)

    def link_line(rows):
        # One line of ' | '-separated links followed by a blank line.
        return ' | '.join(_term_link(row) for _, row in rows.iterrows()) + '\n\n'

    text = '### Index of terms\n\n'

    text += '**classes**\n\n'
    text += link_line(merged_df[is_class])

    for category in config['categories']:
        in_category = merged_df['organizedInClass'] == category['namespace']
        text += '**{label}**\n\n'.format(label = category['label'])
        text += link_line(merged_df[in_category & is_property])

    return text

# print(create_index(config, merged_df))


    

In [5]:
# Create vocabulary
def table_cell(content, celltype='td', colspan=1):
    """Return one HTML table cell, indented with three tabs.

    ``celltype`` is the tag name (``td`` or ``th``). A ``colspan`` attribute
    is emitted only when ``colspan`` differs from 1. ``content`` is inserted
    as-is, so it may already contain HTML.
    """
    fields = {'content': content, 'celltype': celltype}
    if colspan != 1:
        fields['colspan'] = colspan
        return '\t\t\t<{celltype} colspan="{colspan}">{content}</{celltype}>'.format(**fields)
    return '\t\t\t<{celltype}>{content}</{celltype}>'.format(**fields)

def table_row(cells):
    """Wrap already-rendered cell strings in a ``<tr>`` element.

    The cells are placed one per line between the opening and closing tags,
    which are indented with two tabs; the result ends with a newline.
    """
    row_body = '\n'.join(cells)
    return '\t\t<tr>\n{cells}\n\t\t</tr>\n'.format(cells = row_body)

def _is_missing(value):
    """Return True for ``None`` and for float NaN.

    Rows taken from a DataFrame can carry NaN instead of ``None`` for absent
    values, and NaN is truthy, so a plain truthiness or ``is None`` check is
    not enough.
    """
    return value is None or (isinstance(value, float) and value != value)


def _markdown_cell(value):
    """Render ``value`` through Markdown into a table cell; missing or empty values give an empty cell."""
    source = '' if _is_missing(value) or not value else value
    return table_cell(markdown.markdown(source))


def term_table(term):
    """Render one term as an HTML ``<table>`` block.

    Parameters
    ----------
    term : mapping (dict or pandas Series)
        Read with ``term[key]`` for the keys ``namespace``, ``namespaceAlias``,
        ``localName``, ``label``, ``required``, ``repeatable``, ``definition``,
        ``usage`` and ``notes``.

    Returns
    -------
    str
        The table markup followed by a blank line. The header cell carries an
        anchor ``<a id="alias_localName">``. The required/repeatable row is
        only emitted when ``required`` has a value.
    """
    text = '<table>\n'

    # table header
    curie = '{namespaceAlias}:{localName}'.format(namespaceAlias = term['namespaceAlias'], localName=term['localName'])
    curieAnchor = curie.replace(':', '_')
    tableHeader = '<a id="{curieAnchor}"></a>{curie}'.format(curie = curie, curieAnchor = curieAnchor)
    text += '\t<thead>\n'
    text += table_row([table_cell(tableHeader, celltype='th', colspan=2)])
    text += '\t</thead>\n'

    text += '\t<tbody>\n'

    # URI
    uri = '{namespace}{localName}'.format(namespace = term['namespace'], localName = term['localName'])
    text += table_row([
        table_cell('URI'),
        table_cell(uri)
    ])

    # Label
    text += table_row([
        table_cell('Label'),
        table_cell(term['label'])
    ])

    # Attributes: skipped when `required` is absent. NaN counts as absent,
    # otherwise it would pass an `is not None` check and render as "Yes".
    required_value = term['required']
    if not _is_missing(required_value):
        repeatable_value = term['repeatable']
        required = "Yes" if required_value else "No"
        # NOTE(review): any truthy value, including a placeholder string such
        # as '?', is shown as "Yes" — confirm this is intended.
        repeatable = "Yes" if (not _is_missing(repeatable_value) and repeatable_value) else "No"
        attrs = '<b>required:</b> {required} — <b>repeatable:</b> {repeatable}'.format(required = required, repeatable = repeatable)
        text += table_row([
            table_cell(''),
            table_cell(attrs)
        ])

    # Definition
    text += table_row([
        table_cell('Definition'),
        _markdown_cell(term['definition'])
    ])

    # Usage
    text += table_row([
        table_cell('Usage'),
        _markdown_cell(term['usage'])
    ])

    # Comments/Notes
    text += table_row([
        table_cell('Comments'),
        _markdown_cell(term['notes'])
    ])

    text += '\t</tbody>\n'
    text += '</table>\n\n'
    return text

def create_vocab(config, merged_df):
    """Assemble the Markdown "Vocabulary" section.

    For each entry in ``config['categories']`` a level-4 heading with the
    category label is written, followed by a ``term_table`` for every row of
    ``merged_df`` whose ``organizedInClass`` equals the category namespace.
    Returns the whole section as one string.
    """
    parts = ['### Vocabulary\n\n']
    for category in config['categories']:
        parts.append('#### {label}\n\n'.format(label = category['label']))
        in_category = merged_df['organizedInClass'] == category['namespace']
        parts.extend(term_table(term) for _, term in merged_df[in_category].iterrows())
    return ''.join(parts)

# testing
# print(table_cell('Hello'))
# print(table_cell('Hello', colspan=2, celltype='th'))
# print(table_row([table_cell('Modified'), table_cell('2021-10-19')]))

# term = {
#     "namespace": "http://rs.tdwg.org/tcs/terms/",
#     "namespaceAlias": "tcs",
#     "localName": "TaxonConcept",
#     "label": "Taxon Concept",
#     "definition": """The underlying meaning, or referential extension, of a scientific name  as 
#         stated by a particular author in a particular publication. It represents the 
#         author's full-blown view of how the name reaches out to observed or 
#         unobserved objects in nature (beyond statements about type specimens). It is 
#         a direct reflection of what has been written, illustrated, and deposited by 
#         a taxonomist, regardless of his or her theoretical orientation (Franz & Peet 
#         2009).""",
#     "usage": None,
#     "notes": None,
#     "required": None,
#     "repeatable": None
# }
# print(term_table(term))

# Preview the generated vocabulary markup as plain text in the cell output.
print(create_vocab(config, merged_df))
    

### Vocabulary

#### Taxon Concept

<table>
	<thead>
		<tr>
			<th colspan="2"><a id="tcs_TaxonConcept"></a>tcs:TaxonConcept</th>
		</tr>
	</thead>
	<tbody>
		<tr>
			<td>URI</td>
			<td>http://rs.tdwg.org/tcs/terms/TaxonConcept</td>
		</tr>
		<tr>
			<td>Label</td>
			<td>Taxon Concept</td>
		</tr>
		<tr>
			<td>Definition</td>
			<td><p>The underlying meaning, or referential extension, of a scientific name  as  stated by a particular author in a particular publication. It represents the  author's full-blown view of how the name reaches out to observed or  unobserved objects in nature (beyond statements about type specimens). It is  a direct reflection of what has been written, illustrated, and deposited by  a taxonomist, regardless of his or her theoretical orientation (Franz &amp; Peet  2009).</p></td>
		</tr>
		<tr>
			<td>Usage</td>
			<td></td>
		</tr>
		<tr>
			<td>Comments</td>
			<td></td>
		</tr>
	</tbody>
</table>

<table>
	<thead>
		<tr>
			<th colspan="2"><a id="tcs_taxonC

In [7]:
# create output file

# Generated body: term index followed by the vocabulary tables.
term_index = create_index(config, merged_df)
vocab = create_vocab(config, merged_df)
text = term_index + vocab

# Each file is handled in a `with` block so its handle is closed even if the
# read or write raises.
with open(config['headerFileName'], 'rt', encoding='utf-8') as headerObject:
    header = headerObject.read()

with open(config['footerFileName'], 'rt', encoding='utf-8') as footerObject:
    footer = footerObject.read()

# The final document is header + generated body + footer.
output = header + text + footer
with open(config['outFileName'], 'wt', encoding='utf-8') as outputObject:
    outputObject.write(output)