
# Perseus TEI XML to Text-Fabric Convertor

XML-TEI text files can be converted to [Text-Fabric format](https://dans-labs.github.io/text-fabric/Model/File-formats/) with this convertor. It has been designed for Greek, but with minimal adjustments it should also work for other languages (except for the implemented lemmatizer).

See this [readme](https://github.com/pthu/patristics) for more information about the corpus and this work.

See this [notebook](https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs/convert.ipynb) for a simple setup of a TF conversion if you would like to build your own convertor.

In [1]:
import glob,os,re
import collections
import pickle
import betacode.conv

from collections import OrderedDict, namedtuple
from itertools import takewhile
from ordered_set import OrderedSet
from unicodedata import category, normalize
from tf.fabric import Fabric, Timestamp
from tf.convert.walker import CV
from pprint import pprint
from cltk.corpus.greek.beta_to_unicode import Replacer
from cltk.corpus.greek.alphabet import filter_non_greek
from greek_normalisation.normalise import Normaliser
from greek_normalisation.norm_data import ELISION, MOVABLE

# Local imports
from helpertools.lemmatizer import lemmatize
from helpertools.unicodetricks import *


In [2]:
# Repository roots (user-relative; expanded when the directories are built)
REPO1 = '~/github/pthu'
REPO2 = f'{REPO1}/sources'

# Define the version of the export
VERSION = '1.0'

# Define subcorpus to convert and the name of its export location
SOURCE = 'greek_sources'
LOC = 'greek_literature'

# Define the source where the (sub)corpus can be found
SRC_DIR = os.path.expanduser('/'.join((REPO2, SOURCE)))
# Define the export path
TF_DIR = os.path.expanduser('/'.join((REPO1, LOC)))


# Setup of the Convertor Environment



In [3]:
# RE PATTERNS METADATA
# Each pattern recognises a tag either bare or followed by attributes.
authorRE = re.compile(r'<author>|<author .+?>')
editorRE = re.compile(r'<editor>|<editor .+?>')
titleRE = re.compile(r'<title>|<title .+?>')
bodyStartRE = re.compile(r'<body>|<body .+?>')
bodyStopRE = re.compile(r'</body>|</body .+?>')

# RE PATTERNS BODY
# The parsing code applies these with fullmatch() to single elements
# (one tag or one run of text).
openTagRE = re.compile(r'<[^/= ]+?>')          # e.g. <p>
closeTagRE = re.compile(r'</.+?>')             # e.g. </p>
opencloseTagRE = re.compile(r'<[^=/]+?/>')     # e.g. <lb/>
openAttrTagRE = re.compile(r'<.+?=[^/]+?>')    # e.g. <div type="x">
closedAttrTagRE = re.compile(r'<.+?=.+?/>')    # e.g. <milestone n="1"/>
commentFullRE = re.compile(r'^<!--.*?-->$')    # comment opened and closed in one element
commentStartRE = re.compile(r'^<!--.*')        # element that opens a comment
commentStopRE = re.compile(r'.*-->$')          # element that closes a comment

# Load Lemmatizer
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load a lemmatizer.pickle that comes from a trusted source.
# The file handle keeps its original name but is now closed after loading.
with open(os.path.expanduser(f'{REPO1}/tei_to_tf/helpertools/data/lemmatizer.pickle'), 'rb') as lemmatizer_open:
    lemmatizer = pickle.load(lemmatizer_open)

# Set up James Tauber's normalizer
jt_normalise = Normaliser().normalise
# use: jt_normalise('greek_word') --> (greek_word, [type])

# Set up betacode to unicode convertor
beta_to_uni = Replacer()
#use: beta_to_uni.beta_code(betacode_text)

# Unicode standards
NFD = 'NFD'
NFC = 'NFC'

class Conversion:
    def __init__(self, path):
        self.path = path
        self.data = self.dataPreprocessor(self.path)
        self.metadata = self.metaData(self.data)[0]
        self.body_index = self.metaData(self.data)[1]
        self.attribs_dict = self.attribsDict(self.data)[0]
        self.opentags = self.attribsDict(self.data)[1]
        self.len_attribs_dict = self.lenAttribsDict(self.attribs_dict)        
        self.section_elems =  self.sectionElems(self.attribs_dict)

        # TF SPECIFIC VARIABLES
        self.slotType = 'word'
        self.intFeatures = set()
        self.generic = {'name': 'Patristic corpus from Perseus',
                        'compiler': 'Ernst Boogert',
                        'institution': 'Protestant Theological University (PThU) Amsterdam/Groningen, The Netherlands',
                        'source1': 'Perseus Digital Library',
                        'source2': 'Open Greek and Latin Project',
                        'url1': 'https://github.com/PerseusDL/canonical-greekLit',
                        'url2': 'https://github.com/OpenGreekAndLatin/First1KGreek',
                        'lang': 'grc',
                        'license': self.metadata['license'] if 'license' in self.metadata else 'not provided by source',
                         'availableStructure': ",".join(self.section_elems),
                        'version': '1.0',
                        'purpose': 'Making Perseus TEI formatted text available in TF',
                        'status': 'initial unchecked conversion',
                        '_book': self.metadata['titleStmt']['title'].replace('(Greek)', '').replace('.', '').replace(',', '').replace('Machine readable text', '').strip(),
                        'author': self.metadata['titleStmt']['author'] \
                                    if 'author' in self.metadata['titleStmt'] else 'not provided',
                        'editor': self.metadata['titleStmt']['editor'] \
                                    if 'editor' in self.metadata['titleStmt'] else 'not provided',
                        'edition': ', '.join([v for k, v in self.metadata['biblStruct'].items()]) + '.',
                        }
        # Definition of text formats
        self.otext = {'fmt:text-orig-full': '{orig} ',
                      'fmt:text-orig-main': '{main} ',
                      'fmt:text-orig-norm': '{norm} ',
                      'fmt:text-orig-plain': '{plain} ',
#                       'fmt:text-orig-beta-full': '{beta_orig} ',
                      'fmt:text-orig-beta-plain': '{beta_plain} ',
                      
                      'fmt:text-orig-lemma': '{lemma} ',
        # Definition of:
                      # main sections (=bookname + first two section levels)
                      'sectionTypes': f'_book{"" if not self.section_elems else "," + ",".join(self.section_elems[:2])}',
                      'sectionFeatures': f'_book{"" if not self.section_elems else "," + ",".join(self.section_elems[:2])}',
                      # structure (=bookname +all available levels)          
                      'structureTypes': f'_book{"" if not self.section_elems else "," + ",".join(self.section_elems[:])}',
                      'structureFeatures': f'_book{"" if not self.section_elems else "," + ",".join(self.section_elems[:])}',
                      }
        # These are the feature metadata that are present in all tf-packages to be produced... 
        # Other metadata will be added during the run of the director()...
        self.featureMeta = {
                '_sentence': {
                    'description': 'numbering of sentences with "." as its delimiter',
                },
                '_book': {
                    'description': 'the title of the book',
                },
                'orig': {
                    'description': 'the original form of the text in unicode (UFD norm), including accents and punctuation; if the original text was in betacode, it has been converted to unicode without any normalization',
                },
                'main': {
                    'description': 'the original form of the text in unicode (UFD norm), but extensively normalized (no punctuation and other trailing characters, no elision, normalization of accents.',
                },
                'norm': {
                    'description': 'a normalized form of uni_main, according to the normalization of James Tauber: https://github.com/jtauber/greek-normalisation',
                },
                'plain': {
                    'description': 'the plain form of the text in unicode stripped of all accents and punctuation',
                },
#                 'beta_orig': {
#                     'description': 'the original form of the text, including accents and punctuation; this could be unicode or betacode, depending on the format of in the source',
#                 },
#                 'beta_main': {
#                     'description': 'the original form of the text in betacode, but excluding punctuation and other trailing characters',
#                 },
                'beta_plain': {
                    'description': 'the plain form of the text in betacode stripped of all accents and punctuation',
                },
                'lemma': {
                    'description': 'the lemmatized form of the text tries to return as much as possible the words as a comma-separated list of possible lemmata. If no lemma could be found, the word is preceded by a "*". The lemmata have been defined by using the normalized text',
                },
            }
        
    
    def dataPreprocessor(self, path):
        """Read the XML file at ``path`` and split it into tags and text runs.

        Every line is stripped of surrounding whitespace and the lines are
        joined with single spaces. The result is then cut directly before
        each ``<`` and directly after each ``>``, so every tag becomes its
        own list element and the text between tags forms the other elements
        (empty strings can occur between adjacent tags).

        Parameters
        ----------
        path : str
            Path of the XML file to read.

        Returns
        -------
        list of str
            The tags and text runs in document order.
        """
        # Read as UTF-8 explicitly: the platform default encoding is not
        # guaranteed to be UTF-8 and could mis-decode Greek text.
        with open(path, encoding='utf-8') as xml:
            flattened = ' '.join(line.strip() for line in xml)
        # '#!#' serves as a temporary split marker around every tag.
        marked = flattened.replace('<', '#!#<').replace('>', '>#!#')
        return marked.split('#!#')
        
    def metaData(self, data):
        """Collect header metadata and locate the start of the body.

        ``data`` is walked until the first element matching ``bodyStartRE``.
        Along the way:

        * elements inside XML comments are skipped;
        * any element containing 'license' or 'creative commons'
          (case-insensitive) is stored under ``metadata['license']``;
        * text found while inside ``titleStmt`` or inside
          ``biblStruct``/``sourceDesc`` is stored in ``metadata['titleStmt']``
          or ``metadata['biblStruct']`` under the name of the most recently
          seen tag; repeated text for the same tag is appended after ', '
          unless it is already contained in the stored value.

        Parameters
        ----------
        data : list of str
            Tags and text runs as produced by ``dataPreprocessor``.

        Returns
        -------
        tuple
            ``(metadata, body_index)`` where ``body_index`` is the index of
            the element directly after the body start tag.

        Raises
        ------
        ValueError
            If ``data`` contains no body start tag.
        """
        metadata = {}
        metaTaglist = []
        in_title_stmt = False
        in_bibl_struct = False
        in_comment = False
        body_index = None

        def record(section, elem):
            """Remember a tag name, or file text under the last tag name seen."""
            if elem.startswith('<'):
                space = elem.find(' ')
                tag_end = space if space != -1 else elem.find('>')
                metaTaglist.append(elem[1:tag_end])
                return
            last_tag = metaTaglist[-1]
            if last_tag not in section:
                section[last_tag] = elem
            elif elem not in section[last_tag]:
                section[last_tag] += f', {elem}'

        # enumerate() gives the position directly instead of searching the
        # list again with data.index().
        for position, elem in enumerate(data):
            if bodyStartRE.match(elem):
                body_index = position + 1
                break
            elem = elem.strip('{ ,.}')
            if in_comment:
                if commentStopRE.fullmatch(elem):
                    in_comment = False
                continue
            if elem == '':
                continue
            if commentStartRE.fullmatch(elem):
                # A comment that is not closed in the same element keeps
                # the parser in comment mode.
                if not commentFullRE.fullmatch(elem):
                    in_comment = True
                continue
            lowered = elem.lower()
            if 'license' in lowered or 'creative commons' in lowered:
                metadata['license'] = elem
                continue

            if elem.startswith('<titleStmt'):
                in_title_stmt = True
                metadata['titleStmt'] = {}
            elif elem.startswith('</titleStmt'):
                in_title_stmt = False
            elif elem.startswith(('<biblStruct', '<sourceDesc')):
                in_bibl_struct = True
                metadata['biblStruct'] = {}
            elif elem.startswith(('</biblStruct', '</sourceDesc')):
                in_bibl_struct = False
            elif in_title_stmt:
                record(metadata['titleStmt'], elem)
            elif in_bibl_struct:
                record(metadata['biblStruct'], elem)

        if body_index is None:
            # Previously this surfaced as an UnboundLocalError on return.
            raise ValueError('no <body> start tag found in the document')
        return (metadata, body_index)
    
    def attribsDict(self, data):
        """Inventory the attributes of all attribute-carrying body tags.

        Only the elements from ``self.body_index`` onwards are inspected and
        elements inside XML comments are ignored. For each tag matching
        ``openAttrTagRE`` or ``closedAttrTagRE`` the attributes are parsed,
        their values are replaced through ``CORR_ATTRIB_VALS`` where a
        replacement exists, and the values are collected per tag key.

        A tag key is ``(tag, attribute_names)``: tags starting with 'div' are
        all named 'div', and the attribute names 'corresp' and 'merge' are
        left out of the key (their values are still collected).

        Opening tags with ``type="edition"`` are skipped entirely.

        Returns
        -------
        tuple
            ``(attribs_dict, opentags)``: a dict mapping each tag key to
            ``{attribute: OrderedSet of values}``, and the set of tag keys
            that occurred as opening (non self-closing) tags.
        """
        collected = {}
        opentags = set()
        key_exclusions = {'corresp', 'merge',}
        in_comment = False

        def parse(raw, closer_len):
            """Split a tag into its name and corrected attribute dict.

            ``closer_len`` is the number of trailing characters that close
            the tag (1 for '>', 2 for '/>').
            """
            normalised = re.sub(r'\s*=\s*"\s*', '="', raw)
            split_at = normalised.find(' ')
            tag = normalised[1:split_at]
            if tag.startswith('div'):
                tag = 'div'
            pairs = [chunk.split('="')
                     for chunk in normalised[split_at:-closer_len].strip().split('" ')]
            attribs = {key: val.strip() for key, val in pairs}
            for key, val in attribs.items():
                bare = val.strip('"')
                attribs[key] = CORR_ATTRIB_VALS[bare] if bare in CORR_ATTRIB_VALS else bare
            return tag, attribs

        def register(tag, attribs):
            """Add the attribute values to the inventory; return the tag key."""
            tag_key = (tag, tuple(key for key in attribs if key not in key_exclusions))
            bucket = collected.setdefault(tag_key, {})
            for key, val in attribs.items():
                if key in bucket:
                    bucket[key].add(val)
                else:
                    bucket[key] = OrderedSet([val])
            return tag_key

        for elem in data[self.body_index:]:
            elem = elem.strip()
            if in_comment:
                if commentStopRE.fullmatch(elem):
                    in_comment = False
                continue
            if elem == '':
                continue
            if commentStartRE.fullmatch(elem):
                if not commentFullRE.fullmatch(elem):
                    in_comment = True
                continue
            if openAttrTagRE.fullmatch(elem):
                tag, attribs = parse(elem, 1)
                if attribs.get('type') == 'edition':
                    continue
                opentags.add(register(tag, attribs))
            elif closedAttrTagRE.fullmatch(elem):
                tag, attribs = parse(elem, 2)
                register(tag, attribs)
        return collected, opentags
    
    def lenAttribsDict(self, dictionary):
        """Return a copy of a two-level dict with each inner value replaced by its length.

        For the attribute inventory this gives, per tag key and attribute,
        the number of values collected for that attribute.
        """
        counts = {}
        for tag_key, attribs in dictionary.items():
            per_attribute = {}
            for name, values in attribs.items():
                per_attribute[name] = len(values)
            counts[tag_key] = per_attribute
        return counts
    
    def sectionElems(self, dictionary):
        """Derive the list of section names from the attribute inventory.

        Tag keys whose tag starts with 'div' are scanned first. Within one
        such entry, an attribute with more than one value whose smallest
        value starts with a digit counts as numbering; an attribute whose
        smallest value does not start with a digit and whose first value
        does not start with 'urn' or 'textpart' supplies the section names.
        Scanning stops at the first entry that has both.

        If at most two names were found, 'milestone' entries that have both
        a 'unit' and an 'n' attribute (and whose smallest 'n' is all digits)
        contribute their units, except those listed in ``excluded_units``.

        Returns
        -------
        list
            The section names in the order found.
        """
        excluded_units = {'altpage', 'altnumbering', 'altref', 'mspage',}
        section_list = []

        for tag_key, attribs in dictionary.items():
            if not tag_key[0].startswith('div'):
                continue
            has_numbering = False
            has_names = False
            for values in attribs.values():
                if len(values) == 0:
                    continue
                # min() equals the first element of the sorted values
                if min(values)[0].isdigit():
                    if len(values) > 1:
                        has_numbering = True
                elif not values[0].startswith(('urn', 'textpart')):
                    section_list = list(values)
                    has_names = True
            if has_numbering and has_names:  # Identification of sectioning units
                break

        if len(section_list) <= 2:
            for tag_key, attribs in dictionary.items():
                if not tag_key[0].startswith('milestone'):
                    continue
                if 'unit' not in tag_key[1] or 'n' not in tag_key[1]:
                    continue
                ordered_numbers = sorted(attribs['n'])
                if ordered_numbers[0].isdigit():
                    for unit in attribs['unit']:
                        if unit not in excluded_units:
                            section_list.append(unit)
        return section_list
    
    def director(self, cv):
        tm = Timestamp()  
        Comment = False
        NegatedEditionTag = False
        nonIntFeatures = {'otype', 'oslots',}
        excludeTags = {'head', 'note', 'title', 'bibl'}
        counter = dict(_sentence=0, word=0)
        cur = {}
        tagList = []
        closedSectionList = []
        data = self.data
        lemma_counter = [0, 0]
        
        tagList.append('_book')
        cur['_book'] = cv.node('_book')
        cv.feature(cur['_book'], _book=self.generic['_book'])
        nonIntFeatures.add('_book')

        for elem in data[self.body_index:]:
            elem = elem.strip()
            if Comment == False:
                if commentStartRE.fullmatch(elem): #DONE
                    if commentFullRE.fullmatch(elem):
                        continue
                    Comment = True
                    continue

                elif openTagRE.fullmatch(elem): #DONE
#                     print(f'openTagRE = {elem}')
                    # These are the features linked to the coming nodes
                    tag_name = elem[1:-1]
                    tagList.append(tag_name)
                    if tag_name in cur:
                        cv.terminate(cur[tag_name])
                    if not tag_name in excludeTags:
                        if tag_name in counter:
                            counter[tag_name] +=1
                        else:
                            counter[tag_name] = 1    
                        cur[tag_name] = cv.node(tag_name)
                        cv.feature(cur[tag_name], **{tag_name: counter[tag_name]})
                        cv.meta(tag_name, description="open tag without further specification. See the name of the .tf-file for it's meaning",)
                    else:
                        if tag_name in cur: cv.terminate(cur[tag_name])
                        cur[tag_name] = cv.node(tag_name)
                    continue

                elif closeTagRE.fullmatch(elem): #DONE
#                     print(f'closeTagRE = {elem}')
                    # These are the signs showing the close of a feature belonging to preceding nodes
                    if bodyStopRE.fullmatch(elem):
                        if NegatedEditionTag == True:
                            for ntp in cur:
                                if not ntp in self.section_elems and not ntp == '_book':
                                    cv.terminate(cur[ntp])
                            for ntp in self.section_elems[::-1]:
                                cv.terminate(cur[ntp])
                            cv.terminate(cur['_book'])
                            break
                        else:
                            for ntp in cur:
                                if not ntp in self.section_elems and not ntp == '_book':
                                    cv.terminate(cur[ntp])
                            for ntp in self.section_elems[::-1]:
                                cv.terminate(cur[ntp])
                            cv.terminate(cur['_book'])
                            del tagList[-1]
                            break
                    if tagList[-1] in excludeTags:
                        pass
                    elif tagList[-1] in self.section_elems:
                        index = self.section_elems.index(tagList[-1])
                        for ntp in self.section_elems[:index:-1]:
                            if ntp in cur: cv.terminate(cur[ntp])     
                    elif not cv.linked(cur[tagList[-1]]):
                        pass
#                     else:
#                         cv.terminate(cur[tagList[-1]])
                    del tagList[-1]

                elif openAttrTagRE.fullmatch(elem):
#                     print(f'openAttrTagRE = {elem}')
                    # These are the features linked to coming nodes
                    elem = re.sub(r'\s*=\s*"\s*', '="', elem)
                    tag_split = elem.find(' ')
                    attribs = {key: val.strip() for key, val in [elem.split('="') for elem in elem[tag_split:-1].strip().split('" ')]}
                    for key, val in attribs.items():
                        if val.strip('"') in CORR_ATTRIB_VALS:
                            attribs[key] = CORR_ATTRIB_VALS[val.strip('"')]
                        else:
                            attribs[key] = val.strip('"')
                    if NegatedEditionTag == False:        
                        if 'type' in attribs:
                            if attribs['type'] == 'edition':
                                NegatedEditionTag = True
                                self.generic['urn'] = attribs['n'] if 'n' in attribs else 'not provided'
                                continue
                    tag_name = elem[1:tag_split]
                    if tag_name.startswith('div'):
                        tag_name = 'div'
                    tag = tuple((tag_name, tuple(key for key in attribs.keys() if key not in {'corresp', 'merge',})))
                    if tag_name in excludeTags:
                        if tag_name in cur: cv.terminate(cur[tag_name])
                        tagList.append(tag_name)
                        cur[tag_name] = cv.node(tag_name)
                        continue
                    highest_value_attrib = max(self.len_attribs_dict[tag], 
                                               key=lambda key: self.len_attribs_dict[tag][key])
                    sec = False
                    
                    for v in attribs.values():
                        if v in self.section_elems[:]:
                            sec = True
                            value = v
                    if sec == True:
                        for k, v in attribs.items():
                            if v == value:
                                if v == self.section_elems[0] and not k == 'n':
                                    for ntp in self.section_elems[::-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'first section level'
                                elif len(self.section_elems) > 1 and v == self.section_elems[1] and not k == 'n':
                                    for ntp in self.section_elems[:0:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'second section level'
                                elif len(self.section_elems) > 2 and v == self.section_elems[2] and not k == 'n':
                                    for ntp in self.section_elems[:1:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'third section level'
                                elif len(self.section_elems) > 3 and v == self.section_elems[3] and not k == 'n':
                                    for ntp in self.section_elems[:2:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'fourth section level'    
                                elif len(self.section_elems) > 4 and v == self.section_elems[4] and not k == 'n':
                                    for ntp in self.section_elems[:3:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'fifth section level'    
                                elif len(self.section_elems) > 5 and v == self.section_elems[5] and not k == 'n':
                                    for ntp in self.section_elems[:4:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'sixth section level'   
                                else: #elif k == 'n': # in case k == 'n'!
#                                     print(f'openAttrTagRE = {elem}')
                                    v = attribs['subtype'] if 'subtype' in attribs else v
                                    tagList.append(v)
                                    content = attribs[highest_value_attrib].strip()
                                    desc = 'not provided'
                                    if v in self.section_elems:
                                        index = self.section_elems.index(v) - 1
                                        for ntp in self.section_elems[:index:-1]:
                                            if ntp in cur: cv.terminate(cur[ntp])
                                    if v in cur: cv.terminate(cur[v])
                                    cur[v] = cv.node(v)
                                    if not content.isdigit():
                                        nonIntFeatures.add(v)
                                    cv.feature(cur[v], **{v: content})
                                    cv.meta(v, description=desc,)
                                    break
                                tagList.append(v)
                                content = attribs['n'].strip() if 'n' in attribs else attribs[highest_value_attrib].strip()
                                if v in cur: cv.terminate(cur[v])
                                cur[v] = cv.node(v)
                                if not content.isdigit():
                                    nonIntFeatures.add(v)
                                cv.feature(cur[v], **{v: content})
                                if 'corresp' in attribs:
                                    cv.feature(cur[v], **{'corresp': attribs['corresp']})
                                    nonIntFeatures.add('corresp')
                                    cv.meta('corresp', description='this feature shows a correspondence with another source at the place indicated')
                                cv.meta(v, description=desc,)
                                break
                    else:
                        # If only one attrib differs: it cannot be made clear which name to choose, hence choose everything
                        
                        
                        
                        attribList = []
                        for attr in self.len_attribs_dict[tag]:
                            if self.len_attribs_dict[tag][attr] > 1:
                                attribList.append(attr)
                        if len(attribList) > 1:
                            
                            tag_name += '-' + '-'.join([v for k, v in attribs.items() 
                                                        if k in attribList 
                                                        and not k == highest_value_attrib
                                                        and not v[0].isdigit()])
                            if tag_name.endswith('-'):
                                tag_name += '-'.join([v for k, v in attribs.items() 
                                                        if k in attribList 
                                                        and not v[0].isdigit()])
                        content = attribs['n'] if 'n' in attribs else attribs[highest_value_attrib]
                        tagList.append(tag_name)
                        if tag_name in cur: cv.terminate(cur[tag_name])
                        cur[tag_name] = cv.node(tag_name)
                        if not content.isdigit():
                            nonIntFeatures.add(tag_name)
                        cv.feature(cur[tag_name], **{tag_name: content})
                        cv.meta(tag_name, description="not provided",)
                        continue
                        
                elif closedAttrTagRE.fullmatch(elem):
#                     print(f'closedAttrTagRE = {elem}')
                    elem = re.sub(r'\s*=\s*"\s*', '="', elem)
                    tag_split = elem.find(' ')
                    attribs = {key: val.strip() for key, val in [elem.split('="') for elem in elem[tag_split:-2].strip().split('" ')]}
                    for key, val in attribs.items():
                        if val.strip('"') in CORR_ATTRIB_VALS:
                            attribs[key] = CORR_ATTRIB_VALS[val.strip('"')]
                        else:
                            attribs[key] = val.strip('"')
                    tag_name = elem[1:tag_split]
                    tag = tuple((tag_name, tuple(key for key in attribs.keys() if key not in {'corresp', 'merge',})))
                    highest_value_attrib = max(self.len_attribs_dict[tag], 
                                               key=lambda key: self.len_attribs_dict[tag][key])
                    sec = False
                    for v in attribs.values():
                        if v in self.section_elems[:]:
                            sec = True
                            value = v
                            break
                    if sec == True:
                        for k, v in attribs.items():
                            if v == value:
                                if v == self.section_elems[0] and not k == 'n':
                                    for ntp in self.section_elems[::-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'first section level'
                                elif len(self.section_elems) > 1 and v == self.section_elems[1] and not k == 'n':
                                    for ntp in self.section_elems[:0:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'second section level'
                                elif len(self.section_elems) > 2 and v == self.section_elems[2] and not k == 'n':
                                    for ntp in self.section_elems[:1:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'third section level'
                                elif len(self.section_elems) > 3 and v == self.section_elems[3] and not k == 'n':
                                    for ntp in self.section_elems[:2:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'fourth section level'    
                                elif len(self.section_elems) > 4 and v == self.section_elems[4] and not k == 'n':
                                    for ntp in self.section_elems[:3:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'fifth section level'    
                                elif len(self.section_elems) > 5 and v == self.section_elems[5] and not k == 'n':
                                    for ntp in self.section_elems[:4:-1]:
                                        if ntp in cur: cv.terminate(cur[ntp])
                                    desc = 'sixth section level'    
                                else: #elif k == 'n': # in case k == 'n'!
#                                     print(f'openAttrTagRE = {elem}')
                                    v = attribs['subtype'] if 'subtype' in attribs else v
                                    content = attribs[highest_value_attrib].strip()
                                    desc = 'not provided'
                                    if v in self.section_elems:
                                        index = self.section_elems.index(v) - 1
                                        for ntp in self.section_elems[:index:-1]:
                                            if ntp in cur: cv.terminate(cur[ntp])
                                    if v in cur: cv.terminate(cur[v])
                                    cur[v] = cv.node(v)
                                    if not content.isdigit():
                                        nonIntFeatures.add(v)
                                    cv.feature(cur[v], **{v: content})
                                    cv.meta(v, description=desc,)
                                    break
                                content = attribs['n'].strip() if 'n' in attribs else attribs[highest_value_attrib].strip()
                                if v in cur: cv.terminate(cur[v])
                                cur[v] = cv.node(v)                            
                                if tag in self.opentags:
                                    n = cv.slot()    
                                if not content.isdigit():
                                    nonIntFeatures.add(v)
                                cv.feature(cur[v], **{v: content})
                                if 'corresp' in attribs:
                                    cv.feature(cur[v], **{'corresp': attribs['corresp']})
                                    nonIntFeatures.add('corresp')
                                    cv.meta('corresp', description='this feature shows a correspondence with another source at the place indicated')
                                cv.meta(v, description=desc,)
                                break

                    else:
                        attribList = []
                        for attr in self.len_attribs_dict[tag]:
                            if self.len_attribs_dict[tag][attr] > 1:
                                attribList.append(attr)
                        if len(attribList) > 1:
                            tag_name += '-' + '-'.join([v for k, v in attribs.items() 
                                                        if k in attribList 
                                                        and not k == highest_value_attrib
                                                        and not v[0].isdigit()])
                            if tag_name.endswith('-'):
                                tag_name += '-'.join([v for k, v in attribs.items() 
                                                        if k in attribList 
#                                                         and not k == highest_value_attrib
                                                        and not v[0].isdigit()])
                        content = attribs['n'].strip() if 'n' in attribs else attribs[highest_value_attrib].strip()
                        if tag_name in cur: cv.terminate(cur[tag_name])
                        cur[tag_name] = cv.node(tag_name)
                        if not content.isdigit():
                            nonIntFeatures.add(tag_name)
                        cv.feature(cur[tag_name], **{tag_name: content})
                        cv.meta(tag_name, description="not given",)
                        continue

                elif opencloseTagRE.fullmatch(elem):
#                     print(f'opencloseTagRE = {elem}')
                    tag_name = elem[1:-2]
                    counter[tag_name] = 1 if tag_name not in counter else counter[tag_name] + 1
                    if tag_name in cur: cv.terminate(cur[tag_name])
                    cur[tag_name] = cv.node(tag_name)
                    cv.feature(cur[tag_name], **{tag_name: counter[tag_name]})
                    cv.meta(tag_name, description="open-close-tag without further specification. See the name of the .tf-file for it's meaning",)

                else: # These are the text nodes themselves
                    if re.fullmatch(r'\s*', elem):
                        continue
                    else:
                        for sec in self.section_elems:
                            if sec not in cv.activeTypes():
                                if sec == self.section_elems[-1]:
                                    cur[sec] = cv.node(sec)
                                    cv.feature(cur[sec], **{sec: 1})
                                else:
                                    cur[sec] = cv.node(sec)
                                    cv.feature(cur[sec], **{sec: 0})
                                    
                        assigned = False
                        for tag in tagList:
                            if tag in excludeTags and tag in cv.activeTypes():
                                elem = normalize(NFD, elem)
#                                 n = cv.slot()
#                                 cv.feature(n, **{tag: elem})
#                                 cur[tag] = cv.node(tag)
                                cv.feature(cur[tag], **{tag: elem})
                                cv.meta(tag, description="open tag without further specification. See the name of the .tf-file for it's meaning",)
                                nonIntFeatures.add(tag)
                                assigned = True
                                break
                        if assigned == True:
                            continue
                        
#                         if tagList[-1] in excludeTags:
#                             if tagList[-1] in cur and cv.linked(cur[tagList[-1]]): 
#                                 cv.terminate(cur[tagList[-1]])
#                             elem = normalize(NFD, elem)
#                             cur[tagList[-1]] = cv.node(tagList[-1])
#                             cv.feature(cur[tagList[-1]], **{tagList[-1]: elem})
#                             cv.meta(tagList[-1], description="open tag without further specification. See the name of the .tf-file for it's meaning",)
#                             nonIntFeatures.add(tagList[-1])
#                             continue

                        # In this stage, the unicode NFC format will be used, 
                        # to prevent that letter accents at the start of a word
                        # will be chopped off; later we change to NFD
                        try:
                            elem.encode('ascii')
#                             elem_uni = filter_non_greek(
#                                            normalize(NFC, beta_to_uni.beta_code(elem))
#                                        )
                            elem_uni = normalize(NFC, beta_to_uni.beta_code(elem))
                        except UnicodeEncodeError:
#                             elem_uni = filter_non_greek(
#                                            normalize(NFC, elem)
#                                        )
                            elem_uni = normalize(NFC, elem)
                        # elem_uni is now containing a string of Greek text 
                        # with NFC normalization only
                        for word in elem_uni.split():
                            # word contains the original form of a Greek word
                            if word == '':
                                continue
                                
                            
                            #counter['word'] +=1
                            
                            # pass the original form of the word into cv.feature
                            for (preWord, midWord, postWord) in splitPunc(word):
                                # midWord_pl will be used for various normalization actions
                                midWord_pl = plainLow(midWord)
                                if midWord_pl == '' or midWord_pl == 'ʼ': # ʼ is a single letter modifier
                                    try:
                                        cv.resume(w)
                                        orig = cv.get('orig', w) + preWord + midWord + postWord
                                        try:
                                            post = cv.get('post', w) + preWord + midWord + postWord
                                            cv.feature(w, post=post)
                                        # If midWord does not exist, any other sign 
                                        # is authomatically assigned to preWord not postWord.
                                        # This preWord is bound to the end of the previous word
                                        except:
                                            pass
                                        cv.feature(w, orig=orig)
                                        cv.terminate(w)
                                        continue
                                    except UnboundLocalError:
                                        continue
                                # Then we need to check for any elided form
                                elif midWord_pl + '’' in ELISION_norm and postWord != '':
                                    # we normalize the elision accent (many different ones have been used!)
                                    # for the original form (any elision accent will be replaced by the standard one)
                                    word = preWord + midWord + '’' + postWord[1:]
                                    # we modify the midWord and the postWord
                                    midWord = ELISION_norm[midWord_pl + '’'] # midWord gets the un-elided form!
                                    postWord = postWord[1:]          # postWord loses the elision accent!
                                # Deletion of movable-nu
                                elif midWord_pl.endswith(('εν', 'σιν', 'στιν')) and len(midWord_pl) >= 3:
                                    midWord = midWord[:-1]
                                # Handling final-sigma
                                elif midWord_pl.endswith('σ'):
                                    midWord = midWord[:-1] + 'ς'
                                # Handling various forms of ου
                                elif midWord_pl in ('ουχ', 'ουκ'):
                                    midWord = midWord[:-1]
                                # Handling ἐξ
                                elif midWord_pl == 'εξ':
                                    midWord = midWord[:-1] + 'κ'
                                # Definition of formats
                                midWord_main = normalize(NFD, midWord.lower())
                                midWord_norm = normalize(NFD, jt_normalise(midWord)[0])
                                midWord_plain = plainLow(midWord)
                                midWord_beta_plain = betacode.conv.uni_to_beta(midWord_plain)
                                # Lemmatization and counter for calculating the coverage ratio
                                lemma = lemmatize(midWord_main, lemmatizer)
                                if lemma.startswith('*'):
                                    lemma = lemmatize(midWord_norm, lemmatizer)
                                    if lemma.startswith('*'):
                                        lemma = lemmatize(midWord_plain, lemmatizer)
                                if lemma.startswith('*'):
                                    lemma_counter[1] +=1
                                else:
                                    lemma_counter[0] +=1
                                
                                # After the pre-processing, we continue to assigning everything
                                # Slot assignment!
                                w = cv.slot()
                                # Feature assignment
                                cv.feature(w, orig=word)
                                cv.feature(w, main=midWord_main)
                                cv.feature(w, norm=midWord_norm)
                                cv.feature(w, plain=midWord_plain)
                                cv.feature(w, beta_plain=midWord_beta_plain)
                                cv.feature(w, lemma=lemma)
                                
                                # Creation of sentence feature at the start of the process
                                if counter['_sentence'] == 0:
                                    counter['_sentence'] +=1
                                    cur['_sentence'] = cv.node('_sentence')
                                    cv.feature(cur['_sentence'], _sentence=counter['_sentence'])
                                    cv.feature(w, _sentence=counter['_sentence'])
                                if preWord != '':
                                    cv.feature(w, pre=preWord)
                                    cv.meta('pre', description='pre gives non-letter characters at the start of a word',)
                                    nonIntFeatures.add('pre')
                                if postWord != '':
                                    cv.feature(w, post=postWord)
                                    cv.meta('post', description='post gives non-letter characters at the end of a word',)
                                    nonIntFeatures.add('post')
                                    if postWord.startswith('.'):
                                        cv.terminate(cur['_sentence'])
                                        counter['_sentence'] +=1
                                        cur['_sentence'] = cv.node('_sentence')
#                                         cv.feature(cur['_sentence'], _sentence=counter['_sentence'])
                                        cv.feature(w, _sentence=counter['_sentence'])
                                        
            else:
                if commentStopRE.fullmatch(elem):
                    Comment = False
                continue
        
        
        nonIntFeatures.update(('word', 'orig', 'main', 'norm', 'plain', 'beta_plain', 'lemma'))        
        cv.meta('lemma', **{'coverage ratio': f'{round(lemma_counter[0] / ((lemma_counter[0] + lemma_counter[1]) / 100 ), 2)}%'})
        for feature in cv.metaData:
            if feature in nonIntFeatures:
                cv.meta(feature, valueType='str')
            else:
                if feature == "":
                    pass
                else:
                    cv.meta(feature, valueType='int')
        # Final check of tags
        tm.indent(level=1)
        if len(tagList) == 0:
            tm.info('No tag mistake(s) found...')
        else:
            tm.info(str(len(tagList)) + ' tag error(s) found.')


In [4]:
# Lookup table that maps irregular attribute names/values to one canonical
# spelling. The keys are the irregular forms (namespace-qualified XML names,
# typos, stray digits or Greek letters, leading spaces, plural forms); the
# values are the clean forms they collapse to, e.g. 'boo' -> 'book'.
# NOTE(review): the code that applies this table is outside this cell --
# presumably it is used while parsing `type`/`subtype` attributes; confirm.
CORR_ATTRIB_VALS = {
    # Namespace-qualified XML attribute names -> bare names
    '{http://www.w3.org/XML/1998/namespace}id': 'id',
    '{http://www.w3.org/XML/1998/namespace}lang': 'lang',
    'boo': 'book',
    # Misspellings of 'footnote' (some keys contain Greek letters)
    'fo1otnote': 'footnote',
    'fo6tnote': 'footnote',
    'foo1tnote': 'footnote',
    'foodnote': 'footnote',
    'foonote': 'footnote',
    'footn2ote': 'footnote',
    'footno1te': 'footnote',
    'footno3te': 'footnote',
    'footnot': 'footnote',
    'footnot1e': 'footnote',
    'footnot2e': 'footnote',
    'footnote1': 'footnote',
    'footnote2': 'footnote',
    'footnte': 'footnote',
    'footnτote': 'footnote',
    'footote': 'footnote',
    'fotnote': 'footnote',
    'Τfootnote': 'footnote',
    # Misspellings of 'marginal' (some keys contain Greek letters)
    'm5arginal': 'marginal',
    'margi4nal': 'marginal',
    'margial': 'marginal',
    'marginael': 'marginal',
    'marginai': 'marginal',
    'marginale': 'marginal',
    'marginalp': 'marginal',
    'marginapl': 'marginal',
    'marginaΕl': 'marginal',
    'marginaΣl': 'marginal',
    'marginl': 'marginal',
    'margipnal': 'marginal',
    'margnal': 'marginal',
    'margpinal': 'marginal',
    'marinalΑB': 'marginal',
    'marpginal': 'marginal',
    'märginal': 'marginal',
    # Misspellings of structural unit names (chapter, section, ...);
    # note the two keys that start with a space
    ' chapter': 'chapter',
    ' section': 'section',
    'antistrohe': 'antistrophe',
    'chap0ter': 'chapter',
    'chapter1': 'chapter',
    'chapterer': 'chapter',
    'chapters': 'chapter',
    'chaptser': 'chapter',
    'chaspter': 'chapter',
    'ephymn.': 'ephymn',
    'sction': 'section',
    'sectionn': 'section',
    'setence': 'sentence',
    'setion': 'section',
    'secton': 'section',
    'subdsection': 'subsection',
    'subection': 'subsection',
    'pargraph': 'paragraph',
}


# Update the list of James Tauber with some additional forms.
# ELISION is the dict imported from greek_normalisation.norm_data; it is
# extended IN PLACE here, so every later user of ELISION in this process
# sees the extra entries. Keys are elided forms ending in the elision
# mark '’', values are the corresponding full forms.
# NOTE(review): a few values ('θε', 'τοτε', 'ἠδη') carry no accent while the
# others do -- verify that these are the intended full forms.
ELISION.update(
    {
        'ἔσθ’': 'ἔστι',
        'γ’': 'γέ',
        'μ’': 'μή',
        'τοσαῦτ’': 'τοσοῦτος',
        'ἆρ’': 'ἆρα',
        'προσῆλθ’': 'προσῆλθε',
        'θ’': 'θε',
        'ἐνθάδ’': 'ἐνθάδε',
        'ἔστ’': 'ἔστε',
        'τοτ’': 'τοτε',
        'σ’': 'σε',
        'οὔτ’': 'οὔτε',
        'ἠδ’': 'ἠδη',
        'τ’': 'τε',
    }
    )
#Normalize ELISION to unaccented keys and normalized accented values
# Each key is passed through plainLow() and the elision mark '’' is appended
# again; each value is NFC-normalised. The word loop looks entries up as
# `midWord_pl + '’'`, where midWord_pl is itself a plainLow() result.
# NOTE(review): plainLow() lives in helpertools.unicodetricks -- presumably it
# lowercases and strips diacritics and the elision mark (hence the
# re-appended '’'); confirm. Keys that collapse to the same plain form
# silently overwrite each other (the last one wins).
ELISION_norm = {plainLow(k) + '’': normalize(NFC, v) for k, v in ELISION.items()}

# Reference inventories of names/values. Nothing in the visible code reads
# these four sets.
# NOTE(review): presumably they were harvested from the source corpus and are
# kept for inspection / validation; confirm before relying on them.

# Attribute names, including the namespace-qualified xml:id / xml:lang forms.
attributes = {'id', 'cols', 'hand', 'subtype', 'evidence', 'lang', 'value', 'direct', '{http://www.w3.org/XML/1998/namespace}id', 'status', 'from', 'to', 'corresp', 'who', 'key', 'ed', 'rows', 'cause', 'source', '{http://www.w3.org/XML/1998/namespace}lang', 'extent', 'part', 'targOrder', 'anchored', 'ana', 'target', 'quantity', 'default', 'unit', 'cert', 'reason', 'org', 'TEIform', 'instant', 'n', 'type', 'role', 'rend', 'place', 'break', 'desc', 'sample', 'met', 'resp', 'url'}
# Values of the `type` attribute, uncorrected: the set still contains the
# misspellings that CORR_ATTRIB_VALS maps to clean forms, plus beta-code
# strings (entries starting with '*').
attrib_type = {'*marturi/a', '*pro/klhsis', 'sphragis', 'proverb', 'bekker page', 'NarrProof', 'noclass', 'footnot', 'hexameter', 'complaint', 'statute', 'Parabasis', 'tetrameters', 'antiprelude', 'anapests', 'fo6tnote', 'marginaAl', 'marginalXXXIVv', 'Text', 'Continued', 'summary', 'proepirrheme', 'mesode', '*yh/fisma', 'prose', 'agreement', 'marginal919a', 'fragment', '*grafh/', 'num', 'footnote1', 'commentary', '*)ekmarturi/a', 'law', 'marginaΕl', '*xro/nos', 'festival', 'alternative', 'subsection', 'noparse', 'section', '*)ara/', 'challenge', 'footnτote', 'margin', 'eleg', 'meter', 'toc', 'footno1te', 'index', 'ethnic', 'Book', 'decree', 'marpginal', 'winner', 'boo', 'altnum', 'marginale', 'trimeter', 'Agon', 'episode', 'schedule', 'catchword', '*dialogismo\\s tw=n *(hmerw=n', 'marginai', 'Parodos', 'dates', 'footnte', 'marginalB', 'margina6l', 'margina70rl', 'footno3te', '*yhfi/smata', 'marginalW', 'proagon', 'prelude', 'salutation', 'margial', 'poem', 'monody', 'indictment', 'oath', '*sunhtopi/a *boiwtw=n kai\\ *fwke/wn', 'editorial', 'sling', 'testimonium', 'marginalHdt.', 'epirrheme', 'verse paraphrase', '*do/gma *summa/xwn', 'troch', '*sunqh=kai', 'Τfootnote', 'Antikatakeleusmos', 'lease', 'corr', 'strophe', 'footote', '*no/mos', 'continued', 'antepirrheme', 'Epirrheme', 'Lyric-Scene', 'iamb', 'footnot2e', 'm5arginal', 'translation', 'worktitle', 'margi4nal', 'antiproepirrheme', 'verse', 'will', 'names', 'resolution', 'marginal77v', 'antistrophe', '*do/gma *sune/drwn', 'dactyls', 'witnesses', 'inscription', 'group', 'footnote', 'mentioned', 'verse-paraphrase', 'clause', 'margina', 'depositions', 'foonote', 'chapter', 'footn2ote', 'subscription', 'Verse', 'nomorph', 'fo1otnote', 'intro', 'prologue', 'reply', 'Episode', 'Katakeleusmos', 'margpinal', 'constellation', 'elegiacs', 'antikatakeleusmos', 'explanation', 'place', 'language', 'desc', 'footnote2', 'tetralogy', 'marginal', 'part', 'nomSac', 'Choral', 'katakeleusmenos', 'trochees', '*marturi/ai', 
'deposition', 'foo1tnote', 'month', 'marginalE', 'inscript', 'parabasis', 'marginapl', 'märginal', 'speaker', 'marginalC', 'subtitle', 'antipnigos', 'dact', 'suggestion', 'counter-plea', 'person', 'Extract', 'pnigos', 'direct', 'subtext', 'unspecified', 'katakeleusmos', 'textpart', 'term', 'emph', 'marginl', 'dialogue', '*diaqh=kai', 'close', 'Prologue', 'marginalp', 'Name', 'witness', 'terms', 'marginaDl', 'orig', '*ma/rtures', 'race', 'text', '*)epistolh/', 'header', 'footnot1e', 'foodnote', 'marinalΑB', 'iambic', '*(/orkoi', 'title', 'main', 'epode', 'book', 'marginaΣl', 'sub', 'choral', 'letter', 'oracle', 'Papyr', 'antikatakeleusmenos', 'marginael', 'paraphrase', 'iambics', 'antepirrhema', '*yh/fisma peri\\ *dwrea\\s toi=s a)po\\ *fulh=s', 'Exodus', 'drama', 'margipnal', 'lyric', 'fotnote', 'bibliography', 'spoken', 'lemma', 'Prose', 'margnal', '*no/moi', 'argument', 'epirrhema', 'edition', 'work', 'margina15vl'}
# Values of the `subtype` attribute, likewise uncorrected (e.g. ' chapter',
# 'sction', 'subdsection' are still present).
attrib_subtype = {'hexameter', 'Parabasis', 'tetrameters', 'anapests', 'antiprelude', 'source', 'sentence', 'comment', 'page', 'Letter', 'fragment', 'conspectus', 'Antepirrheme', 'commentary', 'TOC', 'subsection', 'section', 'quaestio', 'subdsection', 'toc', 'auctorm', 'index', 'fabula', ' chapter', 'epistle', 'ephymn.', 'Book', 'chapterer', 'preface', 'exordium', 'Agon', 'castlist', 'episode', 'Parodos', 'proagon', 'prelude', 'poem', 'monody', 'chap0ter', 'epirrheme', 'ephymnion', 'Antikatakeleusmos', 'chaptser', 'strophe', 'dramatispersonae', 'line', 'antepirrheme', 'Epirrheme', 'sectionn', 'Lyric-Scene', 'ii_loci', 'sction', 'sigla', 'auctores', 'chaspter', 'verse', 'antistrohe', 'Pnigos', 'ancient', 'Antipnigos', 'antistrophe', 'volume', 'dactyls', 'haeresis', 'wolfii', 'chapter', 'appendix', ' section', 'iii_loci', 'number', 'Episode', 'Katakeleusmos', 'paragraph', 'antikatakeleusmos', 'ephymn', 'aphorism', 'corrigenda', 'part', 'Choral', 'hypothesis', 'katakeleusmenos', 'trochees', 'subection', 'parabasis', 'essay', 'proode', 'autorum', 'antipnigos', 'pnigos', 'kommos', 'epigram', 'katakeleusmos', 'addenda', 'dialogue', 'close', 'Prologue', 'supplementa', 'setion', 'ducangii', 'setence', 'entry', 'chapter1', 'index.1', 'chapters', 'epode', 'book', 'epigraph', 'speech', 'loci', 'letter', 'choral', 'antikatakeleusmenos', 'iambics', 'trochaic', 'iv_loci', 'Exodus', 'type', 'lyric', 'index.2', 'homilia', 'work'}
# XML element (tag) names.
tag_names = {'head', 'pb', 'note', 'hi', 'lg', 'gap', 'div1', 'seg', 'div2', 'sic', 'del', 'add', 'milestone', 'title', 'q', 'div', 'p', 'l', 'lb', 'argument', 'sp', 'div3', 'num', 'quote', 'speaker', 'bibl', 'date', 'ab', 'lemma', 'foreign'} # Biblical and Patristic literature only


# Configuration of the TF director

The function `authorWork(path)` reads the author and title from the header of each source file; these two values determine the directory to which the converted work is written. Each XML file is then parsed and converted by calling the `cv.walk()` function. As a result, valid TF packages should be produced.

In [5]:
# Module-level Text-Fabric Timestamp instance, used for the timed progress
# messages (tm.info / tm.indent) in the conversion code and the driver loop.
tm = Timestamp()

def _collectHeaderMeta(data, container, metaTaglist, metaData):
    """Harvest ``tag -> text`` pairs found inside one header container element.

    Walks the pre-split header fragments in ``data`` and, while inside
    ``<container ...> ... </container>``, records every text fragment under
    the name of the most recently seen tag.

    Args:
        data: header fragments, alternating between tags (``'<title>'``)
            and the text between them.
        container: element name that delimits the region to harvest,
            e.g. ``'titleStmt'`` or ``'biblStruct'``.
        metaTaglist: list of tag names seen so far; extended in place.
            Closing tags are recorded too (as ``'/name'``), so text that
            directly follows a closing tag is filed under that key.
        metaData: dict receiving the harvested text; updated in place.
            Repeated text for the same tag is appended, comma separated,
            unless it already occurs in the stored value.
    """
    opening = '<' + container
    closing = '</' + container
    inside = False
    for elem in data:
        # Drop surrounding blanks and stray punctuation/braces.
        elem = elem.strip('{ ,.}')
        if elem == '':
            continue
        if elem.startswith('<body'):
            break
        if elem.startswith(opening):
            inside = True
        elif elem.startswith(closing):
            inside = False
        elif inside:
            if elem.startswith('<'):
                # The tag name ends at the first blank (attributes follow)
                # or, when there are no attributes, at the closing '>'.
                tag_split = elem.find(' ') if elem.find(' ') != -1 else elem.find('>')
                metaTaglist.append(elem[1:tag_split])
            elif metaTaglist:
                # Text before any tag has no key to be stored under; skip it
                # instead of failing on metaTaglist[-1].
                tag = metaTaglist[-1]
                if tag not in metaData:
                    metaData[tag] = elem
                elif elem not in metaData[tag]:
                    metaData[tag] += f', {elem}'


def authorWork(path):
    """Read author and title of a work from the header of a TEI XML file.

    Only the part of the file before the first line matching ``bodyStartRE``
    is read. Metadata is taken from ``<titleStmt>``; when that yields neither
    an author nor an editor, ``<biblStruct>`` is scanned as a fallback.

    Args:
        path: path of the XML source file (read as UTF-8).

    Returns:
        A tuple ``(author, book)``. ``author`` is the title-cased author, or
        the editor when no author was found, or ``'undefined'``. ``book`` is
        the title-cased title with ``'(Greek)'``, ``'Machine readable text'``,
        dots and commas removed, or ``'undefined'`` when no title was found.
    """
    metaTaglist = []
    metaData = {}

    # Explicit encoding: the sources contain Greek, so decoding must not
    # depend on the platform's locale default.
    with open(path, encoding='utf-8') as xml:
        # Iterate lazily so that reading stops at the body instead of
        # loading the whole (possibly large) file.
        header = ' '.join(
            line.strip()
            for line in takewhile(lambda line: not bodyStartRE.search(line), xml)
        )

    # Split into alternating tag / text fragments.
    data = header.replace('<', '#!#<').replace('>', '>#!#').split('#!#')

    _collectHeaderMeta(data, 'titleStmt', metaTaglist, metaData)
    if 'author' not in metaData and 'editor' not in metaData:
        _collectHeaderMeta(data, 'biblStruct', metaTaglist, metaData)

    if 'author' in metaData:
        author = metaData['author'].title()
    elif 'editor' in metaData:
        author = metaData['editor'].title()
    else:
        author = 'undefined'

    if 'title' in metaData:
        book = metaData['title'].replace('(Greek)', '')\
                                .replace('.', '')\
                                .replace(',', '')\
                                .replace('Machine readable text', '')\
                                .strip()\
                                .title()
    else:
        # Same fallback as for the author, instead of raising KeyError.
        book = 'undefined'
    return (author, book)

# Driver: convert every matching source file into its own Text-Fabric dataset.
# COUNTER1 counts the files that were processed, COUNTER2 the conversions for
# which cv.walk() returned a truthy result.
COUNTER1 = 0
COUNTER2 = 0

# Every file below SRC_DIR (searched recursively) whose name contains 'grc'
# and ends in '.xml'. The commented-out alternatives restrict the run to a
# single author or work.
for xmlfile in glob.glob(SRC_DIR+'/**/*grc*.xml', recursive=True):
# for xmlfile in glob.glob(SRC_DIR + '/tlg2042'+'/**/*grc*.xml', recursive=True):
# for xmlfile in glob.glob(SRC_DIR + '/tlg0031/tlg004/tlg0031.tlg004.perseus-grc2.xml'):
# for xmlfile in glob.glob(SRC_DIR + '/tlg0555/tlg002/tlg0555.tlg002.opp-grc1.xml'):
# for xmlfile in glob.glob(SRC_DIR + '/tlg0555/tlg001/tlg0555.tlg001.opp-grc1.xml'):
# for xmlfile in glob.glob(SRC_DIR +'/tlg0555/**/*grc*.xml', recursive=True):
    # Visually separate the log output of consecutive works.
    if COUNTER1 >= 1:
        print('\n\n')
    COUNTER1 +=1

    tm.info(f'parsing {xmlfile}\n')
    (author, book) = authorWork(xmlfile)
    # Output goes to <TF_DIR>/<author>/<book>/tf/<VERSION>. If that directory
    # already exists, the book directory gets a numeric prefix: the first
    # C (1, 2, ...) for which '<C>_<book>' does not exist yet.
    if os.path.isdir(f'{TF_DIR}/{author}/{book}/tf/{VERSION}'):
        C = 1
        while os.path.isdir(f'{TF_DIR}/{author}/{C}_{book}/tf/{VERSION}'):
            C +=1
        # The loop has no `break`, so this `else` always runs once the
        # condition turns false, i.e. as soon as a free name is found.
        else:
            TF_PATH = f'{TF_DIR}/{author}/{C}_{book}/tf/{VERSION}'
    else:
        TF_PATH = f'{TF_DIR}/{author}/{book}/tf/{VERSION}'
    TF = Fabric(locations=TF_PATH)
    cv = CV(TF)
    # Conversion is defined elsewhere in the notebook; it supplies the
    # director and the settings passed to cv.walk() below.
    x = Conversion(xmlfile)
    # NOTE(review): this global is not used by the call below, which passes
    # x.slotType -- check whether anything else reads it.
    slotType = 'word'
    good = cv.walk(
        x.director,
        x.slotType,
        otext=x.otext,
        generic=x.generic,
        intFeatures=x.intFeatures,
        featureMeta=x.featureMeta,
        warn=False,
    )
    if good: COUNTER2 +=1
tm.info(f'{COUNTER2} of {COUNTER1} works have successfully been converted!')
# NOTE(review): lemmatizer_open is created outside this cell -- presumably the
# file handle of the pickled lemmatizer; confirm.
lemmatizer_open.close()

  0.17s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0032/tlg012/tlg0032.tlg012.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/On The Cavalry Commander/tf/1.0/
  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/On The Cavalry Commander/tf/1.0/
  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION   TYPES:    _book, chapter, section
   |   SECTION   FEATURES: _book, chapter, section
   |   STRUCTURE TYPES:    _book, chapter, section
   |   STRUCTURE FEATURES: _book, chapter, section
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-ful

   |      |   text-orig-plain      plain
   |     0.01s OK
   |     0.00s Following director... 
   |    -0.00s No tag mistake(s) found...
   |     4.02s "edge" actions: 0
   |     4.02s "feature" actions: 561057
   |     4.02s "node" actions: 5947
   |     4.02s "resume" actions: 28
   |     4.02s "slot" actions: 79293
   |     4.02s "terminate" actions: 7537
   |          1 x "_book" node 
   |       2649 x "_sentence" node 
   |         28 x "add" node 
   |          8 x "book" node 
   |         49 x "chapter" node 
   |         97 x "del" node 
   |          4 x "gap" node 
   |          8 x "head" node 
   |          2 x "l" node 
   |        270 x "milestone" node 
   |       1409 x "p" node 
   |          2 x "quote" node 
   |       1417 x "section" node 
   |          3 x "sic" node 
   |      79293 x "word" node  = slot type
   |      85240 nodes of all types
   |     4.06s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      8 unlinked "chapter" nodes: 

   |     0.36s "resume" actions: 3
   |     0.36s "slot" actions: 6983
   |     0.36s "terminate" actions: 850
   |          1 x "_book" node 
   |        282 x "_sentence" node 
   |         10 x "add" node 
   |         12 x "chapter" node 
   |         19 x "del" node 
   |          2 x "gap" node 
   |         25 x "milestone" node 
   |        155 x "p" node 
   |        154 x "section" node 
   |       6983 x "word" node  = slot type
   |       7643 nodes of all types
   |     0.37s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      1 unlinked "_sentence" node: [282]
   |      |     0.00s      1 unlinked node
   |      |     0.00s Leaving   7642 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.00s OK
   |     0.00s reordering nodes ...
   |     0.00s Sorting 1 nodes of type "_book"
   |     0.01s Sorting 281 nodes of type "_sentence"
   |     0.01s Sorting 10 nodes of type "add"
   |  

   |      |     0.04s node feature "_book" with 1 node
   |      |     0.04s node feature "_sentence" with 308 nodes
   |      |     0.04s node feature "add" with 34 nodes
   |      |     0.04s node feature "beta_plain" with 9145 nodes
   |      |     0.05s node feature "chapter" with 13 nodes
   |      |     0.05s node feature "del" with 19 nodes
   |      |     0.05s node feature "gap" with 1 node
   |      |     0.05s node feature "lemma" with 9145 nodes
   |      |     0.06s node feature "main" with 9145 nodes
   |      |     0.06s node feature "milestone" with 32 nodes
   |      |     0.07s node feature "norm" with 9145 nodes
   |      |     0.07s node feature "orig" with 9145 nodes
   |      |     0.07s node feature "p" with 217 nodes
   |      |     0.07s node feature "plain" with 9145 nodes
   |      |     0.08s node feature "post" with 9144 nodes
   |      |     0.08s node feature "pre" with 3 nodes
   |      |     0.08s node feature "section" with 215 nodes
   |     0.04s OK


   |     0.22s OK
  0.00s Exporting 22 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      66514
  0.01s maxNode=      71696
  0.01s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   |     0.01s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   |     0.00s T add                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   |     0.13s T beta_plain           to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   |     0.00s T book                 to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   |     0.00s T chapter              to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hellenica/tf/1.0
   

   |     0.00s T gap                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.03s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.02s T main                 to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.02s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.01s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.00s T otype                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Constitution Of The Lacedaemonians/tf/1.0
   |     0.00s T p  

   |     0.00s T section              to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hiero/tf/1.0
   |     0.00s T sic                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hiero/tf/1.0
   |     0.01s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hiero/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hiero/tf/1.0
  0.17s Exported 19 node features and 1 edge features and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Hiero/tf/1.0



    15s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0032/tlg002/tlg0032.tlg002.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0/
  0.00s Warp feat

   |     0.00s T pre                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.00s T quote                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.00s T section              to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.00s T sic                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.00s T term                 to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.08s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0
  0.75s Exported 26 node features and 1 edge features and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Memorabilia/tf/1.0



    18s parsing /home/e

   |     0.18s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Anabasis/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Anabasis/tf/1.0
  1.32s Exported 22 node features and 1 edge features and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Anabasis/tf/1.0



    22s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0032/tlg011/tlg0032.tlg011.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/Ways And Means/tf/1.0/
  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/Ways And Means/tf/1.0/
  0.01s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking th

   |     0.46s "edge" actions: 0
   |     0.46s "feature" actions: 52290
   |     0.47s "node" actions: 580
   |     0.47s "resume" actions: 6
   |     0.47s "slot" actions: 7385
   |     0.47s "terminate" actions: 753
   |          1 x "_book" node 
   |        241 x "_sentence" node 
   |         20 x "add" node 
   |         11 x "chapter" node 
   |         16 x "del" node 
   |          2 x "gap" node 
   |         15 x "milestone" node 
   |        137 x "p" node 
   |        137 x "section" node 
   |       7385 x "word" node  = slot type
   |       7965 nodes of all types
   |     0.49s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      1 unlinked "_sentence" node: [241]
   |      |     0.00s      1 unlinked node
   |      |     0.00s Leaving   7964 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.00s OK
   |     0.00s reordering nodes ...
   |     0.01s Sorting 1 nodes of type "_boo

   |      |     0.06s node feature "main" with 17819 nodes
   |      |     0.07s node feature "milestone" with 22 nodes
   |      |     0.07s node feature "norm" with 17819 nodes
   |      |     0.08s node feature "orig" with 17819 nodes
   |      |     0.09s node feature "p" with 397 nodes
   |      |     0.09s node feature "plain" with 17819 nodes
   |      |     0.09s node feature "post" with 17819 nodes
   |      |     0.10s node feature "pre" with 20 nodes
   |      |     0.10s node feature "section" with 397 nodes
   |      |     0.10s node feature "sic" with 5 nodes
   |     0.06s OK
  0.00s Exporting 17 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Economics/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      17819
  0.01s maxNode=      19277
  0.01s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Economics/tf/1.

   |     0.00s T pre                  to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Apology/tf/1.0
   |     0.00s T section              to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Apology/tf/1.0
   |     0.00s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Apology/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Apology/tf/1.0
  0.09s Exported 15 node features and 1 edge features and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Xenophon/Apology/tf/1.0



    26s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0032/tlg004/tlg0032.tlg004.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Xenophon/Symposium/tf/1.0/
  0.00s W

  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Republic/tf/1.0/
  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION   TYPES:    _book, book, page
   |   SECTION   FEATURES: _book, book, page
   |   STRUCTURE TYPES:    _book, book, page, section
   |   STRUCTURE FEATURES: _book, book, page, section
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-full       orig
   |      |   text-orig-lemma      lemma
   |      |   text-orig-main       main
   |      |   text-orig-norm       norm
   |      |   text-orig-plain      plain
   |     0.01s OK
   |     0.00s Following director... 
   |    -0.00s No tag mistake(s) found...
   |     4.99s "edge" actions: 0
   |     4.99s "feature" actions: 628646
   |     4.99s "node" actions: 10734
   |     4.99s "resume" actions: 25
   | 

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Cleitophon/tf/1.0/
  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Cleitophon/tf/1.0/
  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION   TYPES:    _book, perseus, page
   |   SECTION   FEATURES: _book, perseus, page
   |   STRUCTURE TYPES:    _book, perseus, page
   |   STRUCTURE FEATURES: _book, perseus, page
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-full       orig
   |      |   text-orig-lemma      lemma
   |      |   text-orig-main       main
   |      |   text-orig-norm       norm
   |      |   text-orig-plain      plain
   |     0.01s OK
   |     0.00s Following director... 
   |    -0.00s No tag mistake(s) found...
   |     0

   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      2 unlinked "_sentence" nodes: [1, 317]
   |      |     0.00s      1 unlinked "l" node: [1]
   |      |     0.00s      3 unlinked nodes
   |      |     0.00s Leaving   4998 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.00s OK
   |     0.00s reordering nodes ...
   |     0.00s Sorting 1 nodes of type "_book"
   |     0.00s Sorting 315 nodes of type "_sentence"
   |     0.00s Sorting 5 nodes of type "bibl"
   |     0.00s Sorting 2 nodes of type "cit"
   |     0.01s Sorting 1 nodes of type "del"
   |     0.01s Sorting 25 nodes of type "l"
   |     0.01s Sorting 174 nodes of type "label"
   |     0.01s Sorting 61 nodes of type "milestone"
   |     0.01s Sorting 174 nodes of type "p"
   |     0.01s Sorting 20 nodes of type "page"
   |     0.01s Sorting 13 nodes of type "perseus"
   |     0.01s Sorting 3 nodes of type "q"
   |     0.02s Sorting 5 n

   |     0.01s OK
   |     0.00s reassigning feature values ...
   |      |     0.02s node feature "_book" with 1 node
   |      |     0.02s node feature "_sentence" with 104 nodes
   |      |     0.02s node feature "add" with 1 node
   |      |     0.02s node feature "beta_plain" with 2393 nodes
   |      |     0.02s node feature "bibl" with 1 node
   |      |     0.02s node feature "del" with 1 node
   |      |     0.02s node feature "l" with 1 node
   |      |     0.02s node feature "lemma" with 2393 nodes
   |      |     0.02s node feature "main" with 2393 nodes
   |      |     0.03s node feature "milestone" with 154 nodes
   |      |     0.03s node feature "norm" with 2393 nodes
   |      |     0.03s node feature "orig" with 2393 nodes
   |      |     0.03s node feature "p" with 141 nodes
   |      |     0.03s node feature "page" with 11 nodes
   |      |     0.03s node feature "perseus" with 8 nodes
   |      |     0.03s node feature "plain" with 2393 nodes
   |      |     0.03s 

   |      |     0.10s node feature "p" with 400 nodes
   |      |     0.10s node feature "page" with 91 nodes
   |      |     0.10s node feature "perseus" with 53 nodes
   |      |     0.10s node feature "plain" with 17019 nodes
   |      |     0.11s node feature "post" with 17019 nodes
   |      |     0.12s node feature "pre" with 9 nodes
   |      |     0.12s node feature "q" with 22 nodes
   |      |     0.12s node feature "quote" with 6 nodes
   |      |     0.12s node feature "said" with 400 nodes
   |      |     0.12s node feature "term" with 28 nodes
   |     0.08s OK
  0.00s Exporting 26 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Phaedrus/tf/1.0:
  0.00s VALIDATING oslots feature
  0.00s VALIDATING oslots feature
  0.00s maxSlot=      17019
  0.00s maxNode=      19702
  0.01s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Phaedrus/tf/1.0
   |     0.00s T _sent

   |      |     0.11s node feature "orig" with 17217 nodes
   |      |     0.12s node feature "p" with 1178 nodes
   |      |     0.12s node feature "page" with 80 nodes
   |      |     0.12s node feature "perseus" with 53 nodes
   |      |     0.12s node feature "plain" with 17217 nodes
   |      |     0.13s node feature "post" with 17217 nodes
   |      |     0.13s node feature "pre" with 3 nodes
   |      |     0.13s node feature "q" with 28 nodes
   |      |     0.13s node feature "quote" with 4 nodes
   |      |     0.13s node feature "said" with 1178 nodes
   |      |     0.13s node feature "sic" with 4 nodes
   |      |     0.14s node feature "term" with 16 nodes
   |     0.08s OK
  0.00s Exporting 26 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Sophist/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      17217
  0.01s maxNode=      23353
  0.01s OK: oslots is valid
   |     0.00s T _b

   |      |     0.14s node feature "page" with 48 nodes
   |      |     0.14s node feature "perseus" with 33 nodes
   |      |     0.14s node feature "plain" with 11179 nodes
   |      |     0.15s node feature "post" with 11179 nodes
   |      |     0.15s node feature "pre" with 2 nodes
   |      |     0.16s node feature "q" with 8 nodes
   |      |     0.16s node feature "quote" with 1 node
   |      |     0.16s node feature "said" with 907 nodes
   |     0.08s OK
  0.00s Exporting 23 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 1/tf/1.0:
  0.01s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      11179
  0.01s maxNode=      15726
  0.02s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 1/tf/1.0
   |     0.00s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 1/tf/1.0
   |     0.

   |      |     0.17s node feature "plain" with 17620 nodes
   |      |     0.17s node feature "post" with 17620 nodes
   |      |     0.18s node feature "pre" with 73 nodes
   |      |     0.18s node feature "q" with 65 nodes
   |      |     0.19s node feature "quote" with 39 nodes
   |      |     0.19s node feature "said" with 315 nodes
   |      |     0.19s node feature "term" with 8 nodes
   |     0.11s OK
  0.00s Exporting 26 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Protagoras/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      17620
  0.01s maxNode=      19403
  0.01s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Protagoras/tf/1.0
   |     0.00s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Protagoras/tf/1.0
   |     0.00s T add                  to /home/ernstboogert/github/pthu/g

   |      |     0.05s node feature "plain" with 6955 nodes
   |      |     0.05s node feature "post" with 6955 nodes
   |      |     0.05s node feature "pre" with 98 nodes
   |      |     0.05s node feature "quote" with 4 nodes
   |      |     0.05s node feature "said" with 254 nodes
   |     0.03s OK
  0.00s Exporting 23 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Lysis/tf/1.0:
  0.00s VALIDATING oslots feature
  0.00s VALIDATING oslots feature
  0.00s maxSlot=       6955
  0.00s maxNode=       7874
  0.00s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Lysis/tf/1.0
   |     0.00s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Lysis/tf/1.0
   |     0.00s T add                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Lysis/tf/1.0
   |     0.03s T beta_plain           to /home/ernstboogert/github/pthu/greek_literature/Pl

   |     0.00s T del                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.00s T label                to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.04s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.04s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.03s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.04s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.02s T otype                to /home/ernstboogert/github/pthu/greek_literature/Plato/Euthydemus/tf/1.0
   |     0.00s T p                    to /home/ernstboogert/github/pthu/greek_literature/Plato/E

   |     0.00s T label                to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.01s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.01s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.01s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.01s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.00s T otype                to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.00s T p                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Alcibiades 2/tf/1.0
   |     0.00s T page                 to /home/ernstboogert/github/pthu/greek_li

   |     0.00s T gap                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.00s T l                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.00s T label                to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.31s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.23s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.20s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.19s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.05s T otype                to /home/ernstboogert/github/pthu/greek_literature/Plato/Laws/tf/1.0
   |     0.00s T p                   

   |     0.00s T p                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s T page                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s T perseus              to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.01s T plain                to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s T post                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s T quote                to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s T said                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.01s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Plato/Hipparchus/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Plato/H

  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION   TYPES:    _book, perseus, page
   |   SECTION   FEATURES: _book, perseus, page
   |   STRUCTURE TYPES:    _book, perseus, page
   |   STRUCTURE FEATURES: _book, perseus, page
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-full       orig
   |      |   text-orig-lemma      lemma
   |      |   text-orig-main       main
   |      |   text-orig-norm       norm
   |      |   text-orig-plain      plain
   |     0.01s OK
   |     0.00s Following director... 
   |    -0.00s No tag mistake(s) found...
   |     0.52s "edge" actions: 0
   |     0.53s "feature" actions: 63731
   |     0.53s "node" actions: 2052
   |     0.53s "resume" actions: 0
   |     0.53s "slot" actions: 8811
   |     0.53s "terminate" actions: 2124
   |          1 x "_book" node 
   |        703 x "

   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      2 unlinked "_sentence" nodes: [1, 365]
   |      |     0.00s      2 unlinked nodes
   |      |     0.00s Leaving   4046 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.00s OK
   |     0.00s reordering nodes ...
   |     0.00s Sorting 1 nodes of type "_book"
   |     0.00s Sorting 363 nodes of type "_sentence"
   |     0.00s Sorting 2 nodes of type "add"
   |     0.01s Sorting 3 nodes of type "bibl"
   |     0.01s Sorting 6 nodes of type "l"
   |     0.01s Sorting 192 nodes of type "label"
   |     0.01s Sorting 44 nodes of type "milestone"
   |     0.01s Sorting 192 nodes of type "p"
   |     0.01s Sorting 15 nodes of type "page"
   |     0.01s Sorting 9 nodes of type "perseus"
   |     0.01s Sorting 4 nodes of type "q"
   |     0.02s Sorting 3 nodes of type "quote"
   |     0.02s Sorting 192 nodes of type "said"
   |     0.02s Sorting 1 nodes

   |     0.01s Sorting 233 nodes of type "p"
   |     0.01s Sorting 81 nodes of type "page"
   |     0.01s Sorting 5 nodes of type "q"
   |     0.01s Sorting 2 nodes of type "quote"
   |     0.01s Sorting 233 nodes of type "said"
   |     0.02s Sorting 81 nodes of type "section"
   |     0.02s Max node = 6739
   |     0.02s OK
   |     0.00s reassigning feature values ...
   |      |     0.03s node feature "_book" with 1 node
   |      |     0.03s node feature "_sentence" with 446 nodes
   |      |     0.03s node feature "beta_plain" with 5413 nodes
   |      |     0.03s node feature "bibl" with 1 node
   |      |     0.03s node feature "cit" with 1 node
   |      |     0.03s node feature "del" with 7 nodes
   |      |     0.03s node feature "head" with 1 node
   |      |     0.03s node feature "l" with 2 nodes
   |      |     0.03s node feature "label" with 233 nodes
   |      |     0.03s node feature "lemma" with 5413 nodes
   |      |     0.03s node feature "main" with 5413 nodes
  




    57s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0059/tlg010/tlg0059.tlg010.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Philebus/tf/1.0/
  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Philebus/tf/1.0/
  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION   TYPES:    _book, perseus, page
   |   SECTION   FEATURES: _book, perseus, page
   |   STRUCTURE TYPES:    _book, perseus, page, section
   |   STRUCTURE FEATURES: _book, perseus, page, section
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-full       orig
   |      |   te

   |          1 x "_book" node 
   |       2021 x "_sentence" node 
   |         16 x "add" node 
   |          1 x "bibl" node 
   |          1 x "cit" node 
   |          1 x "corr" node 
   |         38 x "del" node 
   |          2 x "l" node 
   |       1142 x "label" node 
   |       1142 x "p" node 
   |         86 x "page" node 
   |         57 x "perseus" node 
   |         16 x "q" node 
   |          1 x "quote" node 
   |       1142 x "said" node 
   |        311 x "section" node 
   |          1 x "term" node 
   |      18861 x "word" node  = slot type
   |      24840 nodes of all types
   |     1.16s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      2 unlinked "_sentence" nodes: [1, 2021]
   |      |     0.00s      2 unlinked nodes
   |      |     0.00s Leaving  24838 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.00s OK
   |     0.00s reordering nodes ...
   |     0.01s Sor

   |     1.08s "resume" actions: 4
   |     1.08s "slot" actions: 18315
   |     1.08s "terminate" actions: 5849
   |          1 x "_book" node 
   |       2083 x "_sentence" node 
   |          6 x "add" node 
   |         18 x "del" node 
   |          1 x "head" node 
   |        898 x "label" node 
   |        898 x "p" node 
   |         89 x "page" node 
   |         56 x "perseus" node 
   |        898 x "said" node 
   |        306 x "section" node 
   |      18315 x "word" node  = slot type
   |      23569 nodes of all types
   |     1.10s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    -0.00s      1 unlinked "perseus" node: [1]
   |      |     0.00s      1 unlinked "page" node: [1]
   |      |     0.00s      1 unlinked "section" node: [1]
   |      |     0.00s      2 unlinked "_sentence" nodes: [1, 2083]
   |      |     0.00s      5 unlinked nodes
   |      |     0.00s Leaving  23564 nodes
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |  

   |     0.01s Sorting 2 nodes of type "cit"
   |     0.01s Sorting 3 nodes of type "del"
   |     0.01s Sorting 15 nodes of type "l"
   |     0.01s Sorting 570 nodes of type "label"
   |     0.02s Sorting 153 nodes of type "milestone"
   |     0.02s Sorting 570 nodes of type "p"
   |     0.02s Sorting 54 nodes of type "page"
   |     0.03s Sorting 31 nodes of type "perseus"
   |     0.03s Sorting 16 nodes of type "q"
   |     0.03s Sorting 5 nodes of type "quote"
   |     0.03s Sorting 570 nodes of type "said"
   |     0.03s Sorting 1 nodes of type "sic"
   |     0.03s Max node = 13288
   |     0.03s OK
   |     0.00s reassigning feature values ...
   |      |     0.04s node feature "_book" with 1 node
   |      |     0.04s node feature "_sentence" with 1031 nodes
   |      |     0.04s node feature "add" with 2 nodes
   |      |     0.04s node feature "beta_plain" with 10262 nodes
   |      |     0.05s node feature "bibl" with 3 nodes
   |      |     0.05s node feature "cit" with 2 no

   |     0.02s Sorting 51 nodes of type "page"
   |     0.02s Sorting 26 nodes of type "perseus"
   |     0.02s Sorting 33 nodes of type "q"
   |     0.02s Sorting 27 nodes of type "speech"
   |     0.02s Max node = 9374
   |     0.02s OK
   |     0.00s reassigning feature values ...
   |      |     0.03s node feature "_book" with 1 node
   |      |     0.03s node feature "_sentence" with 265 nodes
   |      |     0.03s node feature "add" with 3 nodes
   |      |     0.04s node feature "beta_plain" with 8750 nodes
   |      |     0.04s node feature "del" with 4 nodes
   |      |     0.04s node feature "head" with 1 node
   |      |     0.04s node feature "lemma" with 8750 nodes
   |      |     0.05s node feature "main" with 8750 nodes
   |      |     0.05s node feature "milestone" with 189 nodes
   |      |     0.05s node feature "norm" with 8750 nodes
   |      |     0.05s node feature "orig" with 8750 nodes
   |      |     0.05s node feature "p" with 26 nodes
   |      |     0.05s no

   |      |     0.12s node feature "norm" with 23550 nodes
   |      |     0.13s node feature "orig" with 23550 nodes
   |      |     0.14s node feature "p" with 1029 nodes
   |      |     0.14s node feature "page" with 116 nodes
   |      |     0.14s node feature "perseus" with 69 nodes
   |      |     0.14s node feature "plain" with 23550 nodes
   |      |     0.15s node feature "post" with 23550 nodes
   |      |     0.16s node feature "pre" with 2 nodes
   |      |     0.16s node feature "q" with 71 nodes
   |      |     0.16s node feature "quote" with 2 nodes
   |      |     0.16s node feature "said" with 1029 nodes
   |      |     0.16s node feature "sic" with 3 nodes
   |      |     0.16s node feature "term" with 1 node
   |     0.08s OK
  0.00s Exporting 25 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Theaetetus/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      23550
  0.01s maxNod

   |      |     0.04s node feature "page" with 24 nodes
   |      |     0.04s node feature "perseus" with 14 nodes
   |      |     0.04s node feature "plain" with 4417 nodes
   |      |     0.04s node feature "post" with 4417 nodes
   |      |     0.05s node feature "pre" with 1 node
   |      |     0.05s node feature "quote" with 9 nodes
   |      |     0.05s node feature "said" with 231 nodes
   |     0.02s OK
  0.00s Exporting 23 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Hippias Minor/tf/1.0:
  0.00s VALIDATING oslots feature
  0.00s VALIDATING oslots feature
  0.00s maxSlot=       4417
  0.00s maxNode=       5679
  0.00s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Hippias Minor/tf/1.0
   |     0.00s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Hippias Minor/tf/1.0
   |     0.00s T add                  to /home/ernstboogert/gi

   |      |     0.17s node feature "pre" with 6 nodes
   |      |     0.17s node feature "q" with 33 nodes
   |      |     0.17s node feature "quote" with 15 nodes
   |      |     0.17s node feature "said" with 1107 nodes
   |     0.09s OK
  0.00s Exporting 25 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Gorgias/tf/1.0:
  0.00s VALIDATING oslots feature
  0.01s VALIDATING oslots feature
  0.01s maxSlot=      27386
  0.01s maxNode=      33613
  0.01s OK: oslots is valid
   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Gorgias/tf/1.0
   |     0.01s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Gorgias/tf/1.0
   |     0.00s T add                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Gorgias/tf/1.0
   |     0.05s T beta_plain           to /home/ernstboogert/github/pthu/greek_literature/Plato/Gorgias/tf/1.0
   |     0.00s T bibl               

   |     0.00s T _book                to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.00s T _sentence            to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.04s T beta_plain           to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.00s T del                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.00s T label                to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.07s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.06s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.01s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0.06s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Timaeus/tf/1.0
   |     0

   |     0.00s T gap                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.00s T l                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.00s T label                to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.04s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.04s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.05s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.04s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium/tf/1.0
   |     0.01s T otype                to /home/ernstboogert/github/pthu/greek_literature/Plato/Symposium

   |     0.05s T lemma                to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.00s T letter               to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.04s T main                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.00s T milestone            to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.06s T norm                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.04s T orig                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.01s T otype                to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.00s T p                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
   |     0.00s T page                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Epistles/tf/1.0
 

   |     0.00s T page                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T perseus              to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.02s T plain                to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.01s T post                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T pre                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T q                    to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T quote                to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T said                 to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.00s T sic                  to /home/ernstboogert/github/pthu/greek_literature/Plato/Laches/tf/1.0
   |     0.02s T os

   |     0.02s T oslots               to /home/ernstboogert/github/pthu/greek_literature/Plato/Charmides/tf/1.0
   |     0.00s M otext                to /home/ernstboogert/github/pthu/greek_literature/Plato/Charmides/tf/1.0
  0.23s Exported 21 node features and 1 edge features and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Charmides/tf/1.0



 1m 15s parsing /home/ernstboogert/github/pthu/sources/greek_sources/canonical-greekLit/data/tlg0059/tlg032/tlg0059.tlg032.perseus-grc2.xml

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored
  0.00s Warp feature "otype" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Critias/tf/1.0/
  0.00s Warp feature "oslots" not found in
/home/ernstboogert/github/pthu/greek_literature/Plato/Critias/tf/1.0/
  0.00s Warp feature "otext" not found. Working without Text-API

  0.00s Importing data from walking through the source ...
   | 

   |   STRUCTURE FEATURES: _book, perseus, page
   |   TEXT      FEATURES:
   |      |   text-orig-beta-plain beta_plain
   |      |   text-orig-full       orig
   |      |   text-orig-lemma      lemma
   |      |   text-orig-main       main
   |      |   text-orig-norm       norm
   |      |   text-orig-plain      plain
   |     0.02s OK
   |     0.00s Following director... 
   |    -0.00s No tag mistake(s) found...
   |     0.88s "edge" actions: 0
   |     0.88s "feature" actions: 109512
   |     0.88s "node" actions: 2636
   |     0.88s "resume" actions: 24
   |     0.88s "slot" actions: 15261
   |     0.88s "terminate" actions: 2784
   |          1 x "_book" node 
   |        978 x "_sentence" node 
   |          5 x "add" node 
   |         14 x "del" node 
   |        381 x "milestone" node 
   |         41 x "p" node 
   |         78 x "page" node 
   |         41 x "perseus" node 
   |       1072 x "said" node 
   |         25 x "term" node 
   |      15261 x "word" node  = slo

   |     0.00s Sorting 200 nodes of type "_sentence"
   |     0.01s Sorting 1 nodes of type "add"
   |     0.01s Sorting 5 nodes of type "del"
   |     0.01s Sorting 46 nodes of type "label"
   |     0.01s Sorting 113 nodes of type "milestone"
   |     0.01s Sorting 46 nodes of type "p"
   |     0.01s Sorting 37 nodes of type "page"
   |     0.01s Sorting 20 nodes of type "perseus"
   |     0.02s Sorting 46 nodes of type "said"
   |     0.02s Sorting 2 nodes of type "sic"
   |     0.02s Max node = 6879
   |     0.02s OK
   |     0.00s reassigning feature values ...
   |      |     0.02s node feature "_book" with 1 node
   |      |     0.02s node feature "_sentence" with 201 nodes
   |      |     0.02s node feature "add" with 1 node
   |      |     0.02s node feature "beta_plain" with 6362 nodes
   |      |     0.03s node feature "del" with 5 nodes
   |      |     0.03s node feature "label" with 46 nodes
   |      |     0.03s node feature "lemma" with 6362 nodes
   |      |     0.03s no

   |      |     0.04s node feature "l" with 1 node
   |      |     0.04s node feature "label" with 102 nodes
   |      |     0.04s node feature "lemma" with 4277 nodes
   |      |     0.04s node feature "main" with 4277 nodes
   |      |     0.04s node feature "milestone" with 63 nodes
   |      |     0.04s node feature "norm" with 4277 nodes
   |      |     0.04s node feature "orig" with 4277 nodes
   |      |     0.05s node feature "p" with 102 nodes
   |      |     0.05s node feature "page" with 22 nodes
   |      |     0.05s node feature "perseus" with 12 nodes
   |      |     0.05s node feature "plain" with 4277 nodes
   |      |     0.05s node feature "post" with 4277 nodes
   |      |     0.05s node feature "pre" with 1 node
   |      |     0.05s node feature "q" with 21 nodes
   |      |     0.05s node feature "quote" with 1 node
   |      |     0.05s node feature "said" with 102 nodes
   |     0.02s OK
  0.00s Exporting 24 node and 1 edge and 1 config features to /home/ernstbo

   |      |     0.10s node feature "milestone" with 1 node
   |      |     0.10s node feature "norm" with 19034 nodes
   |      |     0.10s node feature "orig" with 19034 nodes
   |      |     0.11s node feature "p" with 774 nodes
   |      |     0.11s node feature "page" with 93 nodes
   |      |     0.11s node feature "perseus" with 58 nodes
   |      |     0.11s node feature "plain" with 19034 nodes
   |      |     0.12s node feature "post" with 19033 nodes
   |      |     0.13s node feature "pre" with 6 nodes
   |      |     0.13s node feature "q" with 397 nodes
   |      |     0.13s node feature "quote" with 8 nodes
   |      |     0.13s node feature "said" with 774 nodes
   |      |     0.13s node feature "section" with 321 nodes
   |      |     0.13s node feature "sic" with 2 nodes
   |      |     0.13s node feature "term" with 189 nodes
   |     0.07s OK
  0.00s Exporting 27 node and 1 edge and 1 config features to /home/ernstboogert/github/pthu/greek_literature/Plato/Cratylus/

IndexError: list index out of range