Skip to content
Permalink
Browse files

gen-defs: Python script to generate definitions from YAML

  • Loading branch information...
kba committed Oct 23, 2016
1 parent d70f840 commit 2af507ed1af680fc5403c90aa667ad11a3b1330f
Showing with 275 additions and 3 deletions.
  1. +161 −0 1.2/defs.yml
  2. +36 −0 1.2/templates/element
  3. +17 −0 1.2/templates/property
  4. +11 −3 Makefile
  5. +50 −0 gen-defs.py
@@ -0,0 +1,161 @@
element:
ocr_abstract:
categories: ['Logical Structuring']
ocr_author:
categories: ['Inline']
ocr_blockquote:
categories: ['Logical Structuring']
ocr_caption:
categories: ['Logical Structuring']
ocr_carea:
categories: ['Typesetting']
properties:
required: ['bbox']
ocr_chapter:
categories: ['Logical Structuring']
ocr_chem:
categories: ['Float']
ocr_cinfo:
categories: ['Float']
ocr_column:
categories: ['Typesetting']
deprecated: true
ocr_display:
categories: ['Float']
ocr_document:
categories: ['Logical Structuring']
recommended_tags: ['h1']
ocr_dropcap:
categories: ['Inline']
ocr_float:
categories: ['Float']
ocr_footer:
categories: ['Float']
ocr_glyph:
categories: ['Inline']
ocr_glyphs:
categories: ['Inline']
ocr_header:
categories: ['Float']
ocr_image:
categories: ['Float']
ocr_line:
categories: ['Typesetting']
ocr_linear:
categories: ['Inline']
ocr_linedrawing:
categories: ['Float']
ocr_math:
categories: ['Float']
ocr_noise:
categories: ['Inline']
ocr_page:
categories: ['Typesetting']
properties:
required: ['bbox']
recommended: ['image', 'imagemd5']
allowed: ['x_source']
ocr_pageno:
categories: ['Float']
ocr_par:
categories: ['Inline']
ocr_part:
categories: ['Inline']
ocr_photo:
categories: ['Inline']
ocr_section:
categories: ['Logical Structuring']
ocr_separator:
categories: ['Inline']
ocr_separator0:
categories: ['Inline']
ocr_subsection:
categories: ['Logical Structuring']
ocr_subsubsection:
categories: ['Logical Structuring']
ocr_table:
categories: ['Float']
ocr_textfloat:
categories: ["Logical Structuring"]
ocr_textimage:
categories: ['Float']
ocr_title:
categories: ['Inline']
ocr_xycut:
categories: ['Inline']
ocrx_block:
categories: ['Inline', 'Engine-Specific']
ocrx_line:
categories: ['Inline', 'Engine-Specific']
ocrx_word:
categories: ['Inline', 'Engine-Specific']


property:
bbox:
categories: ['Layout']
syntax: '
<a lt="unsigned short">x0</a>
<a lt="unsigned short">y0</a>
<a lt="unsigned short">x1</a>
<a lt="unsigned short">y1</a>
'
baseline:
categories: ['Inline']
syntax: '<a lt="float">pn</a> <a lt="float">pn-1</a> ... <a lt="float">p0</a>'
cflow:
categories: ['Flow']
syntax: '<a lt="string">cflowid</a>'
cuts:
categories: ['Layout']
syntax: "
<a lt='unsigned short'>c1dx</a>[,<a lt='unsigned short'>c1dy</a>[,<a lt='short'>c1dx2</a>]]
[<a lt='unsigned short'>c2dx</a>[,<a lt='unsigned short'>c2dy</a>[,<a lt='short'>c2dx2</a>]]]...
"
hardbreak:
categories: ['Inline']
syntax: '0|1'
default: 0
image:
categories: ['Page']
syntax: '<a lt="string">url</a>'
imagemd5:
categories: ['Page']
syntax: '<a lt="string">md5</a>'
# lpageno:
# categories: ['Page']
# nlp:
# categories: ['Confidence']
# order:
# categories: ['Flow']
# poly:
# categories: ['Layout']
# ppageno:
# categories: ['Flow']
# scan_res:
# categories: ['Page']
# textangle:
# categories: ['Layout']
# x_bboxes:
# categories: ['Layout']
# x_confs:
# categories: ['Confidence']
# x_font:
# categories: ['Font']
# x_fsize:
# categories: ['Font']
# x_scanner:
# categories: ['Page']
# x_source:
# categories: ['Page']
# x_wconf:
# categories: ['Confidence']

metadata:
ocr-system:
value: ...
required: true
ocr-capabilities:
ocr-number-of-pages:
ocr-langs:
ocr-scripts:
@@ -0,0 +1,36 @@
<dl class="def">

: Element Name
{% if deprecated %}
:: <del>{{ name }}</del> (Deprecated)
{% else %}
:: <a element>{{name}}</a>
{% endif %}

{% if recommended_tags %}
: Recommended HTML Tags
:: {% for tag in recommended_tags %}<a element>{{ tag }}</a> {% if not loop.last %}, {% endif %} {% endfor %}
{% endif %}

{% if categories %}
: Categories
:: {% for category in categories %}<a>{{ category }} Elements</a> {% if not loop.last %}, {% endif %} {% endfor %}
{% endif %}

{% if properties and properties.allowed %}
: <a>Allowed Properties</a>
:: {% for prop in properties.allowed %}'{{ prop }}'{% if not loop.last %},{% endif %} {% endfor %}
{% endif %}

{% if properties and properties.required %}
: <a>Required Properties</a>
:: {% for prop in properties.required %}'{{ prop }}'{% if not loop.last %},{% endif %} {% endfor %}
{% endif %}

{% if properties and properties.recommended %}
: <a>Recommended Properties</a>
:: {% for prop in properties.recommended %}'{{ prop }}'{% if not loop.last %},{% endif %} {% endfor %}
{% endif %}


</dl>
@@ -0,0 +1,17 @@
<dl class=def>

: Property Name
:: {{ name }}

{% for category in categories %}
{% if loop.first %}
: <a>Property Categories</a>
{% endif %}
:: <a>{{ category }} Property</a>
{% endfor %}

: <a>Property Syntax</a>
:: <code style='display:block;padding: .5em' highlight=c>{{name}} {{ syntax }}</code>


</dl>
@@ -10,9 +10,14 @@ SPEC_HTML = $(VERSION)/index.html
BIKESHED = $(shell for cmd in bikeshed docker curl;do type >/dev/null 2>&1 $$cmd && echo $$cmd && break;done)
BIKESHED_ARGS = -f

$(SPEC_HTML): $(SPEC_METADATA) biblio.json $(SPEC_MD)
@echo 'Rebuilding spec...'
@cat $(SPEC_BEFORE) > $(SPEC_BS)
SPEC_DEFS = $(VERSION)/include/defs/bbox
SPEC_DEFS_YML = $(VERSION)/defs.yml
SPEC_DEFS_TEMPLATES = $(shell find $(VERSION)/templates/ -type f)
GEN_DEFS = python3 gen-defs.py

$(SPEC_HTML): $(SPEC_BEFORE) $(SPEC_MD) $(SPEC_BIBLIO) $(SPEC_AFTER) $(SPEC_DEFS)
echo 'Rebuilding spec...'
@cat $(SPEC_BEFORE) > $(SPEC_BS)
@echo '<pre class="biblio">' >> $(SPEC_BS)
@cat $(SPEC_BIBLIO) >> $(SPEC_BS)
@echo '</pre>' >> $(SPEC_BS)
@@ -24,5 +29,8 @@ $(SPEC_HTML): $(SPEC_METADATA) biblio.json $(SPEC_MD)
*) echo 'Unsupported bikeshed backend "$(BIKESHED)"'; exit 1 ;; esac
@rm -f $(SPEC_BS)

$(SPEC_DEFS): $(SPEC_DEFS_YML) $(SPEC_DEFS_TEMPLATES)
@$(GEN_DEFS) --basepath $(VERSION)

clean:
$(RM) $(SPEC_HTML) $(SPEC_BS)
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

from jinja2 import Environment, FileSystemLoader, Template
import sys
import argparse
import yaml
import os


class DefGenerator:
    """Render one definition snippet per entry of a definitions YAML file.

    The YAML document is expected to map a category name (``property``,
    ``element``) to a mapping of ``name -> definition``.  For each category a
    template of the same name is loaded from the template directory and
    rendered once per entry into ``<outputdir>/<name>``.
    """

    def __init__(self, basepath, outputdir=None, defs_yml=None, templatedir=None):
        """Load the templates and the definitions and create the output dir.

        Args:
            basepath: Directory used to derive every path that is not given
                explicitly.
            outputdir: Directory the rendered files are written to.
                Defaults to ``<basepath>/include/defs``.
            defs_yml: Path of the definitions YAML file.
                Defaults to ``<basepath>/defs.yml``.
            templatedir: Directory holding the ``property`` and ``element``
                templates.  Defaults to ``<basepath>/templates``.
        """
        if not templatedir:
            templatedir = "{0}/templates".format(basepath)
        if not defs_yml:
            # Derive the default from the ``basepath`` argument.  Reading the
            # module-level ``args`` here would make the class unusable
            # outside of the command-line entry point.
            defs_yml = "{0}/defs.yml".format(basepath)
        if not outputdir:
            outputdir = "{0}/include/defs".format(basepath)
        self.outputdir = outputdir
        env = Environment(loader=FileSystemLoader(templatedir),
                          lstrip_blocks=True, trim_blocks=True)
        self.templates = {}
        for name in ['property', 'element']:
            self.templates[name] = env.get_template(name)
        with open(defs_yml, encoding='utf-8') as f:
            # safe_load: the definitions only need plain mappings, lists and
            # scalars, and a bare yaml.load() without a Loader is both unsafe
            # and rejected by current PyYAML releases.
            self.specs = yaml.safe_load(f)
        os.makedirs(self.outputdir, exist_ok=True)

    def generate(self):
        """Render every definition of every loaded category to its own file.

        Progress is reported on stderr: one ``[category]`` header line per
        template followed by the names of the files written.  The loaded
        ``self.specs`` mapping is left unmodified.
        """
        for cat, template in self.templates.items():
            sys.stderr.write("[{0}]\n\t".format(cat))
            for name, definition in self.specs[cat].items():
                # Render from a copy so the loaded specs are not mutated; an
                # entry with an empty YAML value (None) is treated as a
                # definition without any fields.
                context = dict(definition or {}, name=name)
                fname = os.path.join(self.outputdir, name)
                with open(fname, 'w', encoding='utf-8') as f:
                    sys.stderr.write(name + " ")
                    f.write(template.render(context))
            sys.stderr.write("\n")

if __name__ == '__main__':
    # Command-line entry point: collect the path options and run the
    # generator once with them.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--basepath',
        default=os.getcwd(),
        help='Path for defs.yml and templates/. Default: %(default)s',
    )
    # The remaining options are optional overrides without a default; the
    # generator derives them from the base path when they are left unset.
    for flag, description in (
        ('--defs_yml', 'Definitions YAML. Default: [basepath]/defs.yml'),
        ('--templatedir', 'Templates directory. Default: [basepath]/templates'),
        ('--outputdir', 'Output directory. Default: [basepath]/include/defs'),
    ):
        parser.add_argument(flag, help=description)
    args = parser.parse_args()
    generator = DefGenerator(
        basepath=args.basepath,
        outputdir=args.outputdir,
        defs_yml=args.defs_yml,
        templatedir=args.templatedir,
    )
    generator.generate()

0 comments on commit 2af507e

Please sign in to comment.
You can’t perform that action at this time.