Skip to content

Commit

Permalink
Modify schema to allow text structure under figDesc, so that we can add sentence segmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 21, 2020
1 parent f39a3bc commit a1468ae
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 216 deletions.
412 changes: 207 additions & 205 deletions grobid-home/schemas/doc/Grobid_doc.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions grobid-home/schemas/dtd/Grobid.dtd
@@ -1,6 +1,6 @@

<!--
DTD generated from ODD source 2020-08-20T01:39:39Z. .
DTD generated from ODD source 2020-08-21T03:03:48Z. .
TEI Edition: Version 4.1.0. Last updated on
19th August 2020, revision b414ba550
TEI Edition Location: https://www.tei-c.org/Vault/P5/Version 4.1.0/
Expand Down Expand Up @@ -1291,7 +1291,7 @@ type %teidata.enumerated; #IMPLIED >
%tei_att.global.attributes;
%tei_att.declaring.attributes; >
<!--doc:(description of figure) contains a brief prose description of the appearance or content of a graphic figure, for use when documenting an image without displaying it. [14.4. Specific Elements for Graphic Images] -->
<!ELEMENT figDesc %macro.limitedContent;>
<!ELEMENT figDesc (#PCDATA|%model.limitedPhrase;|%model.inter;|%model.divLike;)*>
<!ATTLIST figDesc xmlns CDATA "http://www.tei-c.org/ns/1.0">
<!ATTLIST figDesc
%tei_att.global.attributes; >
Expand Down
16 changes: 16 additions & 0 deletions grobid-home/schemas/odd/Grobid.odd
Expand Up @@ -183,6 +183,21 @@
</attList>
</classSpec>

<!-- We modify figDesc so that it can contain generic structural divisions.
     The content model below accepts, zero or more times and in any order:
     plain text, members of model.limitedPhrase, members of model.inter,
     and members of model.divLike.
     Purpose: allow text structure under figDesc so that sentence
     segmentation can be added to figure descriptions. -->
<elementSpec ident="figDesc" module="figures" mode="change">
<content>
<rng:zeroOrMore>
<rng:choice>
<rng:text/>
<rng:ref name="model.limitedPhrase"/>
<rng:ref name="model.inter"/>
<rng:ref name="model.divLike"/>
</rng:choice>
</rng:zeroOrMore>
</content>

</elementSpec>

<!-- removing elements not used in Grobid results -->
<elementSpec ident="ab" mode="delete" module="linking"/>
<elementSpec ident="abbr" mode="delete" module="core"/>
Expand Down Expand Up @@ -357,6 +372,7 @@
<elementSpec ident="unclear" mode="delete" module="core"/>
<!-- only s from the analysis module, in case we want the final result with sentence segmentation -->
<moduleRef key="analysis" except="c cl interp interpGrp m pc phr span spanGrp w"/>

</schemaSpec>
</body>
</text>
Expand Down
Binary file modified grobid-home/schemas/rng/Grobid.rnc
Binary file not shown.
11 changes: 9 additions & 2 deletions grobid-home/schemas/rng/Grobid.rng
Expand Up @@ -5,7 +5,7 @@
xmlns="http://relaxng.org/ns/structure/1.0"
datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes"
ns="http://www.tei-c.org/ns/1.0"><!--
Schema generated from ODD source 2020-08-20T01:39:02Z. .
Schema generated from ODD source 2020-08-21T03:03:09Z. .
TEI Edition: Version 4.1.0. Last updated on
19th August 2020, revision b414ba550
TEI Edition Location: https://www.tei-c.org/Vault/P5/Version 4.1.0/
Expand Down Expand Up @@ -5026,7 +5026,14 @@ Suggested values include: 1] label; 2] data</a:documentation>
<define name="tei_figDesc">
<element name="figDesc">
<a:documentation xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0">(description of figure) contains a brief prose description of the appearance or content of a graphic figure, for use when documenting an image without displaying it. [14.4. Specific Elements for Graphic Images]</a:documentation>
<ref name="tei_macro.limitedContent"/>
<zeroOrMore>
<choice>
<text/>
<ref name="tei_model.limitedPhrase"/>
<ref name="tei_model.inter"/>
<ref name="tei_model.divLike"/>
</choice>
</zeroOrMore>
<ref name="tei_att.global.attributes"/>
<empty/>
</element>
Expand Down
15 changes: 8 additions & 7 deletions grobid-home/schemas/xsd/Grobid.xsd
Expand Up @@ -3,7 +3,7 @@
<xs:import namespace="http://www.isocat.org/ns/dcr" schemaLocation="dcr.xsd"/>
<xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="xml.xsd"/>
<!--
Schema generated from ODD source 2020-08-20T01:39:31Z. .
Schema generated from ODD source 2020-08-21T03:03:40Z. .
TEI Edition: Version 4.1.0. Last updated on
19th August 2020, revision b414ba550
TEI Edition Location: https://www.tei-c.org/Vault/P5/Version 4.1.0/
Expand Down Expand Up @@ -4699,12 +4699,13 @@ Suggested values include: 1] label; 2] data</xs:documentation>
<xs:annotation>
<xs:documentation>(description of figure) contains a brief prose description of the appearance or content of a graphic figure, for use when documenting an image without displaying it. [14.4. Specific Elements for Graphic Images]</xs:documentation>
</xs:annotation>
<xs:complexType>
<xs:complexContent>
<xs:extension base="tei:tei_macro.limitedContent">
<xs:attributeGroup ref="tei:tei_att.global.attributes"/>
</xs:extension>
</xs:complexContent>
<xs:complexType mixed="true">
<xs:choice minOccurs="0" maxOccurs="unbounded">
<xs:group ref="tei:tei_model.limitedPhrase"/>
<xs:group ref="tei:tei_model.inter"/>
<xs:group ref="tei:tei_model.divLike"/>
</xs:choice>
<xs:attributeGroup ref="tei:tei_att.global.attributes"/>
</xs:complexType>
</xs:element>
<xs:attributeGroup name="tei_att.global.facs.attributes">
Expand Down

0 comments on commit a1468ae

Please sign in to comment.