Skip to content

Commit

Permalink
New Go based XML reader
Browse files Browse the repository at this point in the history
This includes a copy of Go's XML processing library until the build
system has Go version 1.8.

See golang/go#45628
  • Loading branch information
pgundlach committed May 25, 2021
1 parent 1881571 commit 93fefcf
Show file tree
Hide file tree
Showing 22 changed files with 9,403 additions and 14 deletions.
8 changes: 8 additions & 0 deletions doc/commands-xml/commands.xml
Expand Up @@ -1924,6 +1924,14 @@
</description>
</choice>
</attribute>
<attribute en="luaxmlreader" optional="yes" since="4.5.1" type="boolean">
<description xml:lang="en">
<para>Use the old Lua-based XML reader.</para>
</description>
<description xml:lang="de">
<para>Benutze den alten Lua-basierten XML-Parser.</para>
</description>
</attribute>
<example xml:lang="en">
<listing><![CDATA[<Compatibility
movecursoronplaceobject="no"
Expand Down
9 changes: 9 additions & 0 deletions schema/layoutschema-de.rng
Expand Up @@ -973,6 +973,15 @@
</choice>
</attribute>
</optional>
<optional>
<attribute name="luaxmlreader">
<a:documentation>Benutze den alten Lua-basierten XML-Parser.</a:documentation>
<choice>
<value>yes</value>
<value>no</value>
</choice>
</attribute>
</optional>
<empty></empty>
</element>
</define>
Expand Down
11 changes: 11 additions & 0 deletions schema/layoutschema-de.xsd
Expand Up @@ -814,6 +814,17 @@
</xs:restriction>
</xs:simpleType>
</xs:attribute>
<xs:attribute name="luaxmlreader">
<xs:annotation>
<xs:documentation>Benutze den alten Lua-basierten XML-Parser.</xs:documentation>
</xs:annotation>
<xs:simpleType>
<xs:restriction base="xs:token">
<xs:enumeration value="yes"/>
<xs:enumeration value="no"/>
</xs:restriction>
</xs:simpleType>
</xs:attribute>
</xs:complexType>
</xs:element>
<xs:element name="Contents">
Expand Down
9 changes: 9 additions & 0 deletions schema/layoutschema-en.rng
Expand Up @@ -973,6 +973,15 @@
</choice>
</attribute>
</optional>
<optional>
<attribute name="luaxmlreader">
<a:documentation>Use the old Lua-based XML reader.</a:documentation>
<choice>
<value>yes</value>
<value>no</value>
</choice>
</attribute>
</optional>
<empty></empty>
</element>
</define>
Expand Down
11 changes: 11 additions & 0 deletions schema/layoutschema-en.xsd
Expand Up @@ -814,6 +814,17 @@
</xs:restriction>
</xs:simpleType>
</xs:attribute>
<xs:attribute name="luaxmlreader">
<xs:annotation>
<xs:documentation>Use the old Lua-based XML reader.</xs:documentation>
</xs:annotation>
<xs:simpleType>
<xs:restriction base="xs:token">
<xs:enumeration value="yes"/>
<xs:enumeration value="no"/>
</xs:restriction>
</xs:simpleType>
</xs:attribute>
</xs:complexType>
</xs:element>
<xs:element name="Contents">
Expand Down
10 changes: 10 additions & 0 deletions src/go/splib/splib.go
Expand Up @@ -240,4 +240,14 @@ func sdSegmentize(original string) *C.struct_splitvalues {

return returnStruct
}

// sdReadXMLFile is the C-callable wrapper around splibaux.ReadXMLFile.
//
// On success the string produced by splibaux.ReadXMLFile is passed through
// s2c and returned. On failure the returned string is errorpattern followed
// by the error message, so the caller can tell the two cases apart by that
// prefix.
//
//export sdReadXMLFile
func sdReadXMLFile(filename string) *C.char {
	luaSource, err := splibaux.ReadXMLFile(filename)
	if err == nil {
		return s2c(luaSource)
	}
	return s2c(errorpattern + err.Error())
}

// main is intentionally empty.
// NOTE(review): presumably this package is only built as a C library via cgo
// (see the //export functions above), which still requires a main function to
// exist — confirm against the build setup.
func main() {}
21 changes: 21 additions & 0 deletions src/go/splibaux/splibaux.go
Expand Up @@ -18,6 +18,7 @@ var (
files map[string]string
ignorefile string
verbosity int
nr *strings.Replacer
)

func init() {
Expand All @@ -27,6 +28,7 @@ func init() {
if v := os.Getenv("SP_VERBOSITY"); v != "" {
verbosity, _ = strconv.Atoi(v)
}
nr = strings.NewReplacer("\n", `\n`, `"`, `\"`, `\`, `\\`)
}

func downloadFile(resourceURL string, outfile io.Writer) error {
Expand Down Expand Up @@ -308,3 +310,22 @@ func ConvertSVGImage(filename string) (string, error) {
}
return pdffile, nil
}

// handleXInclude converts the XML file referenced by an xi:include element
// into Lua table source text.
//
// href is resolved with LookupFile and the resulting file is handed to
// readXMLFile. startindex is the table index given to the root element of the
// included file, and indent is the extra indentation level applied to the
// generated lines (inside this function the parameter shadows the package
// level indent helper).
//
// It returns the generated text, or the error from opening or parsing the
// file.
func handleXInclude(href string, startindex, indent int) (string, error) {
	fullpath := LookupFile(href)
	f, err := os.Open(fullpath)
	if err != nil {
		return "", err
	}
	// The file is only read, so an error from Close carries no useful
	// information; closing here stops every include from leaking a handle.
	defer f.Close()

	return readXMLFile(f, startindex, indent)
}

// ReadXMLFile reads the XML file filename and returns it as Lua source text
// of the form "tbl = {...}".
//
// filename is resolved with LookupFile. The table body is produced by
// readXMLFile, with the root element at index 1 and no extra indentation.
//
// On any error (the file cannot be opened or the XML cannot be parsed) the
// returned string is empty and the error is returned unchanged.
func ReadXMLFile(filename string) (string, error) {
	fullpath := LookupFile(filename)
	f, err := os.Open(fullpath)
	if err != nil {
		return "", err
	}
	// The file is only read, so an error from Close carries no useful
	// information; closing here keeps the handle from leaking.
	defer f.Close()

	str, err := readXMLFile(f, 1, 0)
	if err != nil {
		// Do not hand out a syntactically valid but empty table together
		// with an error.
		return "", err
	}
	return "tbl = {" + str + "}", nil
}
112 changes: 112 additions & 0 deletions src/go/splibaux/xmlparser.go
@@ -0,0 +1,112 @@
package splibaux

import (
"fmt"
"io"
"strings"

"speedatapublisher/xml"
)

// luaescape returns s escaped so that it can be placed between double quotes
// in generated Lua source.
//
// The package level replacer nr takes care of line feeds, double quotes and
// backslashes. Carriage returns are escaped here in addition: a raw carriage
// return inside a quoted Lua string is a syntax error, just like a raw line
// feed, and one can show up in decoded XML text (for instance through a
// "&#13;" character reference).
func luaescape(s string) string {
	escaped := nr.Replace(s)
	// nr neither produces nor consumes carriage returns, so this second pass
	// cannot disturb the escapes written above.
	return strings.ReplaceAll(escaped, "\r", `\r`)
}

// indent returns the whitespace prefix for nesting level i: the indentation
// unit repeated i times.
func indent(i int) string {
	const unit = " "
	prefix := strings.Repeat(unit, i)
	return prefix
}

func readXMLFile(r io.Reader, startindex, extraindentlevel int) (string, error) {
var out strings.Builder

stackcounter := []int{startindex}

dec := xml.NewDecoder(r)
dec.Entity = xml.HTMLEntity
indentlevel := 0
for {
tok, err := dec.Token()
if err == io.EOF {
break
}
if err != nil {
return "", err
}
indentamount := indentlevel + extraindentlevel + 1

switch v := tok.(type) {
case xml.StartElement:
var href string
if v.Name.Space == "http://www.w3.org/2001/XInclude" && v.Name.Local == "include" {
for _, attr := range v.Attr {
if attr.Name.Local == "href" {
href = attr.Value
}
}
str, err := handleXInclude(href, indentlevel+1, indentamount-1)
if err != nil {
return "", err
}
fmt.Fprintf(&out, "%s", str)
} else {

fmt.Fprintf(&out, "%s[%d] = {", indent(indentamount-1), stackcounter[indentlevel])
fmt.Fprintln(&out)
fmt.Fprintf(&out, `%s[".__name"] = "%s",`, indent(indentamount), luaescape(v.Name.Local))
fmt.Fprintln(&out)
fmt.Fprintf(&out, `%s[".__type"] = "element",`, indent(indentamount))
fmt.Fprintln(&out)
fmt.Fprintf(&out, `%s[".__local_name"] = "%s",`, indent(indentamount), luaescape(v.Name.Local))
fmt.Fprintln(&out)
fmt.Fprintf(&out, `%s[".__namespace"] = "%s",`, indent(indentamount), luaescape(v.Name.Space))
fmt.Fprintln(&out)
line, col := dec.InputPos()
fmt.Fprintf(&out, `%s[".__line"] = %d,[".__col"] = %d,`, indent(indentamount), line, col)
fmt.Fprintln(&out)

attributes := make(map[string]string)

fmt.Fprintf(&out, `%s[".__ns"] = {`, indent(indentamount))
fmt.Fprintln(&out)
for _, attr := range v.Attr {
if attr.Name.Space == "xmlns" {
fmt.Fprintf(&out, `%s["%s"] = "%s",`, indent(indentamount+1), attr.Name.Local, attr.Value)
fmt.Fprintln(&out)
} else if attr.Name.Local == "xmlns" {
fmt.Fprintf(&out, `%s[""] = "%s",`, indent(indentamount+1), attr.Value)
fmt.Fprintln(&out)
} else {
attributes[attr.Name.Local] = attr.Value
}
}
fmt.Fprintf(&out, "%s},\n", indent(indentamount))
for k, val := range attributes {
fmt.Fprintf(&out, `%s["%s"] = "%s", `, indent(indentamount), luaescape(k), luaescape(val))
fmt.Fprintln(&out)
}
}
stackcounter[indentlevel]++
indentlevel++
stackcounter = append(stackcounter, 1)
case xml.CharData:
if indentlevel > 0 {

index := stackcounter[indentlevel]
stackcounter[indentlevel] = index + 1
fmt.Fprintf(&out, "%s[%d] = ", indent(indentlevel+extraindentlevel), index)
fmt.Fprintf(&out, `"%s",`, luaescape(string(v.Copy())))
fmt.Fprintln(&out)
}
case xml.EndElement:
if v.Name.Space == "http://www.w3.org/2001/XInclude" && v.Name.Local == "include" {
// ignore
} else {
fmt.Fprintf(&out, "%s},\n", indent(indentlevel+extraindentlevel))
}

stackcounter = stackcounter[:len(stackcounter)-1]
indentlevel--
}
}
return out.String(), nil
}
56 changes: 56 additions & 0 deletions src/go/xml/atom_test.go
@@ -0,0 +1,56 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package xml

import "time"

// atomValue is a Feed fixture: an Atom feed titled "Example Feed" by
// "John Doe" that contains a single entry.
// NOTE(review): presumably the in-memory counterpart of atomXML, used by
// tests elsewhere in this package — confirm.
var atomValue = &Feed{
XMLName: Name{"http://www.w3.org/2005/Atom", "feed"},
Title: "Example Feed",
Link: []Link{{Href: "http://example.org/"}},
Updated: ParseTime("2003-12-13T18:30:02Z"),
Author: Person{Name: "John Doe"},
ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",

Entry: []Entry{
{
Title: "Atom-Powered Robots Run Amok",
Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}},
ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
Updated: ParseTime("2003-12-13T18:30:02Z"),
Summary: NewText("Some text."),
},
},
}

// atomXML is an Atom feed document held as one string without line breaks.
// It is assembled from one raw string literal per element so that the
// structure of the document stays readable in the source. Its titles, ids,
// links and timestamps are the same as those in atomValue.
// NOTE(review): presumably the expected serialization of atomValue — confirm
// against the tests that use it.
var atomXML = `` +
`<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` +
`<title>Example Feed</title>` +
`<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` +
`<link href="http://example.org/"></link>` +
`<author><name>John Doe</name><uri></uri><email></email></author>` +
`<entry>` +
`<title>Atom-Powered Robots Run Amok</title>` +
`<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>` +
`<link href="http://example.org/2003/12/13/atom03"></link>` +
`<updated>2003-12-13T18:30:02Z</updated>` +
`<author><name></name><uri></uri><email></email></author>` +
`<summary>Some text.</summary>` +
`</entry>` +
`</feed>`

// ParseTime parses str as an RFC 3339 timestamp and returns the result.
// It panics with the parse error if str is not a valid RFC 3339 timestamp,
// which keeps fixture declarations free of error handling.
func ParseTime(str string) time.Time {
	parsed, err := time.Parse(time.RFC3339, str)
	if err == nil {
		return parsed
	}
	panic(err)
}

// NewText returns a Text whose Body is text; all other fields keep their
// zero value.
func NewText(text string) Text {
	var t Text
	t.Body = text
	return t
}
84 changes: 84 additions & 0 deletions src/go/xml/example_marshaling_test.go
@@ -0,0 +1,84 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package xml_test

import (
"encoding/xml"
"fmt"
"log"
"strings"
)

// Animal is an enumeration that is written to and read from XML as a
// lower-case name instead of its numeric value.
type Animal int

// The known animals. Unknown is the zero value and stands for every name
// that is not recognized.
const (
	Unknown Animal = iota
	Gopher
	Zebra
)

// UnmarshalXML implements xml.Unmarshaler. It decodes the text content of the
// element and maps it, ignoring case, to the matching Animal. Any other text
// yields Unknown rather than an error.
func (a *Animal) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	var text string
	err := d.DecodeElement(&text, &start)
	if err != nil {
		return err
	}

	switch name := strings.ToLower(text); name {
	case "gopher":
		*a = Gopher
	case "zebra":
		*a = Zebra
	default:
		*a = Unknown
	}
	return nil
}

// MarshalXML implements xml.Marshaler. It encodes the animal as an element
// whose text content is the lower-case name; every value other than Gopher
// and Zebra is written as "unknown".
func (a Animal) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	name := "unknown"
	switch a {
	case Gopher:
		name = "gopher"
	case Zebra:
		name = "zebra"
	}
	return e.EncodeElement(name, start)
}

// Example_customMarshalXML decodes a list of animal names through the custom
// Animal.UnmarshalXML method and prints how often each known animal occurs.
// Names that are not recognized are counted as Unknown.
func Example_customMarshalXML() {
	const blob = `
<animals>
<animal>gopher</animal>
<animal>armadillo</animal>
<animal>zebra</animal>
<animal>unknown</animal>
<animal>gopher</animal>
<animal>bee</animal>
<animal>gopher</animal>
<animal>zebra</animal>
</animals>`
	var zoo struct {
		Animals []Animal `xml:"animal"`
	}
	err := xml.Unmarshal([]byte(blob), &zoo)
	if err != nil {
		log.Fatal(err)
	}

	// Tally the decoded animals by kind.
	census := map[Animal]int{}
	for i := range zoo.Animals {
		census[zoo.Animals[i]]++
	}

	fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n",
		census[Gopher], census[Zebra], census[Unknown])

	// Output:
	// Zoo Census:
	// * Gophers: 3
	// * Zebras: 2
	// * Unknown: 3
}

0 comments on commit 93fefcf

Please sign in to comment.