Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pkg/ottl]: Add ParseXML converter #31487

Merged
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .chloggen/feat_ottl_xml-parse-function.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: "enhancement"

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `ParseXML` function for parsing XML from a target string.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [31133]
16 changes: 16 additions & 0 deletions pkg/ottl/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,22 @@ func Test_e2e_converters(t *testing.T) {
m.PutStr("k2", "v2__!__v2")
},
},
{
statement: `set(attributes["test"], ParseXML("<Log id=\"1\"><Message>This is a log message!</Message></Log>"))`,
want: func(tCtx ottllog.TransformContext) {
log := tCtx.GetLogRecord().Attributes().PutEmptyMap("test")
log.PutStr("tag", "Log")

attrs := log.PutEmptyMap("attributes")
attrs.PutStr("id", "1")

logChildren := log.PutEmptySlice("children")

message := logChildren.AppendEmpty().SetEmptyMap()
message.PutStr("tag", "Message")
message.PutStr("content", "This is a log message!")
},
},
{
statement: `set(attributes["test"], Seconds(Duration("1m")))`,
want: func(tCtx ottllog.TransformContext) {
Expand Down
73 changes: 73 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ Available Converters:
- [ParseCSV](#parsecsv)
- [ParseJSON](#parsejson)
- [ParseKeyValue](#parsekeyvalue)
- [ParseXML](#parsexml)
- [Seconds](#seconds)
- [SHA1](#sha1)
- [SHA256](#sha256)
Expand Down Expand Up @@ -897,6 +898,78 @@ Examples:
- `ParseKeyValue(attributes["pairs"])`


### ParseXML

`ParseXML(target)`

The `ParseXML` Converter returns a `pcommon.Map` struct that is the result of parsing the target string as an XML document.

`target` is a Getter that returns a string. This string should be in XML format.
If `target` is nil, is not a string, or cannot be parsed as XML, `ParseXML` will return an error.

Unmarshalling XML is done using the following rules:
1. All character data for an XML element is trimmed, joined, and placed into the `content` field.
2. The tag for an XML element is trimmed, and placed into the `tag` field.
3. The attributes for an XML element are placed as a `pcommon.Map` into the `attributes` field.
4. Processing instructions, directives, and comments are ignored and not represented in the resultant map.
5. All child elements are parsed as above, and placed in a `pcommon.Slice`, which is then placed into the `children` field.

For example, the following XML document:
```xml
<?xml version="1.0" encoding="UTF-8" ?>
<Log>
<User>
<ID>00001</ID>
<Name type="first">Joe</Name>
<Email>joe.smith@example.com</Email>
</User>
<Text>User fired alert A</Text>
</Log>
```

will be parsed as:
```json
{
"tag": "Log",
"children": [
{
"tag": "User",
"children": [
{
"tag": "ID",
"content": "00001"
},
{
"tag": "Name",
"content": "Joe",
"attributes": {
"type": "first"
}
},
{
"tag": "Email",
"content": "joe.smith@example.com"
}
]
},
{
"tag": "Text",
"content": "User fired alert A"
}
]
}
```

Examples:

- `ParseXML(body)`

- `ParseXML(attributes["xml"])`

- `ParseXML("<HostInfo hostname=\"example.com\" zone=\"east-1\" cloudprovider=\"aws\" />")`



### Seconds

`Seconds(value)`
Expand Down
134 changes: 134 additions & 0 deletions pkg/ottl/ottlfuncs/func_parse_xml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"bytes"
"context"
"encoding/xml"
"errors"
"fmt"
"strings"

"go.opentelemetry.io/collector/pdata/pcommon"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// ParseXMLArguments holds the arguments accepted by the ParseXML converter.
type ParseXMLArguments[K any] struct {
// Target is the getter that supplies the string to be parsed as XML.
Target ottl.StringGetter[K]
}

// NewParseXMLFactory returns the ottl.Factory registered under the name
// "ParseXML". The factory is built from an empty ParseXMLArguments value and
// createParseXMLFunction.
func NewParseXMLFactory[K any]() ottl.Factory[K] {
	emptyArgs := &ParseXMLArguments[K]{}
	return ottl.NewFactory("ParseXML", emptyArgs, createParseXMLFunction[K])
}

// createParseXMLFunction builds the ParseXML expression function from the
// supplied arguments. It returns an error when oArgs is not a
// *ParseXMLArguments[K].
func createParseXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	if parsedArgs, isXMLArgs := oArgs.(*ParseXMLArguments[K]); isXMLArgs {
		return parseXML(parsedArgs.Target), nil
	}

	return nil, fmt.Errorf("ParseXMLFactory args must be of type *ParseXMLArguments[K]")
}

// parseXML returns an expression function that reads a string from target,
// decodes it as XML into an xmlElement tree, and converts that tree into a
// `pcommon.Map`.
//
// The returned function fails when the getter fails, when the string cannot
// be decoded, or when the decoder stops before the end of the string.
func parseXML[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] {
	return func(ctx context.Context, tCtx K) (any, error) {
		raw, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		var root xmlElement
		dec := xml.NewDecoder(strings.NewReader(raw))
		if decodeErr := dec.Decode(&root); decodeErr != nil {
			return nil, fmt.Errorf("unmarshal xml: %w", decodeErr)
		}

		// Anything left unread after the decoded element is treated as invalid input.
		if consumed := dec.InputOffset(); consumed != int64(len(raw)) {
			return nil, errors.New("trailing bytes after parsing xml")
		}

		result := pcommon.NewMap()
		root.intoMap(result)

		return result, nil
	}
}

// xmlElement is the intermediate representation of one parsed XML element.
type xmlElement struct {
	// tag is the local (namespace-less) name of the element.
	tag string
	// attributes are the attributes found on the element's start tag.
	attributes []xml.Attr
	// text is the element's character data: every chunk is whitespace-trimmed
	// and the chunks are concatenated in document order.
	text string
	// children are the directly nested elements, in document order.
	children []xmlElement
}

// UnmarshalXML implements xml.Unmarshaler for xmlElement.
//
// It records the tag and attributes from start, then reads tokens until the
// element's end tag: nested elements are decoded recursively into children,
// character data is trimmed and accumulated into text, and comments,
// processing instructions and directives are skipped.
//
// An error is returned when the decoder cannot produce the next token, when a
// child element fails to decode, or when an unknown token type is seen.
// text is only updated once the end tag has been reached.
func (a *xmlElement) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	a.tag = start.Name.Local
	a.attributes = start.Attr

	// Character data may arrive in many chunks (one between each pair of child
	// elements); collect them in a builder instead of re-concatenating the
	// string on every chunk.
	var text strings.Builder

	for {
		tok, err := d.Token()
		if err != nil {
			return fmt.Errorf("decode next token: %w", err)
		}

		switch t := tok.(type) {
		case xml.StartElement:
			var child xmlElement
			if err := d.DecodeElement(&child, &t); err != nil {
				return err
			}

			a.children = append(a.children, child)
		case xml.EndElement:
			// Nested elements are consumed by DecodeElement above, so an end
			// tag seen here closes this element.
			a.text += text.String()
			return nil
		case xml.CharData:
			// Strip leading/trailing spaces to ignore newlines and
			// indentation in formatted XML.
			text.Write(bytes.TrimSpace(t))
		case xml.Comment, xml.ProcInst, xml.Directive:
			// Not represented in the parsed result.
		default:
			return fmt.Errorf("unexpected token type %T", t)
		}
	}
}

// intoMap writes the element into m. The "tag" key is always set; "content",
// "attributes" and "children" are only added when the element has text,
// attributes or child elements respectively. Children are converted
// recursively, each into its own map appended to the "children" slice.
func (a xmlElement) intoMap(m pcommon.Map) {
	// At most four keys are written below.
	m.EnsureCapacity(4)
	m.PutStr("tag", a.tag)

	if len(a.text) != 0 {
		m.PutStr("content", a.text)
	}

	if attrCount := len(a.attributes); attrCount != 0 {
		attrMap := m.PutEmptyMap("attributes")
		attrMap.EnsureCapacity(attrCount)

		for i := range a.attributes {
			attrMap.PutStr(a.attributes[i].Name.Local, a.attributes[i].Value)
		}
	}

	if childCount := len(a.children); childCount != 0 {
		childSlice := m.PutEmptySlice("children")
		childSlice.EnsureCapacity(childCount)

		for i := range a.children {
			childMap := childSlice.AppendEmpty().SetEmptyMap()
			a.children[i].intoMap(childMap)
		}
	}
}
Loading
Loading