diff --git a/.chloggen/feat_ottl_xml-parse-function.yaml b/.chloggen/feat_ottl_xml-parse-function.yaml
new file mode 100755
index 0000000000000..710eedae3f487
--- /dev/null
+++ b/.chloggen/feat_ottl_xml-parse-function.yaml
@@ -0,0 +1,13 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: "enhancement"
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: pkg/ottl
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Add `ParseXML` function for parsing XML from a target string.
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [31133]
diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go
index f850af9b0aa36..315a00e8c28b8 100644
--- a/pkg/ottl/e2e/e2e_test.go
+++ b/pkg/ottl/e2e/e2e_test.go
@@ -485,6 +485,22 @@ func Test_e2e_converters(t *testing.T) {
m.PutStr("k2", "v2__!__v2")
},
},
+ {
+ statement: `set(attributes["test"], ParseXML("This is a log message!"))`,
+ want: func(tCtx ottllog.TransformContext) {
+ log := tCtx.GetLogRecord().Attributes().PutEmptyMap("test")
+ log.PutStr("tag", "Log")
+
+ attrs := log.PutEmptyMap("attributes")
+ attrs.PutStr("id", "1")
+
+ logChildren := log.PutEmptySlice("children")
+
+ message := logChildren.AppendEmpty().SetEmptyMap()
+ message.PutStr("tag", "Message")
+ message.PutStr("content", "This is a log message!")
+ },
+ },
{
statement: `set(attributes["test"], Seconds(Duration("1m")))`,
want: func(tCtx ottllog.TransformContext) {
diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md
index a87b6562f57bf..94712ca3074db 100644
--- a/pkg/ottl/ottlfuncs/README.md
+++ b/pkg/ottl/ottlfuncs/README.md
@@ -403,6 +403,7 @@ Available Converters:
- [ParseCSV](#parsecsv)
- [ParseJSON](#parsejson)
- [ParseKeyValue](#parsekeyvalue)
+- [ParseXML](#parsexml)
- [Seconds](#seconds)
- [SHA1](#sha1)
- [SHA256](#sha256)
@@ -913,6 +914,78 @@ Examples:
- `ParseKeyValue(attributes["pairs"])`
+### ParseXML
+
+`ParseXML(target)`
+
+The `ParseXML` Converter returns a `pcommon.Map` struct that is the result of parsing the target string as an XML document.
+
+`target` is a Getter that returns a string. This string should be in XML format.
+If `target` is nil, is not a string, or cannot be parsed as XML, `ParseXML` will return an error.
+
+Unmarshalling XML is done using the following rules:
+1. All character data for an XML element is trimmed, joined, and placed into the `content` field.
+2. The tag for an XML element is trimmed, and placed into the `tag` field.
+3. The attributes for an XML element are placed as a `pcommon.Map` into the `attributes` field.
+4. Processing instructions, directives, and comments are ignored and not represented in the resultant map.
+5. All child elements are parsed as above, and placed in a `pcommon.Slice`, which is then placed into the `children` field.
+
+For example, the following XML document:
+```xml
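+<?xml version="1.0" encoding="UTF-8"?>
+<Log>
+  <User>
+    <ID>00001</ID>
+    <Name type="first">Joe</Name>
+    <Email>joe.smith@example.com</Email>
+  </User>
+  <Text>User fired alert A</Text>
+</Log>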
+```
+
+will be parsed as:
+```json
+{
+ "tag": "Log",
+ "children": [
+ {
+ "tag": "User",
+ "children": [
+ {
+ "tag": "ID",
+ "content": "00001"
+ },
+ {
+ "tag": "Name",
+ "content": "Joe",
+ "attributes": {
+ "type": "first"
+ }
+ },
+ {
+ "tag": "Email",
+ "content": "joe.smith@example.com"
+ }
+ ]
+ },
+ {
+ "tag": "Text",
+ "content": "User fired alert A"
+ }
+ ]
+}
+```
+
+Examples:
+
+- `ParseXML(body)`
+
+- `ParseXML(attributes["xml"])`
+
+- `ParseXML("<HostInfo hostname=\"example.com\" zone=\"east-1\" cloudprovider=\"aws\" />")`
+
+
+
### Seconds
`Seconds(value)`
diff --git a/pkg/ottl/ottlfuncs/func_parse_xml.go b/pkg/ottl/ottlfuncs/func_parse_xml.go
new file mode 100644
index 0000000000000..42dac93307dfb
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_xml.go
@@ -0,0 +1,134 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
+
+import (
+ "bytes"
+ "context"
+ "encoding/xml"
+ "errors"
+ "fmt"
+ "strings"
+
+ "go.opentelemetry.io/collector/pdata/pcommon"
+
+ "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+// ParseXMLArguments holds the arguments accepted by the ParseXML converter.
+type ParseXMLArguments[K any] struct {
+ // Target is the getter that yields the string to be parsed as XML.
+ Target ottl.StringGetter[K]
+}
+
+// NewParseXMLFactory builds the ottl.Factory that exposes the converter under
+// the name "ParseXML", using ParseXMLArguments as its argument struct.
+func NewParseXMLFactory[K any]() ottl.Factory[K] {
+	emptyArgs := &ParseXMLArguments[K]{}
+	return ottl.NewFactory("ParseXML", emptyArgs, createParseXMLFunction[K])
+}
+
+func createParseXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
+ args, ok := oArgs.(*ParseXMLArguments[K])
+
+ if !ok {
+ return nil, fmt.Errorf("ParseXMLFactory args must be of type *ParseXMLArguments[K]")
+ }
+
+ return parseXML(args.Target), nil
+}
+
+// parseXML returns an expression that reads the target string and parses it as
+// a single XML document, yielding the root element as a `pcommon.Map`.
+//
+// The expression returns an error when the target cannot be read, when the
+// string cannot be decoded as XML, or when anything other than XML whitespace
+// (space, tab, CR, LF) follows the root element, e.g. a second root element.
+func parseXML[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] {
+	return func(ctx context.Context, tCtx K) (any, error) {
+		targetVal, err := target.Get(ctx, tCtx)
+		if err != nil {
+			return nil, err
+		}
+
+		parsedXML := xmlElement{}
+
+		decoder := xml.NewDecoder(strings.NewReader(targetVal))
+		if err = decoder.Decode(&parsedXML); err != nil {
+			return nil, fmt.Errorf("unmarshal xml: %w", err)
+		}
+
+		// Decode stops once the root element has been consumed, so whatever
+		// lies past the decoder's offset was never inspected. Whitespace after
+		// the root element (such as a trailing newline) is still a well-formed
+		// document and is accepted; any other leftover input is rejected.
+		trailing := targetVal[int(decoder.InputOffset()):]
+		if strings.Trim(trailing, " \t\r\n") != "" {
+			return nil, errors.New("trailing bytes after parsing xml")
+		}
+
+		parsedMap := pcommon.NewMap()
+		parsedXML.intoMap(parsedMap)
+
+		return parsedMap, nil
+	}
+}
+
+// xmlElement is the intermediate representation of one parsed XML element,
+// filled in by UnmarshalXML and converted to a pcommon.Map by intoMap.
+type xmlElement struct {
+ // tag is the local name of the element's start tag.
+ tag string
+ // attributes are the attributes found on the element's start tag.
+ attributes []xml.Attr
+ // text is the concatenation of the element's whitespace-trimmed character data.
+ text string
+ // children are the nested elements, in the order they were decoded.
+ children []xmlElement
+}
+
+// UnmarshalXML implements xml.Unmarshaler for xmlElement.
+//
+// It records the start tag's local name and attributes, then consumes tokens
+// until the element's end tag: nested elements are decoded recursively into
+// children, character data is trimmed and appended to text, and comments,
+// processing instructions and directives are skipped.
+func (a *xmlElement) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	a.tag, a.attributes = start.Name.Local, start.Attr
+
+	for {
+		token, tokenErr := d.Token()
+		if tokenErr != nil {
+			return fmt.Errorf("decode next token: %w", tokenErr)
+		}
+
+		switch node := token.(type) {
+		case xml.Comment, xml.ProcInst, xml.Directive:
+			// These token kinds are not represented in the result.
+			continue
+		case xml.CharData:
+			// Trimming drops the newlines and indentation of formatted XML.
+			a.text += string(bytes.TrimSpace(node))
+		case xml.EndElement:
+			// The end tag closes this element; parsing is complete.
+			return nil
+		case xml.StartElement:
+			var nested xmlElement
+			if decodeErr := d.DecodeElement(&nested, &node); decodeErr != nil {
+				return decodeErr
+			}
+
+			a.children = append(a.children, nested)
+		default:
+			return fmt.Errorf("unexpected token type %T", node)
+		}
+	}
+}
+
+// intoMap writes the element into m: "tag" is always set, while "content",
+// "attributes" and "children" are only added when the element has text,
+// attributes or child elements respectively.
+func (a xmlElement) intoMap(m pcommon.Map) {
+	m.EnsureCapacity(4)
+	m.PutStr("tag", a.tag)
+
+	if a.text != "" {
+		m.PutStr("content", a.text)
+	}
+
+	if attrCount := len(a.attributes); attrCount > 0 {
+		attrMap := m.PutEmptyMap("attributes")
+		attrMap.EnsureCapacity(attrCount)
+
+		for i := range a.attributes {
+			attrMap.PutStr(a.attributes[i].Name.Local, a.attributes[i].Value)
+		}
+	}
+
+	if childCount := len(a.children); childCount > 0 {
+		childSlice := m.PutEmptySlice("children")
+		childSlice.EnsureCapacity(childCount)
+
+		// Each child is rendered recursively into its own map entry.
+		for i := range a.children {
+			childMap := childSlice.AppendEmpty().SetEmptyMap()
+			a.children[i].intoMap(childMap)
+		}
+	}
+}
diff --git a/pkg/ottl/ottlfuncs/func_parse_xml_test.go b/pkg/ottl/ottlfuncs/func_parse_xml_test.go
new file mode 100644
index 0000000000000..8c348d3a6e762
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_xml_test.go
@@ -0,0 +1,309 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "go.opentelemetry.io/collector/pdata/pcommon"
+
+ "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+// Test_ParseXML exercises the ParseXML converter end to end: each case builds
+// the expression through createParseXMLFunction, evaluates it, and compares
+// either the raw form of the resulting pcommon.Map or the expected error text.
+func Test_ParseXML(t *testing.T) {
+	tests := []struct {
+		name        string
+		oArgs       ottl.Arguments
+		want        map[string]any
+		createError string
+		parseError  string
+	}{
+		{
+			name: "Text values in nested elements",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return "<Log><User><ID>00001</ID><Name>Joe</Name><Email>joe.smith@example.com</Email></User><Text>User did a thing</Text></Log>", nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag": "Log",
+				"children": []any{
+					map[string]any{
+						"tag": "User",
+						"children": []any{
+							map[string]any{
+								"tag":     "ID",
+								"content": "00001",
+							},
+							map[string]any{
+								"tag":     "Name",
+								"content": "Joe",
+							},
+							map[string]any{
+								"tag":     "Email",
+								"content": "joe.smith@example.com",
+							},
+						},
+					},
+					map[string]any{
+						"tag":     "Text",
+						"content": "User did a thing",
+					},
+				},
+			},
+		},
+		{
+			name: "Formatted example",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `
+					<Log>
+						<User>
+							<ID>00001</ID>
+							<Name>Joe</Name>
+							<Email>joe.smith@example.com</Email>
+						</User>
+						<Text>User did a thing</Text>
+					</Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag": "Log",
+				"children": []any{
+					map[string]any{
+						"tag": "User",
+						"children": []any{
+							map[string]any{
+								"tag":     "ID",
+								"content": "00001",
+							},
+							map[string]any{
+								"tag":     "Name",
+								"content": "Joe",
+							},
+							map[string]any{
+								"tag":     "Email",
+								"content": "joe.smith@example.com",
+							},
+						},
+					},
+					map[string]any{
+						"tag":     "Text",
+						"content": "User did a thing",
+					},
+				},
+			},
+		},
+		{
+			name: "Multiple tags with the same name",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log>This record has a collision<User id="0001"/><User id="0002"/></Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "This record has a collision",
+				"children": []any{
+					map[string]any{
+						"tag": "User",
+						"attributes": map[string]any{
+							"id": "0001",
+						},
+					},
+					map[string]any{
+						"tag": "User",
+						"attributes": map[string]any{
+							"id": "0002",
+						},
+					},
+				},
+			},
+		},
+		{
+			name: "Multiple lines of content",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log>
+						This record has multiple lines of
+						<User id="0001"/>
+						text content
+					</Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "This record has multiple lines oftext content",
+				"children": []any{
+					map[string]any{
+						"tag": "User",
+						"attributes": map[string]any{
+							"id": "0001",
+						},
+					},
+				},
+			},
+		},
+		{
+			name: "Attribute only element",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<HostInfo hostname="example.com" zone="east-1" cloudprovider="aws" />`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag": "HostInfo",
+				"attributes": map[string]any{
+					"hostname":      "example.com",
+					"zone":          "east-1",
+					"cloudprovider": "aws",
+				},
+			},
+		},
+		{
+			name: "Ignores XML declaration",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<?xml version="1.0" encoding="UTF-8"?><Log>Log content</Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "Log content",
+			},
+		},
+		{
+			name: "Ignores comments",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log>This has a comment <!-- This is comment text --></Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "This has a comment",
+			},
+		},
+		{
+			name: "Ignores processing instructions",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log><?xml-stylesheet type="text/xsl" href="style.xsl"?>Log content</Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "Log content",
+			},
+		},
+		{
+			name: "Ignores directives",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log><!ELEMENT br EMPTY>Log content</Log>`, nil
+					},
+				},
+			},
+			want: map[string]any{
+				"tag":     "Log",
+				"content": "Log content",
+			},
+		},
+		{
+			name: "Missing closing element",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log>`, nil
+					},
+				},
+			},
+			parseError: "unmarshal xml: decode next token: XML syntax error on line 1: unexpected EOF",
+		},
+		{
+			name: "Missing nested closing element",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log><Message></Log>`, nil
+					},
+				},
+			},
+			parseError: "unmarshal xml: decode next token: XML syntax error on line 1: element <Message> closed by </Log>",
+		},
+		{
+			name: "Multiple XML elements in payload (trailing bytes)",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return `<Log></Log><Log></Log>`, nil
+					},
+				},
+			},
+			parseError: "trailing bytes after parsing xml",
+		},
+		{
+			name: "Error getting target",
+			oArgs: &ParseXMLArguments[any]{
+				Target: ottl.StandardStringGetter[any]{
+					Getter: func(_ context.Context, _ any) (any, error) {
+						return "", fmt.Errorf("failed to get string")
+					},
+				},
+			},
+			parseError: "error getting value in ottl.StandardStringGetter[interface {}]: failed to get string",
+		},
+		{
+			name:        "Invalid arguments",
+			oArgs:       nil,
+			createError: "ParseXMLFactory args must be of type *ParseXMLArguments[K]",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			exprFunc, err := createParseXMLFunction[any](ottl.FunctionContext{}, tt.oArgs)
+			if tt.createError != "" {
+				require.ErrorContains(t, err, tt.createError)
+				return
+			}
+
+			require.NoError(t, err)
+
+			result, err := exprFunc(context.Background(), nil)
+			if tt.parseError != "" {
+				require.ErrorContains(t, err, tt.parseError)
+				return
+			}
+
+			assert.NoError(t, err)
+
+			resultMap, ok := result.(pcommon.Map)
+			require.True(t, ok)
+
+			require.Equal(t, tt.want, resultMap.AsRaw())
+		})
+	}
+}
diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go
index 1f419a746e42a..9bb33ff3230f0 100644
--- a/pkg/ottl/ottlfuncs/functions.go
+++ b/pkg/ottl/ottlfuncs/functions.go
@@ -61,6 +61,7 @@ func converters[K any]() []ottl.Factory[K] {
NewParseCSVFactory[K](),
NewParseJSONFactory[K](),
NewParseKeyValueFactory[K](),
+ NewParseXMLFactory[K](),
NewSecondsFactory[K](),
NewSHA1Factory[K](),
NewSHA256Factory[K](),