Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Header support for json_array_parser #30814

Merged
merged 3 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/add_jarray_parser_header.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add support in a header configuration for json array parser.

# One or more tracking issues related to the change
issues: [30321]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
43 changes: 42 additions & 1 deletion pkg/stanza/docs/operators/json_array_parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ More information on json arrays can be found [here](https://json-schema.org/unde
|--------------------|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
| `id` | `json_array_parser` | A unique identifier for the operator. |
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
| `header` | optional | A string of comma delimited field names. When a header is set, the output will be a map containing the header fields as keys and the parsed input json array fields as matching values |
| `parse_from` | `body` | The [field](../types/field.md) from which the value will be parsed. |
| `parse_to` | required. can be one of `body` or a nested field inside `body`, `attributes` or `resource` (ie `attributes.parsed`) | The [field](../types/field.md) to which the value will be parsed. |
| `parse_to` | required. can be one of `body` or a nested field inside `body`, `attributes` or `resource` (ie `attributes.parsed`). When a header is used, `attributes` is also valid | The [field](../types/field.md) to which the value will be parsed. |
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../types/on_error.md). |
| `timestamp` | `nil` | An optional [timestamp](../types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator. |
| `severity` | `nil` | An optional [severity](../types/severity.md) block which will parse a severity field before passing the entry to the output operator. |
Expand Down Expand Up @@ -124,6 +125,46 @@ Configuration:
}
```

</td>
</tr>
</table>

#### Parse the field `body` with a json array parser and a header into attributes

Configuration:

```yaml
- type: json_array_parser
parse_to: attributes
header: origin,sev,message,isBool
```

<table>
<tr><td> Input Entry </td> <td> Output Entry </td></tr>
<tr>
<td>

```json
{
"body": "[1,\"debug\",\"Debug Message\", true]"
}
```

</td>
<td>

```json
{
"body": "[1,\"debug\",\"Debug Message\", true]",
"attributes": {
"origin": 1,
"sev": "debug",
"message": "Debug Message",
"isBool": true,
}
}
```

</td>
</tr>
</table>
9 changes: 9 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ func TestConfig(t *testing.T) {
return p
}(),
},
{
Name: "parse_with_header_as_attributes",
Expect: func() *Config {
p := NewConfig()
p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()}
p.Header = "A,B,C"
return p
}(),
},
},
}.Run(t)
}
123 changes: 90 additions & 33 deletions pkg/stanza/operator/parser/jsonarray/json_array_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"errors"
"fmt"
"strings"

"github.com/valyala/fastjson"
"go.opentelemetry.io/collector/featuregate"
Expand All @@ -17,6 +18,7 @@ import (
)

const operatorType = "json_array_parser"
const headerDelimiter = ","

var jsonArrayParserFeatureGate = featuregate.GlobalRegistry().MustRegister(
"logs.jsonParserArray",
Expand Down Expand Up @@ -46,6 +48,7 @@ func NewConfigWithID(operatorID string) *Config {
// Config is the configuration of a json array parser operator.
type Config struct {
helper.ParserConfig `mapstructure:",squash"`
Header string `mapstructure:"header"`
}

// Build will build a json array parser operator.
Expand All @@ -55,59 +58,113 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
return nil, err
}

if c.Header != "" {
return &Parser{
ParserOperator: parserOperator,
parse: generateParseToMapFunc(new(fastjson.ParserPool), strings.Split(c.Header, headerDelimiter)),
}, nil
}

return &Parser{
ParserOperator: parserOperator,
pool: new(fastjson.ParserPool),
parse: generateParseToArrayFunc(new(fastjson.ParserPool)),
}, nil
}

// Parser is an operator that parses json array in an entry.
type Parser struct {
helper.ParserOperator
pool *fastjson.ParserPool
parse parseFunc
}

type parseFunc func(any) (any, error)

// Process will parse an entry for json array.
func (r *Parser) Process(ctx context.Context, e *entry.Entry) error {
return r.ParserOperator.ProcessWith(ctx, e, r.parse)
}

func (r *Parser) parse(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}
func generateParseToArrayFunc(pool *fastjson.ParserPool) parseFunc {
return func(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}

p := r.pool.Get()
v, err := p.Parse(jArrayLine)
r.pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}
p := pool.Get()
v, err := p.Parse(jArrayLine)
pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}

jArray := v.GetArray() // a is a []*Value slice
parsedValues := make([]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[i] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[i] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[i] = true
case fastjson.TypeFalse:
parsedValues[i] = false
case fastjson.TypeNull:
parsedValues[i] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[i] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
jArray := v.GetArray() // a is a []*Value slice
parsedValues := make([]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[i] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[i] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[i] = true
case fastjson.TypeFalse:
parsedValues[i] = false
case fastjson.TypeNull:
parsedValues[i] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[i] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
}
}

return parsedValues, nil
}
}

func generateParseToMapFunc(pool *fastjson.ParserPool, header []string) parseFunc {
return func(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}

p := pool.Get()
v, err := p.Parse(jArrayLine)
pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}

return parsedValues, nil
jArray := v.GetArray() // a is a []*Value slice
if len(header) != len(jArray) {
return nil, fmt.Errorf("wrong number of fields: expected %d, found %d", len(header), len(jArray))
}
parsedValues := make(map[string]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[header[i]] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[header[i]] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[header[i]] = true
case fastjson.TypeFalse:
parsedValues[header[i]] = false
case fastjson.TypeNull:
parsedValues[header[i]] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[header[i]] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
}
}

return parsedValues, nil
}
}

// valueAsString interprets the given value as a string.
Expand Down
37 changes: 37 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ func TestParserInvalidType(t *testing.T) {
require.Contains(t, err.Error(), "type '[]int' cannot be parsed as json array")
}

func TestParserByteFailureHeadersMismatch(t *testing.T) {
cfg := NewConfigWithID("test")
cfg.Header = "name,sev,msg"
op, err := cfg.Build(testutil.Logger(t))
require.NoError(t, err)
parser := op.(*Parser)
_, err = parser.parse("[\"stanza\",\"INFO\",\"started agent\", 42, true]")
require.Error(t, err)
require.Contains(t, err.Error(), "wrong number of fields: expected 3, found 5")
}

func TestParserJarray(t *testing.T) {
cases := []struct {
name string
Expand Down Expand Up @@ -193,6 +204,32 @@ func TestParserJarray(t *testing.T) {
false,
false,
},
{
"parse-as-attributes-with-header",
func(p *Config) {
p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()}
p.Header = "origin,sev,message,count,isBool"
},
[]entry.Entry{
{
Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]",
},
},
[]entry.Entry{
{
Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]",
Attributes: map[string]any{
"origin": "stanza",
"sev": "INFO",
"message": "started agent",
"count": int64(42),
"isBool": true,
},
},
},
false,
false,
},
}

for _, tc := range cases {
Expand Down
4 changes: 4 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ parse_to_body:
parse_to_resource:
type: json_array_parser
parse_to: resource.output
parse_with_header_as_attributes:
type: json_array_parser
parse_to: attributes
header: A,B,C
Loading