From 6d961b38444c5bbe16f02dd1dedc382f78c56204 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Wed, 31 Jul 2019 16:30:18 -0600 Subject: [PATCH] Bug 1734951: rsyslog with MERGE_JSON_LOG=true does not handle non-JSON data https://bugzilla.redhat.com/show_bug.cgi?id=1734951 When mmnormalize fails to parse, it will create the fields `originalmsg` and `unparsed-data` in the record: https://github.com/rsyslog/liblognorm/blob/master/src/pdag.c#L1680 In this case, we want to remove the fields from the record, just after where the mmnormalize occurred. This also ensures that the timestamp we use to generate the index name is normalized i.e. make sure it is in the form `YYYY-MM-DD`. If using `MERGE_JSON_LOG=true`, the user may have already added a `@timestamp` field to the record which may be in some odd format. --- files/rsyslog/20-viaq-templates.conf | 6 +-- files/rsyslog/60-mmk8s.conf | 60 ++++++++++++++++----------- files/rsyslog/65-viaq-formatting.conf | 25 +++++++++-- files/rsyslog/67-mmexternal.conf | 5 +++ 4 files changed, 65 insertions(+), 31 deletions(-) diff --git a/files/rsyslog/20-viaq-templates.conf b/files/rsyslog/20-viaq-templates.conf index 160e73c305..be3b02700e 100644 --- a/files/rsyslog/20-viaq-templates.conf +++ b/files/rsyslog/20-viaq-templates.conf @@ -15,11 +15,11 @@ template(name="viaq_template_nl" type="list") { template(name="prefix_index_template" type="list") { property(name="$.viaq_index_prefix") - property(name="$!@timestamp" dateFormat="rfc3339" position.from="1" position.to="4") + property(name="$.ts_for_index" dateFormat="rfc3339" position.from="1" position.to="4") constant(value=".") - property(name="$!@timestamp" dateFormat="rfc3339" position.from="6" position.to="7") + property(name="$.ts_for_index" dateFormat="rfc3339" position.from="6" position.to="7") constant(value=".") - property(name="$!@timestamp" dateFormat="rfc3339" position.from="9" position.to="10") + property(name="$.ts_for_index" dateFormat="rfc3339" position.from="9" position.to="10") } template(name="index_template" type="string" string="%$.viaq_index_name%") diff --git a/files/rsyslog/60-mmk8s.conf b/files/rsyslog/60-mmk8s.conf index 788328bef0..44c2021e8b 100644 --- a/files/rsyslog/60-mmk8s.conf +++ b/files/rsyslog/60-mmk8s.conf @@ -16,36 +16,48 @@ if ((strlen($!CONTAINER_NAME) > 0) and (strlen($!CONTAINER_ID_FULL) > 0)) or if ((strlen($!metadata) > 0) and (strlen($!metadata!filename) > 0) and ($!metadata!filename startswith "/var/log/containers/")) then { if $msg startswith "{" then { action(name="parse_multiline_docker_json" type="mmnormalize" rulebase="/etc/rsyslog.d/multiline-json.rulebase") - foreach ($.ii in $!multilinejson) do { - if strlen($!@timestamp) == 0 then { - set $!@timestamp = $.ii!time; - } - if strlen($!stream) == 0 then { - set $!stream = $.ii!stream; - } - if strlen($!log) == 0 then { - set $!log = $.ii!log; - } else { - reset $!log = $!log & $.ii!log; + if strlen($!originalmsg) > 0 then { + # parsing failed + unset $!originalmsg; + unset $!unparsed-data; + } else { + foreach ($.ii in $!multilinejson) do { + if strlen($!@timestamp) == 0 then { + set $!@timestamp = $.ii!time; + } + if strlen($!stream) == 0 then { + set $!stream = $.ii!stream; + } + if strlen($!log) == 0 then { + set $!log = $.ii!log; + } else { + reset $!log = $!log & $.ii!log; + } } + unset $!multilinejson; } - unset $!multilinejson; } else { action(name="parse_multiline_crio_file" type="mmnormalize" rulebase="/etc/rsyslog.d/crio.rulebase") - foreach ($.ii in $!multilinecrio) do { - if strlen($!@timestamp) == 0 then { - set $!@timestamp = $.ii!time; - } - if strlen($!stream) == 0 then { - set $!stream = $.ii!stream; - } - if strlen($!log) == 0 then { - set $!log = $.ii!log; - } else { - reset $!log = $!log & $.ii!log; + if strlen($!originalmsg) > 0 then { + # parsing failed + unset $!originalmsg; + unset $!unparsed-data; + } else { + foreach ($.ii in $!multilinecrio) do { + if strlen($!@timestamp) == 0 then { + set $!@timestamp = $.ii!time; + } + if strlen($!stream) == 0 then { + set $!stream = $.ii!stream; + } + if strlen($!log) == 0 then { + set $!log = $.ii!log; + } else { + reset $!log = $!log & $.ii!log; + } } + unset $!multilinecrio; } - unset $!multilinecrio; } } action(type="mmkubernetes" diff --git a/files/rsyslog/65-viaq-formatting.conf b/files/rsyslog/65-viaq-formatting.conf index b67d3abca8..5d9d66b84d 100644 --- a/files/rsyslog/65-viaq-formatting.conf +++ b/files/rsyslog/65-viaq-formatting.conf @@ -11,6 +11,11 @@ if strlen($!MESSAGE) > 0 then { set $!pipeline_metadata!collector!original_raw_message = $!MESSAGE; if (strlen(`echo $MERGE_JSON_LOG`) > 0) and (`echo $MERGE_JSON_LOG` == "true") then { action(name="parse_json_journald" type="mmnormalize" ruleBase="/etc/rsyslog.d/parse_json.rulebase" variable="$!MESSAGE") + if strlen($!originalmsg) > 0 then { + # parsing failed - not json - just continue + unset $!originalmsg; + unset $!unparsed-data; + } } # ensure that $!message is set and $!MESSAGE is unset # if rsyslog is case sensitive, then $!MESSAGE == $!message @@ -25,21 +30,22 @@ if strlen($!MESSAGE) > 0 then { } unset $!MESSAGE; } - unset $!originalmsg; - unset $!unparsed-data; } else { if strlen($!log) > 0 then { set $!pipeline_metadata!collector!original_raw_message = $!log; if (strlen(`echo $MERGE_JSON_LOG`) > 0) and (`echo $MERGE_JSON_LOG` == "true") then { action(name="parse_json_containers" type="mmnormalize" ruleBase="/etc/rsyslog.d/parse_json.rulebase" variable="$!log") + if strlen($!originalmsg) > 0 then { + # parsing failed - not json - just continue + unset $!originalmsg; + unset $!unparsed-data; + } } if strlen($!message) == 0 then { set $!message = $!log; } unset $!log; } - unset $!originalmsg; - unset $!unparsed-data; } if strlen($!_MACHINE_ID) > 0 then { @@ -262,6 +268,17 @@ if strlen($!level) > 0 then { } unset $!stream; +# normalize timestamp for use in index name, but preserve whatever +# timestamp the user may have set in the log e.g. when using +# MERGE_JSON_LOG=true and the user has set "@timestamp" in the log +# we must have something in YYYY-MM-DD format for the index suffix +set $.tssec = parse_time($!@timestamp); +if $.tssec == 0 then { + set $.ts_for_index = exec_template("cnvt_to_viaq_timestamp"); +} else { + set $.ts_for_index = format_time($.tssec, "date-rfc3339"); +} + # add eventrouter if (strlen($!kubernetes) > 0) and (strlen($!kubernetes!namespace_name) > 0) then { diff --git a/files/rsyslog/67-mmexternal.conf b/files/rsyslog/67-mmexternal.conf index e950c82984..c30c9a88ec 100644 --- a/files/rsyslog/67-mmexternal.conf +++ b/files/rsyslog/67-mmexternal.conf @@ -2,6 +2,11 @@ module(load="mmexternal") if (strlen(`echo $SKIP_EMPTY`) > 0) and (`echo $SKIP_EMPTY` == "true") then { action(name="skip_empty" type="mmnormalize" ruleBase="/etc/rsyslog.d/parse_json_skip_empty.rulebase" path="$!openshift_logging_all") + if strlen($!originalmsg) > 0 then { + # parsing failed - not json - just continue + unset $!originalmsg; + unset $!unparsed-data; + } } else if (strlen(`echo $USE_MMEXTERNAL`) > 0) and (`echo $USE_MMEXTERNAL` == "true") then { action(name="undefined_field" type="mmexternal" binary="/usr/local/bin/undefined_field" interface.input="fulljson") }