Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make quoted capture less greedy when we have unambiguous separator #62

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
## 4.1.2
- bugfix: improves trim_key and trim_value to trim any _sequence_ of matching characters from the beginning and end of the corresponding keys and values; a previous implementation limited trimming to a single character from each end, which was surprising.
- bugfix: fixes issue where we can fail to correctly break up a sequence that includes a partially-quoted value followed by another fully-quoted value by slightly reducing greediness of quoted-value captures.

## 4.1.1
- bugfix: correctly handle empty values between value separator and field separator (#58)

Expand Down
8 changes: 4 additions & 4 deletions lib/logstash/filters/kv.rb
Expand Up @@ -331,8 +331,8 @@ def register
)
end

@trim_value_re = Regexp.new("^[#{@trim_value}]|[#{@trim_value}]$") if @trim_value
@trim_key_re = Regexp.new("^[#{@trim_key}]|[#{@trim_key}]$") if @trim_key
@trim_value_re = Regexp.new("^[#{@trim_value}]+|[#{@trim_value}]+$") if @trim_value
@trim_key_re = Regexp.new("^[#{@trim_key}]+|[#{@trim_key}]+$") if @trim_key

@remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
@remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
Expand Down Expand Up @@ -422,8 +422,8 @@ def quoted_capture(quote_sequence, close_quote_sequence=quote_sequence)
open_pattern = /#{Regexp.quote(quote_sequence)}/
close_pattern = /#{Regexp.quote(close_quote_sequence)}/

# matches a sequence of zero or more characters that is followed by the `close_quote_sequence`
quoted_value_pattern = /(?:.)*?(?=#{Regexp.quote(close_quote_sequence)})/
# matches a sequence of zero or more characters, none of which appears in the `close_quote_sequence`
quoted_value_pattern = /[^#{Regexp.quote(close_quote_sequence)}]*/

/#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
end
Expand Down
2 changes: 1 addition & 1 deletion logstash-filter-kv.gemspec
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

s.name = 'logstash-filter-kv'
s.version = '4.1.1'
s.version = '4.1.2'
s.licenses = ['Apache License (2.0)']
s.summary = "Parses key-value pairs"
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
Expand Down
87 changes: 87 additions & 0 deletions spec/filters/kv_spec.rb
Expand Up @@ -730,6 +730,59 @@
end
end

# Regression coverage for trim_key / trim_value stripping a whole run of
# matching characters from each end of a key or value, not just one character.
# Each example builds a KV filter from the context's `options`, registers it,
# and runs it against an event whose "message" field holds the context's `message`.
describe "trim_key/trim_value options : trim multiple matching characters from either end" do
# The plugin under test; `register` is called before the filter is used.
subject do
plugin = LogStash::Filters::KV.new(options)
plugin.register
plugin
end

# `message` and `options` are supplied by each nested context below.
let(:data) { {"message" => message} }
let(:event) { LogStash::Event.new(data) }


# Both trim options are a single space: spaces at the ends of keys and values
# are expected to be removed while the spaces inside them are kept.
context 'repeated same-character sequence' do
let(:message) { "key1= value1 with spaces | key2 with spaces =value2" }
let(:options) {
{
"field_split" => "|",
"value_split" => "=",
"trim_value" => " ",
"trim_key" => " "
}
}

it 'trims all the right bits' do
subject.filter(event)
expect(event.get('key1')).to eq('value1 with spaces')
expect(event.get('key2 with spaces')).to eq('value2')
end
end

# The trim options list several distinct characters ("<>," for values, "%+"
# for keys); any mix of them at either end is expected to be stripped.
context 'multi-character sequence' do
let(:message) { "to=<foo@example.com>, orig_to=<bar@example.com>, %+relay=mail.example.com[private/dovecot-lmtp], delay=2.2, delays=1.9/0.01/0.01/0.21, dsn=2.0.0, status=sent (250 2.0.0 <foo@example.com> YERDHejiRSXFDSdfUXTV Saved) " }
let(:options) {
{
"field_split" => " ",
"value_split" => "=",
"trim_value" => "<>,",
"trim_key" => "%+"
}
}

it 'trims all the right bits' do
subject.filter(event)
# "<...>," wrappers are removed from the values.
expect(event.get('to')).to eq('foo@example.com')
expect(event.get('orig_to')).to eq('bar@example.com')
# The "%+" prefix is removed from the key; the brackets inside the value are kept.
expect(event.get('relay')).to eq('mail.example.com[private/dovecot-lmtp]')
expect(event.get('delay')).to eq('2.2')
expect(event.get('delays')).to eq('1.9/0.01/0.01/0.21')
expect(event.get('dsn')).to eq('2.0.0')
# field_split is a space, so only the token directly after "status=" is the value.
expect(event.get('status')).to eq('sent')
end
end
end

describe "remove_char_key/remove_char_value options : remove all characters in keys/values whatever their position" do
subject do
plugin = LogStash::Filters::KV.new(options)
Expand Down Expand Up @@ -833,6 +886,40 @@
it_behaves_like "parsing all fields and values"
end

context 'multi-char field split pattern with value that begins quoted and contains more unquoted' do
let(:message) { 'foo=bar!!!!!baz="quoted stuff" and more unquoted!!!!!msg="fully-quoted with a part! of the separator"!!!!!blip="this!!!!!is it"!!!!!empty=""!!!!!non-empty="foo"' }
let(:options) {
{
"field_split_pattern" => "!!!!!"
}
}
it 'gets the right bits' do
subject.filter(event)
expect(event.get("foo")).to eq('bar')
expect(event.get("baz")).to eq('"quoted stuff" and more unquoted')
expect(event.get("msg")).to eq('fully-quoted with a part! of the separator')
expect(event.get("blip")).to eq('this!!!!!is it')
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

expect(event.get("empty")).to be_nil
expect(event.get("non-empty")).to eq('foo')
end
end

# Covers a value that opens with a quoted section and then continues unquoted
# (`baz`), with an empty `options` hash so no plugin settings are overridden.
# NOTE(review): `subject` and `event` are not defined in this context —
# presumably they come from an enclosing example group; confirm.
context 'standard field split pattern with value that begins quoted and contains more unquoted' do
let(:message) { 'foo=bar baz="quoted stuff" and more unquoted msg="some fully-quoted message " empty="" non-empty="foo"' }
let(:options) {
{
}
}
it 'gets the right bits' do
subject.filter(event)
expect(event.get("foo")).to eq('bar')
expect(event.get("baz")).to eq('quoted stuff') # NOTE: outside the quotes is truncated because field split pattern wins.
# The trailing space inside the quotes is expected to be preserved.
expect(event.get("msg")).to eq('some fully-quoted message ')
# An empty quoted value is expected to produce no field at all.
expect(event.get("empty")).to be_nil
expect(event.get("non-empty")).to eq('foo')
end
end

context "field and value split multi" do
let(:message) { "hello::world__foo::bar__baz::fizz__doublequoted::\"hello world\"__singlequoted::'hello world'__bracketsone::(hello world)__bracketstwo::[hello world]__bracketsthree::<hello world>" }
let(:options) {
Expand Down