From 608b82460cfe5f6f579324ee132171c055703629 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 12:01:35 +0100 Subject: [PATCH 01/22] Refactor: drop insist and unused socket require --- lib/logstash/inputs/imap.rb | 1 - logstash-input-imap.gemspec | 1 - spec/inputs/imap_spec.rb | 18 ++++++++---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index ea6869c..72811ba 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -3,7 +3,6 @@ require "logstash/namespace" require "logstash/timestamp" require "stud/interval" -require "socket" # for Socket.gethostname # Read mails from IMAP server # diff --git a/logstash-input-imap.gemspec b/logstash-input-imap.gemspec index 8234078..80e89b6 100644 --- a/logstash-input-imap.gemspec +++ b/logstash-input-imap.gemspec @@ -27,5 +27,4 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'stud', '~> 0.0.22' s.add_development_dependency 'logstash-devutils' - s.add_development_dependency 'insist' end diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index 2108df3..bf1e1f1 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -1,13 +1,11 @@ # encoding: utf-8 require "logstash/devutils/rspec/spec_helper" -require "insist" require "logstash/devutils/rspec/shared_examples" require "logstash/inputs/imap" require "mail" require "net/imap" require "base64" - describe LogStash::Inputs::IMAP do context "when interrupting the plugin" do @@ -67,7 +65,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("message") } == msg_text + expect( event.get("message") ).to eql msg_text end end @@ -80,7 +78,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("message") } == msg_html + expect( event.get("message") ).to eql msg_html end end end @@ -94,7 +92,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("subject") } == "foo : bar" + expect( event.get("subject") ).to eql "foo : bar" end end @@ -109,7 +107,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("received") } == ["test1", "test2"] + expect( event.get("received") ).to eql ["test1", "test2"] end it "should add more than 2 values as array in event" do @@ -123,7 +121,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("received") } == ["test1", "test2", "test3"] + expect( event.get("received") ).to eql ["test1", "test2", "test3"] end end @@ -136,7 +134,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("message") } == msg_text + expect( event.get("message") ).to eql msg_text end end @@ -148,7 +146,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("attachments") } == [ + expect( event.get("attachments") ).to eql [ {"filename"=>"some.html"}, {"filename"=>"image.png"}, {"filename"=>"unencoded.data"} @@ -163,7 +161,7 @@ input = LogStash::Inputs::IMAP.new config input.register event = input.parse_mail(subject) - insist { event.get("attachments") } == [ + expect( event.get("attachments") ).to eql [ {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, {"data"=> msg_unencoded, "filename"=>"unencoded.data"} From b985b3a6b9de2b33affeefb95820e8da867aa8ce Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 12:34:36 +0100 Subject: [PATCH 02/22] Test: refactor and add header expectations --- spec/inputs/imap_spec.rb | 169 +++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 68 deletions(-) diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index bf1e1f1..080081c 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -43,129 +43,162 @@ msg_binary = "\x42\x43\x44" msg_unencoded = "raw text 🐐" - subject do + let(:config) do + { "host" => "localhost", "user" => "#{user}", "password" => "#{password}" } + end + + subject(:input) do + LogStash::Inputs::IMAP.new config + end + + let(:mail) do Mail.new do from "me@example.com" to "you@example.com" subject "logstash imap input test" date msg_time body msg_text + message_id '<123@message.id>' # 'Message-ID' header + # let's have some headers: + header['X-Priority'] = '3' + header['X-Bot-ID'] = '111' + header['X-AES-Category'] = 'LEGIT' + header['X-Spam-Category'] = 'LEGIT' + header['Spam-Stopper-Id'] = '464bbb1a-1b86-4006-8a09-ce797fb56346' + header['Spam-Stopper-v2'] = 'Yes' + header['X-Mailer'] = 'Microsoft Outlook Express 6.00.2800.1106' + header['X-MimeOLE'] = 'Produced By Microsoft MimeOLE V6.00.2800.1106' add_file :filename => "some.html", :content => msg_html add_file :filename => "image.png", :content => msg_binary add_file :filename => "unencoded.data", :content => msg_unencoded, :content_transfer_encoding => "7bit" end end - context "with both text and html parts" do - context "when no content-type selected" do - it "should select text/plain part" do - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} + before do + input.register + end + + context "when no content-type selected" do + it "should select text/plain part" do + event = input.parse_mail(mail) + expect( event.get("message") ).to eql msg_text + end + end - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) - expect( event.get("message") ).to eql msg_text + context "when text/html content-type selected" do + let(:config) { super().merge("content_type" => "text/html") } + + it "should select text/html part" do + event = input.parse_mail(mail) + expect( event.get("message") ).to eql msg_html + end + end + + context "mail headers" do + let(:config) { super().merge("lowercase_headers" => true) } # default + + before { @event = input.parse_mail(mail) } + + it "sets all header fields" do + expect( @event.get("x-spam-category") ).to eql 'LEGIT' + expect( @event.get("x-aes-category") ).to eql 'LEGIT' + expect( @event.get("x-bot-id") ).to eql '111' + ['spam-stopper-id', 'spam-stopper-v2', 'x-mimeole', 'message-id', 'x-priority'].each do |name| + expect( @event.include?(name) ).to be true end + expect( @event.get("from") ).to eql 'me@example.com' + expect( @event.get("to") ).to eql 'you@example.com' + expect( @event.get("subject") ).to eql 'logstash imap input test' end - context "when text/html content-type selected" do - it "should select text/html part" do - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}", - "content_type" => "text/html"} + it 'does not set date header' do + expect( @event.include?('date') ).to be false + expect( @event.include?('Date') ).to be false + end + end - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) - expect( event.get("message") ).to eql msg_html + context "mail headers (not lower-cased)" do + let(:config) { super().merge("lowercase_headers" => false) } + + before { @event = input.parse_mail(mail) } + + it "sets all header fields" do + expect( @event.get("X-Spam-Category") ).to eql 'LEGIT' + expect( @event.get("X-AES-Category") ).to eql 'LEGIT' + expect( @event.get("X-Bot-ID") ).to eql '111' + ['Spam-Stopper-Id', 'Spam-Stopper-v2', 'X-MimeOLE', 'Message-ID', 'X-Priority'].each do |name| + expect( @event.include?(name) ).to be true end + expect( @event.get("From") ).to eql 'me@example.com' + expect( @event.get("To") ).to eql 'you@example.com' + expect( @event.get("Subject") ).to eql 'logstash imap input test' + end + + it 'does not set date header' do + expect( @event.include?('Date') ).to be false end end context "when subject is in RFC 2047 encoded-word format" do - it "should be decoded" do - subject.subject = "=?iso-8859-1?Q?foo_:_bar?=" - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} + before do + mail.subject = "=?iso-8859-1?Q?foo_:_bar?=" + end - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + it "should be decoded" do + event = input.parse_mail(mail) expect( event.get("subject") ).to eql "foo : bar" end end context "with multiple values for same header" do it "should add 2 values as array in event" do - subject.received = "test1" - subject.received = "test2" - - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} + mail.received = "test1" + mail.received = "test2" - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + event = input.parse_mail(mail) expect( event.get("received") ).to eql ["test1", "test2"] end it "should add more than 2 values as array in event" do - subject.received = "test1" - subject.received = "test2" - subject.received = "test3" + mail.received = "test1" + mail.received = "test2" + mail.received = "test3" - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} - - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + event = input.parse_mail(mail) expect( event.get("received") ).to eql ["test1", "test2", "test3"] end end context "when a header field is nil" do it "should parse mail" do - subject.header['X-Custom-Header'] = nil - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} + mail.header['X-Custom-Header'] = nil - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + event = input.parse_mail(mail) expect( event.get("message") ).to eql msg_text end end - context "with attachments" do + context "attachments" do it "should extract filenames" do - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}"} - - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + event = input.parse_mail(mail) expect( event.get("attachments") ).to eql [ {"filename"=>"some.html"}, {"filename"=>"image.png"}, {"filename"=>"unencoded.data"} ] end + end - it "should extract the encoded content" do - config = {"type" => "imap", "host" => "localhost", - "user" => "#{user}", "password" => "#{password}", - "save_attachments" => true} + context "with attachments saving" do + let(:config) { super().merge("save_attachments" => true) } - input = LogStash::Inputs::IMAP.new config - input.register - event = input.parse_mail(subject) + it "should extract the encoded content" do + event = input.parse_mail(mail) expect( event.get("attachments") ).to eql [ - {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, - {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, - {"data"=> msg_unencoded, "filename"=>"unencoded.data"} - ] - end + {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, + {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, + {"data"=> msg_unencoded, "filename"=>"unencoded.data"} + ] + end end end From fbdc5df77cbfb9438ba791be320a56baf537c125 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 12:35:21 +0100 Subject: [PATCH 03/22] Fix: skip the 'Date' header early due `lowercase_headers => true` default --- lib/logstash/inputs/imap.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 72811ba..571f577 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -185,16 +185,19 @@ def parse_mail(mail) # Add fields: Add message.header_fields { |h| h.name=> h.value } mail.header_fields.each do |header| # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in String form - name = @lowercase_headers ? header.name.to_s.downcase : header.name.to_s + name = header.name.to_s + + # Assume we already processed the 'date' above. + next if name == "Date" + + name = name.downcase if @lowercase_headers + # Call .decoded on the header in case it's in encoded-word form. # Details at: # https://github.com/mikel/mail/blob/master/README.md#encodings # http://tools.ietf.org/html/rfc2047#section-2 value = transcode_to_utf8(header.decoded.to_s) - # Assume we already processed the 'date' above. - next if name == "Date" - case (field = event.get(name)) when String # promote string to array if a header appears multiple times From 70ba899f93b8a3916c2a121f5f1695e20c9b3f03 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 12:58:15 +0100 Subject: [PATCH 04/22] Refactor: re-arrange config options --- lib/logstash/inputs/imap.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 571f577..f2b392c 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -23,15 +23,17 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base config :folder, :validate => :string, :default => 'INBOX' config :fetch_count, :validate => :number, :default => 50 - config :lowercase_headers, :validate => :boolean, :default => true config :check_interval, :validate => :number, :default => 300 + + config :lowercase_headers, :validate => :boolean, :default => true + config :delete, :validate => :boolean, :default => false config :expunge, :validate => :boolean, :default => false + config :strip_attachments, :validate => :boolean, :default => false config :save_attachments, :validate => :boolean, :default => false - # For multipart messages, use the first part that has this - # content-type as the event message. + # For multipart messages, use the first part that has this content-type as the event message. config :content_type, :validate => :string, :default => "text/plain" # Whether to use IMAP uid to track last processed message From bac62de5438b370f37243a219b9132cd501aa119 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 15:03:38 +0100 Subject: [PATCH 05/22] Refactor: less noisy (debug) logging --- lib/logstash/inputs/imap.rb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index f2b392c..76b8888 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -64,14 +64,16 @@ def register # Ensure that the filepath exists before writing, since it's deeply nested. FileUtils::mkdir_p datapath @sincedb_path = File.join(datapath, ".sincedb_" + Digest::MD5.hexdigest("#{@user}_#{@host}_#{@port}_#{@folder}")) + @logger.debug? && @logger.debug("Generated sincedb path", sincedb_path: @sincedb_path) end - if File.directory?(@sincedb_path) - raise ArgumentError.new("The \"sincedb_path\" argument must point to a file, received a directory: \"#{@sincedb_path}\"") - end - @logger.info("Using \"sincedb_path\": \"#{@sincedb_path}\"") + if File.exist?(@sincedb_path) + if File.directory?(@sincedb_path) + raise ArgumentError.new("The \"sincedb_path\" argument must point to a file, received a directory: \"#{@sincedb_path}\"") + end + @logger.debug? && @logger.debug("Found existing sincedb path", sincedb_path: @sincedb_path) @uid_last_value = File.read(@sincedb_path).to_i - @logger.info("Loading \"uid_last_value\": \"#{@uid_last_value}\"") + @logger.debug? && @logger.debug("Loaded from sincedb", uid_last_value: @uid_last_value) end @content_type_re = Regexp.new("^" + @content_type) @@ -146,7 +148,7 @@ def check_mail(queue) # Always save @uid_last_value so when tracking is switched from # "NOT SEEN" to "UID" we will continue from first unprocessed message if @uid_last_value - @logger.info("Saving \"uid_last_value\": \"#{@uid_last_value}\"") + @logger.debug? && @logger.debug("Saving to sincedb", uid_last_value: @uid_last_value) File.write(@sincedb_path, @uid_last_value) end end @@ -165,7 +167,8 @@ def parse_attachments(mail) def parse_mail(mail) # Add a debug message so we can track what message might cause an error later - @logger.debug? && @logger.debug("Working with message_id", :message_id => mail.message_id) + @logger.debug? && @logger.debug("Processing mail", message_id: mail.message_id) + # TODO(sissel): What should a multipart message look like as an event? # For now, just take the plain-text part and set it as the message. if mail.parts.count == 0 From 5927108d9c4cf93edd8cdf91095cb60328ed7379 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 15:04:31 +0100 Subject: [PATCH 06/22] Fix: plugin should not close $stdin, while stoping --- lib/logstash/inputs/imap.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 76b8888..32a2258 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -228,7 +228,6 @@ def parse_mail(mail) def stop Stud.stop!(@run_thread) - $stdin.close end private From e5530a742b17e63f58cb33989de49d34c201a4cf Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 15:29:11 +0100 Subject: [PATCH 07/22] Fix: a missing require for FileUtils --- lib/logstash/inputs/imap.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 32a2258..8564d4a 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -3,6 +3,7 @@ require "logstash/namespace" require "logstash/timestamp" require "stud/interval" +require 'fileutils' # Read mails from IMAP server # From a65d8c2609c75708db5b1fba7630ab14e234d068 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 15:30:13 +0100 Subject: [PATCH 08/22] Feat: support a headers_target (ECS) --- lib/logstash/inputs/imap.rb | 40 +++++- logstash-input-imap.gemspec | 2 + spec/inputs/imap_spec.rb | 238 ++++++++++++++++++++++-------------- 3 files changed, 179 insertions(+), 101 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 8564d4a..ea0bd27 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -5,11 +5,20 @@ require "stud/interval" require 'fileutils' +require 'logstash/plugin_mixins/ecs_compatibility_support' +require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' +require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter' + # Read mails from IMAP server # # Periodically scan an IMAP folder (`INBOX` by default) and move any read messages # to the trash. class LogStash::Inputs::IMAP < LogStash::Inputs::Base + + include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) + + extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter + config_name "imap" default :codec, "plain" @@ -28,6 +37,8 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base config :lowercase_headers, :validate => :boolean, :default => true + config :headers_target, :validate => :field_reference # ECS default: [@metadata][input][imap][headers] + config :delete, :validate => :boolean, :default => false config :expunge, :validate => :boolean, :default => false @@ -43,6 +54,23 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base # Path to file with last run time metadata config :sincedb_path, :validate => :string, :required => false + def initialize(*params) + super + + if ecs_compatibility != :disabled # set ECS target defaults + @headers_target = '[@metadata][input][imap][headers]' unless original_params.include?('headers_target') + end + unless @headers_target.nil? + @headers_target = normalize_field_ref(@headers_target) + end + end + + def normalize_field_ref(target) + # so we can later event.set("#{target}[#{name}]", ...) + target.match?(/\A[^\[\]]+\z/) ? "[#{target}]" : target + end + private :normalize_field_ref + def register require "net/imap" # in stdlib require "mail" # gem 'mail' @@ -204,16 +232,16 @@ def parse_mail(mail) # http://tools.ietf.org/html/rfc2047#section-2 value = transcode_to_utf8(header.decoded.to_s) - case (field = event.get(name)) + targeted_name = "#{@headers_target}[#{name}]" + case (field = event.get(targeted_name)) when String - # promote string to array if a header appears multiple times - # (like 'received') - event.set(name, [field, value]) + # promote string to array if a header appears multiple times (like 'received') + event.set(targeted_name, [field, value]) when Array field << value - event.set(name, field) + event.set(targeted_name, field) when nil - event.set(name, value) + event.set(targeted_name, value) end end diff --git a/logstash-input-imap.gemspec b/logstash-input-imap.gemspec index 80e89b6..e8d245c 100644 --- a/logstash-input-imap.gemspec +++ b/logstash-input-imap.gemspec @@ -21,6 +21,8 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" + s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3' + s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0' s.add_runtime_dependency 'logstash-codec-plain' s.add_runtime_dependency 'mail', '~> 2.6.3' s.add_runtime_dependency 'mime-types', '2.6.2' diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index 080081c..996b22b 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -1,6 +1,7 @@ # encoding: utf-8 require "logstash/devutils/rspec/spec_helper" require "logstash/devutils/rspec/shared_examples" +require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper' require "logstash/inputs/imap" require "mail" require "net/imap" @@ -34,7 +35,7 @@ end -describe LogStash::Inputs::IMAP do +describe LogStash::Inputs::IMAP, :ecs_compatibility_support do user = "logstash" password = "secret" msg_time = Time.new @@ -78,127 +79,174 @@ input.register end - context "when no content-type selected" do - it "should select text/plain part" do - event = input.parse_mail(mail) - expect( event.get("message") ).to eql msg_text - end - end - - context "when text/html content-type selected" do - let(:config) { super().merge("content_type" => "text/html") } + ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| - it "should select text/html part" do - event = input.parse_mail(mail) - expect( event.get("message") ).to eql msg_html - end - end + let(:ecs_compatibility?) { ecs_select.active_mode != :disabled } - context "mail headers" do - let(:config) { super().merge("lowercase_headers" => true) } # default + let (:config) { super().merge('ecs_compatibility' => ecs_select.active_mode) } - before { @event = input.parse_mail(mail) } - - it "sets all header fields" do - expect( @event.get("x-spam-category") ).to eql 'LEGIT' - expect( @event.get("x-aes-category") ).to eql 'LEGIT' - expect( @event.get("x-bot-id") ).to eql '111' - ['spam-stopper-id', 'spam-stopper-v2', 'x-mimeole', 'message-id', 'x-priority'].each do |name| - expect( @event.include?(name) ).to be true + context "when no content-type selected" do + it "should select text/plain part" do + event = input.parse_mail(mail) + expect( event.get("message") ).to eql msg_text end - expect( @event.get("from") ).to eql 'me@example.com' - expect( @event.get("to") ).to eql 'you@example.com' - expect( @event.get("subject") ).to eql 'logstash imap input test' - end - - it 'does not set date header' do - expect( @event.include?('date') ).to be false - expect( @event.include?('Date') ).to be false end - end - context "mail headers (not lower-cased)" do - let(:config) { super().merge("lowercase_headers" => false) } + context "when text/html content-type selected" do + let(:config) { super().merge("content_type" => "text/html") } - before { @event = input.parse_mail(mail) } - - it "sets all header fields" do - expect( @event.get("X-Spam-Category") ).to eql 'LEGIT' - expect( @event.get("X-AES-Category") ).to eql 'LEGIT' - expect( @event.get("X-Bot-ID") ).to eql '111' - ['Spam-Stopper-Id', 'Spam-Stopper-v2', 'X-MimeOLE', 'Message-ID', 'X-Priority'].each do |name| - expect( @event.include?(name) ).to be true + it "should select text/html part" do + event = input.parse_mail(mail) + expect( event.get("message") ).to eql msg_html end - expect( @event.get("From") ).to eql 'me@example.com' - expect( @event.get("To") ).to eql 'you@example.com' - expect( @event.get("Subject") ).to eql 'logstash imap input test' end - it 'does not set date header' do - expect( @event.include?('Date') ).to be false - end - end + context "mail headers" do + let(:config) { super().merge("lowercase_headers" => true) } # default + + before { @event = input.parse_mail(mail) } + + it "sets all header fields" do + if ecs_compatibility? + expect( @event.get("[@metadata][input][imap][headers][x-spam-category]") ).to eql 'LEGIT' + expect( @event.get("[@metadata][input][imap][headers][x-aes-category]") ).to eql 'LEGIT' + expect( @event.get("[@metadata][input][imap][headers][x-bot-id]") ).to eql '111' + ['spam-stopper-id', 'spam-stopper-v2', 'x-mimeole', 'message-id', 'x-priority'].each do |name| + expect( @event.include?("[@metadata][input][imap][headers][#{name}]") ).to be true + end + expect( @event.get("[@metadata][input][imap][headers][from]") ).to eql 'me@example.com' + expect( @event.get("[@metadata][input][imap][headers][to]") ).to eql 'you@example.com' + expect( @event.get("[@metadata][input][imap][headers][subject]") ).to eql 'logstash imap input test' + else + expect( @event.get("x-spam-category") ).to eql 'LEGIT' + expect( @event.get("x-aes-category") ).to eql 'LEGIT' + expect( @event.get("x-bot-id") ).to eql '111' + ['spam-stopper-id', 'spam-stopper-v2', 'x-mimeole', 'message-id', 'x-priority'].each do |name| + expect( @event.include?(name) ).to be true + end + expect( @event.get("from") ).to eql 'me@example.com' + expect( @event.get("to") ).to eql 'you@example.com' + expect( @event.get("subject") ).to eql 'logstash imap input test' + end + end - context "when subject is in RFC 2047 encoded-word format" do - before do - mail.subject = "=?iso-8859-1?Q?foo_:_bar?=" + it 'does not set date header' do + expect( @event.include?('date') ).to be false + expect( @event.include?('Date') ).to be false + end end - it "should be decoded" do - event = input.parse_mail(mail) - expect( event.get("subject") ).to eql "foo : bar" + context "mail headers (not lower-cased)" do + let(:config) { super().merge("lowercase_headers" => false) } + + before { @event = input.parse_mail(mail) } + + it "sets all header fields" do + if ecs_compatibility? + expect( @event.get("[@metadata][input][imap][headers][X-Spam-Category]") ).to eql 'LEGIT' + expect( @event.get("[@metadata][input][imap][headers][X-AES-Category]") ).to eql 'LEGIT' + expect( @event.get("[@metadata][input][imap][headers][X-Bot-ID]") ).to eql '111' + ['Spam-Stopper-Id', 'Spam-Stopper-v2', 'X-MimeOLE', 'Message-ID', 'X-Priority'].each do |name| + expect( @event.include?("[@metadata][input][imap][headers][#{name}]") ).to be true + end + expect( @event.get("[@metadata][input][imap][headers][From]") ).to eql 'me@example.com' + expect( @event.get("[@metadata][input][imap][headers][To]") ).to eql 'you@example.com' + expect( @event.get("[@metadata][input][imap][headers][Subject]") ).to eql 'logstash imap input test' + else + expect( @event.get("X-Spam-Category") ).to eql 'LEGIT' + expect( @event.get("X-AES-Category") ).to eql 'LEGIT' + expect( @event.get("X-Bot-ID") ).to eql '111' + ['Spam-Stopper-Id', 'Spam-Stopper-v2', 'X-MimeOLE', 'Message-ID', 'X-Priority'].each do |name| + expect( @event.include?(name) ).to be true + end + expect( @event.get("From") ).to eql 'me@example.com' + expect( @event.get("To") ).to eql 'you@example.com' + expect( @event.get("Subject") ).to eql 'logstash imap input test' + end + end + + it 'does not set date header' do + expect( @event.include?('Date') ).to be false + end end - end - context "with multiple values for same header" do - it "should add 2 values as array in event" do - mail.received = "test1" - mail.received = "test2" + context "when subject is in RFC 2047 encoded-word format" do + before do + mail.subject = "=?iso-8859-1?Q?foo_:_bar?=" + end - event = input.parse_mail(mail) - expect( event.get("received") ).to eql ["test1", "test2"] + it "should be decoded" do + event = input.parse_mail(mail) + if ecs_compatibility? + expect( event.get("[@metadata][input][imap][headers][subject]") ).to eql "foo : bar" + else + expect( event.get("subject") ).to eql "foo : bar" + end + end end - it "should add more than 2 values as array in event" do - mail.received = "test1" - mail.received = "test2" - mail.received = "test3" + context "with multiple values for same header" do + it "should add 2 values as array in event" do + mail.received = "test1" + mail.received = "test2" + + event = input.parse_mail(mail) + expected_value = ["test1", "test2"] + if ecs_compatibility? + expect( event.get("[@metadata][input][imap][headers][received]") ).to eql expected_value + else + expect( event.get("received") ).to eql expected_value + end + end - event = input.parse_mail(mail) - expect( event.get("received") ).to eql ["test1", "test2", "test3"] + it "should add more than 2 values as array in event" do + mail.received = "test1" + mail.received = "test2" + mail.received = "test3" + + event = input.parse_mail(mail) + expected_value = ["test1", "test2", "test3"] + if ecs_compatibility? + expect( event.get("[@metadata][input][imap][headers][received]") ).to eql expected_value + else + expect( event.get("received") ).to eql expected_value + end + end end - end - context "when a header field is nil" do - it "should parse mail" do - mail.header['X-Custom-Header'] = nil + context "when a header field is nil" do + it "should parse mail" do + mail.header['X-Custom-Header'] = nil - event = input.parse_mail(mail) - expect( event.get("message") ).to eql msg_text + event = input.parse_mail(mail) + expect( event.get("message") ).to eql msg_text + end end - end - context "attachments" do - it "should extract filenames" do - event = input.parse_mail(mail) - expect( event.get("attachments") ).to eql [ - {"filename"=>"some.html"}, - {"filename"=>"image.png"}, - {"filename"=>"unencoded.data"} - ] + context "attachments" do + it "should extract filenames" do + event = input.parse_mail(mail) + expect( event.get("attachments") ).to eql [ + {"filename"=>"some.html"}, + {"filename"=>"image.png"}, + {"filename"=>"unencoded.data"} + ] + end end - end - context "with attachments saving" do - let(:config) { super().merge("save_attachments" => true) } + context "with attachments saving" do + let(:config) { super().merge("save_attachments" => true) } - it "should extract the encoded content" do - event = input.parse_mail(mail) - expect( event.get("attachments") ).to eql [ - {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, - {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, - {"data"=> msg_unencoded, "filename"=>"unencoded.data"} - ] + it "should extract the encoded content" do + event = input.parse_mail(mail) + expect( event.get("attachments") ).to eql [ + {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, + {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, + {"data"=> msg_unencoded, "filename"=>"unencoded.data"} + ] + end end + end + end From 96f3ba76fb81a7668a531b72e9b9452ef13ffb91 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 16:19:05 +0100 Subject: [PATCH 09/22] Feat: support a attachments target --- lib/logstash/inputs/imap.rb | 19 ++++++++++++++----- spec/inputs/imap_spec.rb | 22 ++++++++++++---------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index ea0bd27..1c69383 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -45,6 +45,10 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base config :strip_attachments, :validate => :boolean, :default => false config :save_attachments, :validate => :boolean, :default => false + # Legacy default: [attachments] + # ECS default: [@metadata][input][imap][attachments] + config :attachments_target, :validate => :field_reference + # For multipart messages, use the first part that has this content-type as the event message. config :content_type, :validate => :string, :default => "text/plain" @@ -57,11 +61,16 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base def initialize(*params) super - if ecs_compatibility != :disabled # set ECS target defaults - @headers_target = '[@metadata][input][imap][headers]' unless original_params.include?('headers_target') - end - unless @headers_target.nil? + if original_params.include?('headers_target') @headers_target = normalize_field_ref(@headers_target) + else + @headers_target = '[@metadata][input][imap][headers]' if ecs_compatibility != :disabled + end + + if original_params.include?('attachments_target') + @attachments_target = normalize_field_ref(@attachments_target) + else + @attachments_target = ecs_compatibility != :disabled ? '[@metadata][input][imap][attachments]' : '[attachments]' end end @@ -247,7 +256,7 @@ def parse_mail(mail) # Add attachments if attachments && attachments.length > 0 - event.set('attachments', attachments) + event.set(@attachments_target, attachments) end decorate(event) diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index 996b22b..f44b28d 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -226,11 +226,12 @@ context "attachments" do it "should extract filenames" do event = input.parse_mail(mail) - expect( event.get("attachments") ).to eql [ - {"filename"=>"some.html"}, - {"filename"=>"image.png"}, - {"filename"=>"unencoded.data"} - ] + target = ecs_compatibility? ? '[@metadata][input][imap][attachments]' : 'attachments' + expect( event.get(target) ).to eql [ + {"filename"=>"some.html"}, + {"filename"=>"image.png"}, + {"filename"=>"unencoded.data"} + ] end end @@ -239,11 +240,12 @@ it "should extract the encoded content" do event = input.parse_mail(mail) - expect( event.get("attachments") ).to eql [ - {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, - {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, - {"data"=> msg_unencoded, "filename"=>"unencoded.data"} - ] + target = ecs_compatibility? ? '[@metadata][input][imap][attachments]' : 'attachments' + expect( event.get(target) ).to eql [ + {"data"=> Base64.encode64(msg_html).encode(crlf_newline: true), "filename"=>"some.html"}, + {"data"=> Base64.encode64(msg_binary).encode(crlf_newline: true), "filename"=>"image.png"}, + {"data"=> msg_unencoded, "filename"=>"unencoded.data"} + ] end end From a318b096efb8c0ec5d90f0f8ecb68c576c3ccc60 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 16:46:09 +0100 Subject: [PATCH 10/22] bump + changelog --- CHANGELOG.md | 7 +++++++ logstash-input-imap.gemspec | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b45d96..52679fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 3.2.0 + - Feat: ECS compatibility [#55](https://github.com/logstash-plugins/logstash-input-imap/pull/55) + * added (optional) `headers_target` configuration option + * added (optional) `attachments_target` configuration option + - Fix: plugin should not close `$stdin`, while stoping + - Fix: make sure the 'Date' header is skipped regardless of the `lowercase_headers` setting + ## 3.1.0 - Adds an option to recursively search the message parts for attachment and inline attachment filenames. If the save_attachments option is set to true, the content of attachments is included in the `attachments.data` field. The attachment data can then be used by the Elasticsearch Ingest Attachment Processor Plugin. [#48](https://github.com/logstash-plugins/logstash-input-imap/pull/48) diff --git a/logstash-input-imap.gemspec b/logstash-input-imap.gemspec index e8d245c..5faef12 100644 --- a/logstash-input-imap.gemspec +++ b/logstash-input-imap.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-imap' - s.version = '3.1.0' + s.version = '3.2.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads mail from an IMAP server" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From c002ec7cab88d6a9caa69d7e25370d2ece7f3f26 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 16:46:52 +0100 Subject: [PATCH 11/22] Docs: ecs_compatibility, headers_target, attachments_target --- docs/index.asciidoc | 49 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 084381e..6d087b8 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -26,6 +26,14 @@ Read mails from IMAP server Periodically scan an IMAP folder (`INBOX` by default) and move any read messages to the trash. +==== Compatibility with the Elastic Common Schema (ECS) + +The plugin includes sensible defaults that change based on <>. +When ECS compatibility is disabled, headers and attachments are targeted at the root level. +When targeting an ECS versionn, headers and attachments target `@metadata` sub-fields unless configured otherwise in order +to avoid conflict with ECS fields. +See <>, and <>. + [id="plugins-{type}s-{plugin}-options"] ==== Imap Input Configuration Options @@ -34,12 +42,15 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>|Yes | <> |<>|No | <> |<>|Yes @@ -58,6 +69,16 @@ input plugins.   +[id="plugins-{type}s-{plugin}-attachments_target"] +===== `attachments_target` + + * Value type is <> + * Default value depends on whether <> is enabled: + ** ECS Compatibility disabled: no default value for this setting + ** ECS Compatibility enabled: `"[@metadata][input][imap][attachments]" + +The name of the field under which mail attachments information will be added, if <> is set. + [id="plugins-{type}s-{plugin}-check_interval"] ===== `check_interval` @@ -72,8 +93,7 @@ input plugins. * Value type is <> * Default value is `"text/plain"` -For multipart messages, use the first part that has this -content-type as the event message. +For multipart messages, use the first part that has this content-type as the event message. [id="plugins-{type}s-{plugin}-delete"] ===== `delete` @@ -83,6 +103,21 @@ content-type as the event message. +[id="plugins-{type}s-{plugin}-ecs_compatibility"] +===== `ecs_compatibility` + + * Value type is <> + * Supported values are: + ** `disabled`: does not use ECS-compatible field names (for example, `From` header field is added to the event) + ** `v1`, `v8`: avoids field names that might conflict with Elastic Common Schema (for example, the `From` header is added as metadata) + * Default value depends on which version of Logstash is running: + ** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default + ** Otherwise, the default value is `disabled`. + +Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)]. +The value of this setting affects the _default_ value of <> and +<>. + [id="plugins-{type}s-{plugin}-expunge"] ===== `expunge` @@ -107,6 +142,16 @@ content-type as the event message. +[id="plugins-{type}s-{plugin}-headers_target"] +===== `headers_target` + + * Value type is <> + * Default value depends on whether <> is enabled: + ** ECS Compatibility disabled: no default value for this setting + ** ECS Compatibility enabled: `"[@metadata][input][imap][headers]" + +The name of the field under which mail headers will be added. + [id="plugins-{type}s-{plugin}-host"] ===== `host` From df0e436882b4264b9d47d2316f5bdaa6e88620c2 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 10 Nov 2021 16:54:34 +0100 Subject: [PATCH 12/22] Docs: typos --- docs/index.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 6d087b8..b475f87 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -29,8 +29,8 @@ to the trash. ==== Compatibility with the Elastic Common Schema (ECS) The plugin includes sensible defaults that change based on <>. -When ECS compatibility is disabled, headers and attachments are targeted at the root level. -When targeting an ECS versionn, headers and attachments target `@metadata` sub-fields unless configured otherwise in order +When ECS compatibility is disabled, mail headers and attachments are targeted at the root level. +When targeting an ECS version, headers and attachments target `@metadata` sub-fields unless configured otherwise in order to avoid conflict with ECS fields. See <>, and <>. From 7ac861b715e8d8bbcc862f36d52c96828ccbe833 Mon Sep 17 00:00:00 2001 From: kares Date: Tue, 16 Nov 2021 10:23:58 +0100 Subject: [PATCH 13/22] Docs: ecs link --- docs/index.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index b475f87..3916c47 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -26,6 +26,7 @@ Read mails from IMAP server Periodically scan an IMAP folder (`INBOX` by default) and move any read messages to the trash. +[id="plugins-{type}s-{plugin}-ecs"] ==== Compatibility with the Elastic Common Schema (ECS) The plugin includes sensible defaults that change based on <>. From 0e35498fb1119d9d65e4165ab049c544efe42262 Mon Sep 17 00:00:00 2001 From: kares Date: Tue, 23 Nov 2021 07:16:31 +0100 Subject: [PATCH 14/22] Refactor: restore sincedb_path logging at info --- lib/logstash/inputs/imap.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 1c69383..2f35598 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -104,12 +104,11 @@ def register @sincedb_path = File.join(datapath, ".sincedb_" + Digest::MD5.hexdigest("#{@user}_#{@host}_#{@port}_#{@folder}")) @logger.debug? && @logger.debug("Generated sincedb path", sincedb_path: @sincedb_path) end - + @logger.info("Using", sincedb_path: @sincedb_path) if File.exist?(@sincedb_path) if File.directory?(@sincedb_path) raise ArgumentError.new("The \"sincedb_path\" argument must point to a file, received a directory: \"#{@sincedb_path}\"") end - @logger.debug? && @logger.debug("Found existing sincedb path", sincedb_path: @sincedb_path) @uid_last_value = File.read(@sincedb_path).to_i @logger.debug? && @logger.debug("Loaded from sincedb", uid_last_value: @uid_last_value) end From cfba0769f645efa397085589f2c57637b70840b0 Mon Sep 17 00:00:00 2001 From: kares Date: Wed, 24 Nov 2021 18:42:59 +0100 Subject: [PATCH 15/22] Docs: fix missing ` --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 3916c47..1f9d8e6 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -149,7 +149,7 @@ The value of this setting affects the _default_ value of <> * Default value depends on whether <> is enabled: ** ECS Compatibility disabled: no default value for this setting - ** ECS Compatibility enabled: `"[@metadata][input][imap][headers]" + ** ECS Compatibility enabled: `"[@metadata][input][imap][headers]"` The name of the field under which mail headers will be added. From 3196ff43d1a19c42c03c73022040f868bf03d234 Mon Sep 17 00:00:00 2001 From: kares Date: Thu, 18 Nov 2021 10:55:12 +0100 Subject: [PATCH 16/22] Refactor: take decoded string (if nil) otherwise a potential nil.to_s would never be nil --- lib/logstash/inputs/imap.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 2f35598..f9c0957 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -238,7 +238,7 @@ def parse_mail(mail) # Details at: # https://github.com/mikel/mail/blob/master/README.md#encodings # http://tools.ietf.org/html/rfc2047#section-2 - value = transcode_to_utf8(header.decoded.to_s) + value = transcode_to_utf8(header.decoded) targeted_name = "#{@headers_target}[#{name}]" case (field = event.get(targeted_name)) @@ -273,8 +273,7 @@ def stop # the mail gem will set the correct encoding on header strings decoding # and we want to transcode it to utf8 def transcode_to_utf8(s) - unless s.nil? - s.encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace) - end + return nil if s.nil? + s.encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace) end end From 64253a8c9d9cf20750e188e66138768104916474 Mon Sep 17 00:00:00 2001 From: kares Date: Thu, 18 Nov 2021 11:17:40 +0100 Subject: [PATCH 17/22] Refactor: extract into standalone method --- lib/logstash/inputs/imap.rb | 61 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index f9c0957..52e32d8 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -176,7 +176,6 @@ def check_mail(queue) rescue => e @logger.error("Encountered error #{e.class}", :message => e.message, :backtrace => e.backtrace) # Do not raise error, check_mail will be invoked in the next run time - ensure # Close the connection (and ignore errors) imap.close rescue nil @@ -224,34 +223,7 @@ def parse_mail(mail) # Use the 'Date' field as the timestamp event.timestamp = LogStash::Timestamp.new(mail.date.to_time) - # Add fields: Add message.header_fields { |h| h.name=> h.value } - mail.header_fields.each do |header| - # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in String form - name = header.name.to_s - - # Assume we already processed the 'date' above. - next if name == "Date" - - name = name.downcase if @lowercase_headers - - # Call .decoded on the header in case it's in encoded-word form. - # Details at: - # https://github.com/mikel/mail/blob/master/README.md#encodings - # http://tools.ietf.org/html/rfc2047#section-2 - value = transcode_to_utf8(header.decoded) - - targeted_name = "#{@headers_target}[#{name}]" - case (field = event.get(targeted_name)) - when String - # promote string to array if a header appears multiple times (like 'received') - event.set(targeted_name, [field, value]) - when Array - field << value - event.set(targeted_name, field) - when nil - event.set(targeted_name, value) - end - end + process_headers(mail, event) # Add attachments if attachments && attachments.length > 0 @@ -263,6 +235,37 @@ def parse_mail(mail) end end + def process_headers(mail, event) + # Add fields: Add message.header_fields { |h| h.name=> h.value } + mail.header_fields.each do |header| + # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in String form + name = header.name.to_s + + # assume we already processed the 'date' into event.timestamp + next if name == "Date" + + name = name.downcase if @lowercase_headers + + # Call .decoded on the header in case it's in encoded-word form. + # Details at: + # https://github.com/mikel/mail/blob/master/README.md#encodings + # http://tools.ietf.org/html/rfc2047#section-2 + value = transcode_to_utf8(header.decoded) + + targeted_name = "#{@headers_target}[#{name}]" + case (field = event.get(targeted_name)) + when String + # promote string to array if a header appears multiple times (like 'received') + event.set(targeted_name, [field, value]) + when Array + field << value + event.set(targeted_name, field) + when nil + event.set(targeted_name, value) + end + end + end + def stop Stud.stop!(@run_thread) end From dd381bf9beee81fd2128e6a181b9dae144b988b6 Mon Sep 17 00:00:00 2001 From: kares Date: Mon, 22 Nov 2021 12:19:02 +0100 Subject: [PATCH 18/22] need to be able to disable targets with => '' --- lib/logstash/inputs/imap.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 52e32d8..b3bae88 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -64,7 +64,7 @@ def initialize(*params) if original_params.include?('headers_target') @headers_target = normalize_field_ref(@headers_target) else - @headers_target = '[@metadata][input][imap][headers]' if ecs_compatibility != :disabled + @headers_target = ecs_compatibility != :disabled ? '[@metadata][input][imap][headers]' : '' end if original_params.include?('attachments_target') @@ -75,6 +75,7 @@ def initialize(*params) end def normalize_field_ref(target) + return nil if target.nil? || target.empty? # so we can later event.set("#{target}[#{name}]", ...) target.match?(/\A[^\[\]]+\z/) ? "[#{target}]" : target end @@ -223,10 +224,10 @@ def parse_mail(mail) # Use the 'Date' field as the timestamp event.timestamp = LogStash::Timestamp.new(mail.date.to_time) - process_headers(mail, event) + process_headers(mail, event) if @headers_target # Add attachments - if attachments && attachments.length > 0 + if attachments && attachments.length > 0 && @attachments_target event.set(@attachments_target, attachments) end From 79a5eae7b21df794a4887a4fe271d09e0c51c30d Mon Sep 17 00:00:00 2001 From: kares Date: Tue, 30 Nov 2021 10:03:54 +0100 Subject: [PATCH 19/22] a few notes + spec headers_target => '' --- lib/logstash/inputs/imap.rb | 7 +++++-- spec/inputs/imap_spec.rb | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index b3bae88..7612062 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -62,18 +62,21 @@ def initialize(*params) super if original_params.include?('headers_target') - @headers_target = normalize_field_ref(@headers_target) + @headers_target = normalize_field_ref(headers_target) else + # NOTE: user specified `headers_target => ''` means disable headers (@headers_target == nil) + # unlike our default here (@headers_target == '') causes setting headers at top level ... @headers_target = ecs_compatibility != :disabled ? '[@metadata][input][imap][headers]' : '' end if original_params.include?('attachments_target') - @attachments_target = normalize_field_ref(@attachments_target) + @attachments_target = normalize_field_ref(attachments_target) else @attachments_target = ecs_compatibility != :disabled ? '[@metadata][input][imap][attachments]' : '[attachments]' end end + # @note a '' target value is normalized to nil def normalize_field_ref(target) return nil if target.nil? || target.empty? # so we can later event.set("#{target}[#{name}]", ...) diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index f44b28d..531cfcf 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -170,6 +170,19 @@ end end + context "headers_target => ''" do + let(:config) { super().merge("headers_target" => '') } + + before { @event = input.parse_mail(mail) } + + it "does not set any header fields" do + ['From', 'To', 'Subject', 'subject', 'Date', 'date'].each do |name| + expect( @event.include?(name) ).to be false # legacy + expect( @event.include?("[@metadata][input][imap][headers][#{name}]") ).to be false # ecs + end + end + end + context "when subject is in RFC 2047 encoded-word format" do before do mail.subject = "=?iso-8859-1?Q?foo_:_bar?=" From bfcb44fd6ea87ed8317c840203f0d9841dd40127 Mon Sep 17 00:00:00 2001 From: kares Date: Tue, 30 Nov 2021 10:19:29 +0100 Subject: [PATCH 20/22] Docs: note on '' disabling the target --- docs/index.asciidoc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 1f9d8e6..832b002 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -75,7 +75,7 @@ input plugins. * Value type is <> * Default value depends on whether <> is enabled: - ** ECS Compatibility disabled: no default value for this setting + ** ECS Compatibility disabled: `"[attachments]"` ** ECS Compatibility enabled: `"[@metadata][input][imap][attachments]" The name of the field under which mail attachments information will be added, if <> is set. @@ -148,11 +148,14 @@ The value of this setting affects the _default_ value of <> * Default value depends on whether <> is enabled: - ** ECS Compatibility disabled: no default value for this setting + ** ECS Compatibility disabled: no default value (for example, the subject header is stored under the `"subject"` name) ** ECS Compatibility enabled: `"[@metadata][input][imap][headers]"` The name of the field under which mail headers will be added. +Setting `headers_target => ''` skips headers processing and no header is added to the event. +Except the date header, if present, which is always used as the event's `@timestamp`. + [id="plugins-{type}s-{plugin}-host"] ===== `host` From 68e92f90e28040c8a3f09e9b91dc740f88901fbc Mon Sep 17 00:00:00 2001 From: kares Date: Tue, 30 Nov 2021 10:36:59 +0100 Subject: [PATCH 21/22] for compatibility we should store the date header despite this being a duplicate information --- CHANGELOG.md | 3 +-- lib/logstash/inputs/imap.rb | 4 ---- spec/inputs/imap_spec.rb | 8 ++++---- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52679fc..af1b096 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,7 @@ - Feat: ECS compatibility [#55](https://github.com/logstash-plugins/logstash-input-imap/pull/55) * added (optional) `headers_target` configuration option * added (optional) `attachments_target` configuration option - - Fix: plugin should not close `$stdin`, while stoping - - Fix: make sure the 'Date' header is skipped regardless of the `lowercase_headers` setting + - Fix: plugin should not close `$stdin`, while stoping ## 3.1.0 - Adds an option to recursively search the message parts for attachment and inline attachment filenames. If the save_attachments option is set to true, the content of attachments is included in the `attachments.data` field. The attachment data can then be used by the Elasticsearch Ingest Attachment Processor Plugin. diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 7612062..6ff34e7 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -244,10 +244,6 @@ def process_headers(mail, event) mail.header_fields.each do |header| # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in String form name = header.name.to_s - - # assume we already processed the 'date' into event.timestamp - next if name == "Date" - name = name.downcase if @lowercase_headers # Call .decoded on the header in case it's in encoded-word form. diff --git a/spec/inputs/imap_spec.rb b/spec/inputs/imap_spec.rb index 531cfcf..0d9c74b 100644 --- a/spec/inputs/imap_spec.rb +++ b/spec/inputs/imap_spec.rb @@ -130,8 +130,8 @@ end end - it 'does not set date header' do - expect( @event.include?('date') ).to be false + it 'does include the date header' do + expect( @event.include?('date') ).to be true unless ecs_compatibility? expect( @event.include?('Date') ).to be false end end @@ -165,8 +165,8 @@ end end - it 'does not set date header' do - expect( @event.include?('Date') ).to be false + it 'does include the date header' do + expect( @event.include?('Date') ).to be true unless ecs_compatibility? end end From b345e0e370b4ee48eca14675bed0206f5ad4a831 Mon Sep 17 00:00:00 2001 From: kares Date: Mon, 6 Dec 2021 07:27:10 +0100 Subject: [PATCH 22/22] Chore: wording - trigger new CI build --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af1b096..aa8fdf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ - Feat: ECS compatibility [#55](https://github.com/logstash-plugins/logstash-input-imap/pull/55) * added (optional) `headers_target` configuration option * added (optional) `attachments_target` configuration option - - Fix: plugin should not close `$stdin`, while stoping + - Fix: plugin should not close `$stdin`, while being stopped ## 3.1.0 - Adds an option to recursively search the message parts for attachment and inline attachment filenames. If the save_attachments option is set to true, the content of attachments is included in the `attachments.data` field. The attachment data can then be used by the Elasticsearch Ingest Attachment Processor Plugin.