From f6e9c2b0371086cc43d19bd8c5f77a6cd12cebde Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 01:13:41 +0900 Subject: [PATCH 1/7] Handle text/enriched; as text ruby-dev: 835 --- app/models/message.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/message.rb b/app/models/message.rb index 2eb5ec9..eda0564 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -72,7 +72,7 @@ def from_mail(mail, list, list_seq) attachments.attach(io: file, filename: part.filename, content_type: part.content_type) else case part.content_type.downcase - when /^text\/plain/ + when /^text\/plain/, /text\/enriched;/ (self.body ||= '') << Kconv.toutf8(part.body.raw_source) when /^text\/html;/ (self.html_body ||= '') << Kconv.toutf8(part.body.raw_source) From 5f481f71652f16d7503cca45e991ac834a22108f Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 01:26:21 +0900 Subject: [PATCH 2/7] Handle application/ms-tnef; as an attachment ruby-dev: 1148 --- app/models/message.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/models/message.rb b/app/models/message.rb index eda0564..a098274 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -72,6 +72,9 @@ def from_mail(mail, list, list_seq) attachments.attach(io: file, filename: part.filename, content_type: part.content_type) else case part.content_type.downcase + when 'application/ms-tnef' + file = StringIO.new(part.decoded) + attachments.attach(io: file, filename: part.filename || 'noname', content_type: part.content_type) when /^text\/plain/, /text\/enriched;/ (self.body ||= '') << Kconv.toutf8(part.body.raw_source) when /^text\/html;/ From ee6f828d8432d7cda8660cec92475c13816edff5 Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 03:22:46 +0900 Subject: [PATCH 3/7] multipart body can contain multipart so let's call it recursively --- app/models/message.rb | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/app/models/message.rb b/app/models/message.rb index a098274..a21f501 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -23,13 +23,7 @@ def from_mail(mail, list, list_seq) def from_mail(mail, list, list_seq) self.list_id, self.list_seq, self.published_at = list.id, list_seq, mail.date - if mail.multipart? - mail.parts.each do |p| - handle_multipart p - end - else - self.body = Kconv.toutf8 mail.body.raw_source - end + handle_body mail if ((list.name == 'ruby-dev') && list_seq.in?([13859, 26229, 39731, 39734])) || ((list.name == 'ruby-core') && list_seq.in?([5231])) || ((list.name == 'ruby-list') && list_seq.in?([29637, 29711, 30148])) || ((list.name == 'ruby-talk') && list_seq.in?([5198, 61316])) self.body.gsub!("\u0000", '') @@ -66,16 +60,20 @@ def from_mail(mail, list, list_seq) self end - private def handle_multipart(part) - if part.attachment? + private def handle_body(part) + if part.multipart? + part.parts.each do |p| + handle_body p + end + elsif part.attachment? file = StringIO.new(part.decoded) attachments.attach(io: file, filename: part.filename, content_type: part.content_type) else - case part.content_type.downcase + case part.content_type&.downcase when 'application/ms-tnef' file = StringIO.new(part.decoded) attachments.attach(io: file, filename: part.filename || 'noname', content_type: part.content_type) - when /^text\/plain/, /text\/enriched;/ + when /^text\/plain/, /text\/enriched;/, nil (self.body ||= '') << Kconv.toutf8(part.body.raw_source) when /^text\/html;/ (self.html_body ||= '') << Kconv.toutf8(part.body.raw_source) From 23183c8c4fe3f2c7d6b81cd7c93f9a7c12c568e4 Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 10:36:00 +0900 Subject: [PATCH 4/7] Perhaps attachment file body should better not be decoded? --- app/models/message.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/message.rb b/app/models/message.rb index a21f501..269009d 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -66,7 +66,7 @@ def from_mail(mail, list, list_seq) handle_body p end elsif part.attachment? - file = StringIO.new(part.decoded) + file = StringIO.new(part.body.raw_source) attachments.attach(io: file, filename: part.filename, content_type: part.content_type) else case part.content_type&.downcase From 776916586f5c2db109bddfec6cbc4a9caf6ff6c1 Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 10:52:46 +0900 Subject: [PATCH 5/7] from_address can be nil and in that case it causes NPE --- app/models/message.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/message.rb b/app/models/message.rb index 269009d..bfea2d6 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -38,7 +38,8 @@ def from_mail(mail, list, list_seq) self.subject = mail.subject self.subject = Kconv.toutf8 subject if self.subject - self.from = Kconv.toutf8 mail.from_address&.raw + self.from = mail.from_address&.raw + self.from = Kconv.toutf8 from if from if !self.from && (list.name == 'ruby-core') && (list_seq == 161) self.from = mail.from.encode Encoding::UTF_8, Encoding::KOI8_R end From 981ca6c7ad0d7acfb8873c664c63357e88236e93 Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 11:48:37 +0900 Subject: [PATCH 6/7] style --- app/models/message.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/message.rb b/app/models/message.rb index bfea2d6..da60bae 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -40,7 +40,7 @@ def from_mail(mail, list, list_seq) self.from = mail.from_address&.raw self.from = Kconv.toutf8 from if from - if !self.from && (list.name == 'ruby-core') && (list_seq == 161) + if !from && (list.name == 'ruby-core') && (list_seq == 161) self.from = mail.from.encode Encoding::UTF_8, Encoding::KOI8_R end From 0a7105a4bbf92d964a7fbfafa72a86844574e701 Mon Sep 17 00:00:00 2001 From: Akira Matsuda Date: Wed, 22 Oct 2025 11:48:58 +0900 Subject: [PATCH 7/7] Some more content-types --- app/models/message.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/models/message.rb b/app/models/message.rb index da60bae..f873c96 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -71,12 +71,14 @@ def from_mail(mail, list, list_seq) attachments.attach(io: file, filename: part.filename, content_type: part.content_type) else case part.content_type&.downcase + when 'application/pgp-signature' + # ignore when 'application/ms-tnef' file = StringIO.new(part.decoded) attachments.attach(io: file, filename: part.filename || 'noname', content_type: part.content_type) - when /^text\/plain/, /text\/enriched;/, nil + when /^text\/plain/, /text\/enriched;/, 'message/rfc822', nil (self.body ||= '') << Kconv.toutf8(part.body.raw_source) - when /^text\/html;/ + when /^text\/html/ (self.html_body ||= '') << Kconv.toutf8(part.body.raw_source) else puts "Unknown content_type: #{part.content_type}"