From 9801af5ff9504c2df6c4b8809d86b5c92915b712 Mon Sep 17 00:00:00 2001
From: Akira Matsuda
Date: Wed, 5 Nov 2025 11:25:53 +0900
Subject: [PATCH 1/3] Message#from_s3 takes List instance

---
Review notes (free text after the three-dash line; not part of the commit):
- Message.from_s3 now receives a List record and reads list.name / list.id
  from it instead of looking the list up by name. reload_from_s3, import.rb
  and the from_s3 test are updated to pass a List.
- NOTE(review): import.rb now assigns list from List.find_by_name(params[:list]).
  If that returns nil for an unknown name, the list.name calls inside the
  rescue handlers would themselves raise -- confirm whether a guard is wanted.

 app/models/message.rb       | 8 ++++----
 import.rb                   | 8 ++++----
 test/models/message_test.rb | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/models/message.rb b/app/models/message.rb
index b110911..42ebe41 100644
--- a/app/models/message.rb
+++ b/app/models/message.rb
@@ -91,11 +91,11 @@ def from_mail(mail, list, list_seq)
   end
 
   class << self
-    def from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
-      obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}")
+    def from_s3(list, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
+      obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list.name}/#{list_seq}")
 
       m = self.from_string(obj.body.read)
-      m.list_id = List.find_by_name(list_name).id
+      m.list_id = list.id
       m.list_seq = list_seq
       m
     end
@@ -140,7 +140,7 @@ def count_recursively(count = 0)
   end
 
   def reload_from_s3(s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
-    m = Message.from_s3(List.find(self.list_id).name, self.list_seq, s3_client)
+    m = Message.from_s3(List.find(self.list_id), self.list_seq, s3_client)
 
     self.body = m.body
     self.subject = m.subject
diff --git a/import.rb b/import.rb
index 71e4d92..0e4e926 100644
--- a/import.rb
+++ b/import.rb
@@ -7,7 +7,7 @@
   opts.on('--to TO', Integer)
 end.parse!(into: params)
 
-list = params[:list]
+list = List.find_by_name(params[:list])
 
 Message.transaction do
   (params[:from]..params[:to]).each do |seq|
@@ -15,11 +15,11 @@
       message = Message.from_s3(list, seq)
       message.save!
     rescue ActiveRecord::RecordNotUnique
-      STDERR.puts("#{list}:#{seq} already exists in Postgres")
+      STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
     rescue Aws::S3::Errors::NoSuchKey
-      STDERR.puts("#{list}:#{seq} doesn't exist in S3")
+      STDERR.puts("#{list.name}:#{seq} doesn't exist in S3")
     rescue StandardError => e
-      STDERR.puts("failed to import #{list}:#{seq}: #{e}")
+      STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
     end
   end
 end
diff --git a/test/models/message_test.rb b/test/models/message_test.rb
index 58b647d..9149ee8 100644
--- a/test/models/message_test.rb
+++ b/test/models/message_test.rb
@@ -38,7 +38,7 @@ class MessageTest < ActiveSupport::TestCase
 Hello, world!
 END_OF_BODY
 
-    Message.from_s3('ruby-list', 1234, s3_client)
+    Message.from_s3(List.find_by_name('ruby-list'), 1234, s3_client)
   end
 
   test 'reload_from_s3' do
From 031aabd4a2aa1d9b8ea4ddd4452001dbb50a3c82 Mon Sep 17 00:00:00 2001
From: Akira Matsuda
Date: Wed, 5 Nov 2025 11:29:12 +0900
Subject: [PATCH 2/3] Use yet another S3 bucket that includes raw mail data

---
Review notes (free text after the three-dash line; not part of the commit):
- BLADE_BUCKET_NAME changes from 'blade.ruby-lang.org' to 'blade-data-vault'.
- from_s3 now parses the fetched object body with Mail.read_from_string
  (after forcing it to binary encoding) and builds the record through
  Message.from_mail, replacing the from_string + manual list_id/list_seq
  assignment.

 app/models/message.rb | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/app/models/message.rb b/app/models/message.rb
index 42ebe41..af62105 100644
--- a/app/models/message.rb
+++ b/app/models/message.rb
@@ -1,5 +1,5 @@
 BLADE_BUCKET_REGION = 'ap-northeast-1'
-BLADE_BUCKET_NAME = 'blade.ruby-lang.org'
+BLADE_BUCKET_NAME = 'blade-data-vault'
 
 require 'kconv'
 
@@ -93,11 +93,8 @@ def from_mail(mail, list, list_seq)
   class << self
     def from_s3(list, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
       obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list.name}/#{list_seq}")
-
-      m = self.from_string(obj.body.read)
-      m.list_id = list.id
-      m.list_seq = list_seq
-      m
+      mail = Mail.read_from_string obj.body.read.force_encoding(Encoding::BINARY)
+      Message.from_mail mail, list, list_seq
     end
 
     def from_string(str)
From 4f69cbdd86df72444c83736778880b9624f0ee53 Mon Sep 17 00:00:00 2001
From: Akira Matsuda
Date: Wed, 5 Nov 2025 12:58:33 +0900
Subject: [PATCH 3/3] Integrate import.rb into bin/import_mails

---
Review notes (free text after the three-dash line; not part of the commit):
- bin/import_mails gains a --local flag. With it, each mail is read from
  BASE_DIR/<list name>/<seq>; without it, the mail is fetched through
  Message.from_s3. import.rb is deleted and the README command is updated.
- Review fix: the submitted hunk had an unconditional `next` directly after
  `raise "No ..." unless filepath.exist?`, so every sequence in --local mode
  was skipped before its file was read, followed by an unreachable
  `next unless filepath.exist?`. Both lines are dropped here; the raise is
  kept so a missing local file is reported through the StandardError handler.
  The hunk header and diffstat are adjusted to match. The blob id on the
  bin/import_mails index line was not recomputed.

 README.md        |  2 +-
 bin/import_mails | 27 ++++++++++++++++++---------
 import.rb        | 25 -------------------------
 3 files changed, 19 insertions(+), 35 deletions(-)
 delete mode 100644 import.rb

diff --git a/README.md b/README.md
index f8c1d30..02dac34 100644
--- a/README.md
+++ b/README.md
@@ -7,5 +7,5 @@ From `heroku run bash`
 ```
 % heroku run bash
 Running bash on ⬢ blade-ruby-lang... up, run.7782
-~ $ ./bin/rails runner import.rb --list ruby-list --from 1001 --to 2000
+~ $ ./bin/rails runner bin/import_mails --list ruby-list --from 1001 --to 2000
 ```
diff --git a/bin/import_mails b/bin/import_mails
index 60fb8cd..02f5119 100755
--- a/bin/import_mails
+++ b/bin/import_mails
@@ -8,6 +8,7 @@ BASE_DIR = Rails.root.join('tmp')
 
 params = {}
 OptionParser.new do |opts|
+  opts.on('--local')
   opts.on('--list LIST')
   opts.on('--from FROM', Integer)
   opts.on('--to TO', Integer)
@@ -22,17 +23,25 @@ Rails.logger.level = Logger::INFO
 Message.transaction do
   (params[:from]..params[:to]).each do |seq|
     begin
-      filepath = BASE_DIR.join(list.name, seq.to_s)
-      next unless filepath.exist?
-
-      str = File.binread filepath
-      next if str.blank?
-
-      mail = Mail.read_from_string str
-      message = Message.from_mail mail, list, seq
+      if params[:local]
+        filepath = BASE_DIR.join(list.name, seq.to_s)
+        raise "No #{seq.to_s}" unless filepath.exist?
+
+        str = File.binread filepath
+        next if str.blank?
+
+        mail = Mail.read_from_string str
+        message = Message.from_mail mail, list, seq
+      else
+        message = Message.from_s3(list, seq)
+      end
+
+      p seq if seq % 10 == 0
       message.save!
     rescue ActiveRecord::RecordNotUnique
-      STDERR.puts("#{list}:#{seq} already exists in Postgres")
+      STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
+    rescue Aws::S3::Errors::NoSuchKey
+      STDERR.puts("#{list.name}:#{seq} doesn't exist in S3")
     rescue StandardError => e
       errors << [seq, e]
       STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
diff --git a/import.rb b/import.rb
deleted file mode 100644
index 0e4e926..0000000
--- a/import.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-require 'optparse'
-
-params = {}
-OptionParser.new do |opts|
-  opts.on('--list LIST')
-  opts.on('--from FROM', Integer)
-  opts.on('--to TO', Integer)
-end.parse!(into: params)
-
-list = List.find_by_name(params[:list])
-
-Message.transaction do
-  (params[:from]..params[:to]).each do |seq|
-    begin
-      message = Message.from_s3(list, seq)
-      message.save!
-    rescue ActiveRecord::RecordNotUnique
-      STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
-    rescue Aws::S3::Errors::NoSuchKey
-      STDERR.puts("#{list.name}:#{seq} doesn't exist in S3")
-    rescue StandardError => e
-      STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
-    end
-  end
-end