Import mails either from S3 or from local files with bin/import_mails.

* Message.from_s3 now takes a List record (it reads list.name) instead of a
  list name string, and builds the message through Message.from_mail.
* bin/import_mails gains a --local switch: with it, mails are read from
  tmp/<list name>/<seq>; without it, they are fetched with Message.from_s3.
* The standalone import.rb script is removed; README points at bin/import_mails.

diff --git a/README.md b/README.md
index f8c1d30..02dac34 100644
--- a/README.md
+++ b/README.md
@@ -7,5 +7,5 @@ From `heroku run bash`
 ```
 % heroku run bash
 Running bash on ⬢ blade-ruby-lang... up, run.7782
-~ $ ./bin/rails runner import.rb --list ruby-list --from 1001 --to 2000
+~ $ ./bin/rails runner bin/import_mails --list ruby-list --from 1001 --to 2000
 ```
diff --git a/app/models/message.rb b/app/models/message.rb
index b110911..af62105 100644
--- a/app/models/message.rb
+++ b/app/models/message.rb
@@ -1,5 +1,5 @@
 BLADE_BUCKET_REGION = 'ap-northeast-1'
-BLADE_BUCKET_NAME = 'blade.ruby-lang.org'
+BLADE_BUCKET_NAME = 'blade-data-vault'
 
 require 'kconv'
 
@@ -91,13 +91,10 @@ def from_mail(mail, list, list_seq)
   end
 
   class << self
-    def from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
-      obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}")
-
-      m = self.from_string(obj.body.read)
-      m.list_id = List.find_by_name(list_name).id
-      m.list_seq = list_seq
-      m
+    def from_s3(list, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
+      obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list.name}/#{list_seq}")
+      mail = Mail.read_from_string obj.body.read.force_encoding(Encoding::BINARY)
+      Message.from_mail mail, list, list_seq
     end
 
     def from_string(str)
@@ -140,7 +137,7 @@ def count_recursively(count = 0)
   end
 
   def reload_from_s3(s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
-    m = Message.from_s3(List.find(self.list_id).name, self.list_seq, s3_client)
+    m = Message.from_s3(List.find(self.list_id), self.list_seq, s3_client)
 
     self.body = m.body
     self.subject = m.subject
diff --git a/bin/import_mails b/bin/import_mails
index 60fb8cd..02f5119 100755
--- a/bin/import_mails
+++ b/bin/import_mails
@@ -8,6 +8,7 @@ BASE_DIR = Rails.root.join('tmp')
 
 params = {}
 OptionParser.new do |opts|
+  opts.on('--local')
   opts.on('--list LIST')
   opts.on('--from FROM', Integer)
   opts.on('--to TO', Integer)
@@ -22,17 +23,26 @@ Rails.logger.level = Logger::INFO
 Message.transaction do
   (params[:from]..params[:to]).each do |seq|
     begin
-      filepath = BASE_DIR.join(list.name, seq.to_s)
-      next unless filepath.exist?
-
-      str = File.binread filepath
-      next if str.blank?
-
-      mail = Mail.read_from_string str
-      message = Message.from_mail mail, list, seq
+      if params[:local]
+        filepath = BASE_DIR.join(list.name, seq.to_s)
+        # A missing file raises so the StandardError rescue below records it in `errors`.
+        raise "No #{seq.to_s}" unless filepath.exist?
+
+        str = File.binread filepath
+        next if str.blank?
+
+        mail = Mail.read_from_string str
+        message = Message.from_mail mail, list, seq
+      else
+        message = Message.from_s3(list, seq)
+      end
+
+      p seq if seq % 10 == 0
       message.save!
     rescue ActiveRecord::RecordNotUnique
-      STDERR.puts("#{list}:#{seq} already exists in Postgres")
+      STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
+    rescue Aws::S3::Errors::NoSuchKey
+      STDERR.puts("#{list.name}:#{seq} doesn't exist in S3")
     rescue StandardError => e
       errors << [seq, e]
       STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
diff --git a/import.rb b/import.rb
deleted file mode 100644
index 71e4d92..0000000
--- a/import.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-require 'optparse'
-
-params = {}
-OptionParser.new do |opts|
-  opts.on('--list LIST')
-  opts.on('--from FROM', Integer)
-  opts.on('--to TO', Integer)
-end.parse!(into: params)
-
-list = params[:list]
-
-Message.transaction do
-  (params[:from]..params[:to]).each do |seq|
-    begin
-      message = Message.from_s3(list, seq)
-      message.save!
-    rescue ActiveRecord::RecordNotUnique
-      STDERR.puts("#{list}:#{seq} already exists in Postgres")
-    rescue Aws::S3::Errors::NoSuchKey
-      STDERR.puts("#{list}:#{seq} doesn't exist in S3")
-    rescue StandardError => e
-      STDERR.puts("failed to import #{list}:#{seq}: #{e}")
-    end
-  end
-end
diff --git a/test/models/message_test.rb b/test/models/message_test.rb
index 58b647d..9149ee8 100644
--- a/test/models/message_test.rb
+++ b/test/models/message_test.rb
@@ -38,7 +38,7 @@ class MessageTest < ActiveSupport::TestCase
 Hello, world!
 END_OF_BODY
 
-    Message.from_s3('ruby-list', 1234, s3_client)
+    Message.from_s3(List.find_by_name('ruby-list'), 1234, s3_client)
   end
 
   test 'reload_from_s3' do