Permalink
Browse files

(#7938) Delayed import from file, not YAML string.

When we import a report, pushing the entire content through the database using
DelayedJob is fairly inefficient - we were shipping megabytes of YAML around,
with some inefficient encoding, to get the data imported.

Writing the report to a file, then importing by filename, increases
performance substantially, at the cost of tying us to workers on the same
node.  Right now, that seems a worthwhile trade-off.

Reviewed-By: Matt Robinson <matt@puppetlabs.com>

Based on commit 1d00feb7a66c3f8001eec87d22c1cef823c5b945 by
  Pieter van de Bruggen <pieter@puppetlabs.com>
  • Loading branch information...
1 parent 58c2b52 commit e839884fb67cb02ab80619bf3f6ef4554d8f3ef6 Pieter van de Bruggen committed with slippycheeze Jun 15, 2011
@@ -27,8 +27,22 @@ def create
def upload
begin
- Report.create_from_yaml(params[:report][:report])
- render :text => "Report successfully uploaded"
+ yaml = params[:report][:report]
+ md5 = Digest::MD5.hexdigest(yaml)
+ file = Rails.root + 'spool' + "#{md5}.yaml"
+ n = 0
+
+ begin
+ fd = File.new(file, File::CREAT|File::EXCL|File::RDWR, 0600)
+ fd.print yaml
+ fd.close
+
+ Report.delay.create_from_yaml_file(file.to_s, :delete => true)
+ render :text => "Report queued for import as #{md5}"
+ rescue Errno::EEXIST
+ file = Rails.root + 'spool' + "#{md5}-#{n += 1}.yaml"
+ retry
+ end
rescue => e
error_text = "ERROR! ReportsController#upload failed:"
Rails.logger.debug error_text
View
@@ -79,6 +79,12 @@ def self.attribute_hash_from(report_hash)
attribute_hash
end
+ def self.create_from_yaml_file(report_file, options = {})
+ report = create_from_yaml(File.read(report_file))
+ File.unlink(report_file) if options[:delete]
+ return report
+ end
+
def self.create_from_yaml(report_yaml)
raw_report = YAML.load(report_yaml)
@@ -96,10 +102,6 @@ def self.create_from_yaml(report_yaml)
report
end
- class << self
- handle_asynchronously :create_from_yaml
- end
-
def assign_to_node
self.node = Node.find_or_create_by_name(self.host)
end
@@ -1,11 +1,12 @@
-DELAYED_JOB_PID_PATH = "#{Rails.root}/tmp/pids/delayed_job.pid"
+DELAYED_JOB_PID_PATH = Pathname.new "#{Rails.root}/tmp/pids/delayed_job_#{Rails.env}"
+DELAYED_JOB_PID_PATH.mkpath
Delayed::Worker.destroy_failed_jobs = false
Delayed::Worker.max_attempts = 3
def start_delayed_job
Thread.new do
- `#{Rails.root}/script/delayed_job -p dashboard -m start`
+ `#{Rails.root}/script/delayed_job --pid-dir=#{DELAYED_JOB_PID_PATH} -p dashboard -m start`
end
end
@@ -19,6 +20,8 @@ def process_is_dead?
end
end
-if !File.exist?(DELAYED_JOB_PID_PATH) && process_is_dead?
- start_delayed_job
+unless Rails.env == 'test'
+ if !File.exist?(DELAYED_JOB_PID_PATH + 'delayed_job.pid') && process_is_dead?
+ start_delayed_job
+ end
end
@@ -9,14 +9,13 @@ namespace :reports do
plural = lambda{|str, count| str + (count != 1 ? 's' : '')}
reports = FileList[File.join(report_dir, '**', '*.yaml')]
- STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} from #{report_dir}"
+ STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} from #{report_dir} in the background"
skipped = 0
pbar = ProgressBar.new("Importing:", reports.size, STDOUT)
reports.each do |report|
- data = File.read(report)
success = begin
- Report.create_from_yaml(data)
+ Report.delay.create_from_yaml_file(report)
rescue => e
puts e
false
@@ -28,7 +27,7 @@ namespace :reports do
successes = reports.size - skipped
- STDOUT.puts "#{successes} of #{reports.size} #{plural['report', successes]} imported"
+ STDOUT.puts "#{successes} of #{reports.size} #{plural['report', successes]} queued"
STDOUT.puts "#{skipped} #{plural['report', skipped]} skipped" if skipped > 0
end
end
@@ -15,28 +15,37 @@ def model; Report end
describe "correctly formatted POST", :shared => true do
it { should_not raise_error }
it { should change(Report, :count).by(1) }
- it { should change{ Node.find_by_name("sample_node")}.from(nil) }
+ it { should change { Node.find_by_name("sample_node") }.from(nil) }
end
describe "with a POST from Puppet 2.6.x" do
subject do
- lambda { post_with_body('upload', @yaml, :content_type => 'application/x-yaml') }
+ lambda {
+ post_with_body('upload', @yaml, :content_type => 'application/x-yaml')
+ Delayed::Worker.new.work_off
+ }
end
it_should_behave_like "correctly formatted POST"
end
describe "with a POST from Puppet 0.25.x" do
subject do
- lambda { post('upload', :report => @yaml) }
+ lambda {
+ post('upload', :report => @yaml)
+ Delayed::Worker.new.work_off
+ }
end
it_should_behave_like "correctly formatted POST"
end
describe "with a POST with a report inside the report parameter" do
subject do
- lambda { post('upload', :report => { :report => @yaml }) }
+ lambda {
+ post('upload', :report => { :report => @yaml })
+ Delayed::Worker.new.work_off
+ }
end
it_should_behave_like "correctly formatted POST"
@@ -47,9 +56,8 @@ def model; Report end
post('upload', :report => "" )
end
- it "should return a 406 response and the error text" do
- response.code.should == '406'
- response.body.should == "ERROR! ReportsController#upload failed: The supplied report is in invalid format 'FalseClass', expected 'Puppet::Transaction::Report'"
+ it "should be 200, because we queued the job" do
+ response.code.should == '200'
end
end
@@ -58,9 +66,8 @@ def model; Report end
post('upload', :report => "foo bar baz bad data invalid")
end
- it "should return a 406 response and the error text" do
- response.code.should == '406'
- response.body.should == "ERROR! ReportsController#upload failed: The supplied report is in invalid format 'String', expected 'Puppet::Transaction::Report'"
+ it "should be 200, because we queued the job" do
+ response.code.should == '200'
end
end
end
View
@@ -0,0 +1,3 @@
+# This directory is used to spool YAML reports for background importing.
+# None of the content lives beyond the time to load the report.
+*

0 comments on commit e839884

Please sign in to comment.