Removing attach_works, which would slow down edits
This forces publish to use S3 directly instead of local files, which should be faster.
Many tests needed to change to mock S3 instead of attaching files to pre_curation_uploads.
carolyncole committed Feb 21, 2023
1 parent fcfb356 commit c3384ca
Showing 10 changed files with 159 additions and 242 deletions.
58 changes: 6 additions & 52 deletions app/models/work.rb
@@ -124,7 +124,6 @@ def publish_test_doi?
 
   after_save do |work|
     if work.approved?
-      work.attach_s3_resources if !work.pre_curation_uploads.empty? && work.pre_curation_uploads.length > work.post_curation_uploads.length
       work.reload
     end
   end
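For context: this callback ran on every save of an approved work, and the attach_s3_resources step it invoked (deleted further down in this diff) checked S3 and attached any new objects each time, which is the slowdown the commit title refers to.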
@@ -384,20 +383,6 @@ def s3_client
 
   delegate :bucket_name, to: :s3_query_service
 
-  # Transmit a HEAD request for an S3 Object in the post-curation Bucket
-  # @param key [String]
-  # @param bucket_name [String]
-  # @return [Aws::S3::Types::HeadObjectOutput]
-  def find_post_curation_s3_object(bucket_name:, key:)
-    s3_client.head_object({
-      bucket: bucket_name,
-      key: key
-    })
-    true
-  rescue Aws::S3::Errors::NotFound
-    nil
-  end
-
   # Generates the S3 Object key
   # @return [String]
   def s3_object_key
@@ -408,6 +393,8 @@ def s3_object_key
   # @param bucket_name [String] location to be checked
   # @return [Aws::S3::Types::HeadObjectOutput]
   def find_post_curation_s3_dir(bucket_name:)
+    # TODO: Directories do not really exist in S3;
+    # if we really need this check, we need another way to inspect the bucket
     s3_client.head_object({
       bucket: bucket_name,
       key: s3_object_key
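As the new TODO notes, S3 has no real directories, so a HEAD request against a bare prefix is not a reliable existence check. A common alternative, sketched here assuming the same s3_client, bucket_name, and s3_object_key seen above, is to list the prefix and see whether anything comes back:

```ruby
# Sketch only: treat the work's key prefix as a "directory" and check for contents.
# max_keys: 1 keeps the request cheap; key_count is part of the ListObjectsV2 response.
resp = s3_client.list_objects_v2(bucket: bucket_name, prefix: s3_object_key, max_keys: 1)
resp.key_count.positive?
```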
@@ -417,34 +404,6 @@ def find_post_curation_s3_dir(bucket_name:)
     nil
   end
 
-  # Transmit a DELETE request for the S3 directory in the pre-curation Bucket
-  # @return [Aws::S3::Types::DeleteObjectOutput]
-  def delete_pre_curation_s3_dir
-    s3_client.delete_object({
-      bucket: bucket_name,
-      key: s3_object_key
-    })
-  rescue Aws::S3::Errors::ServiceError => error
-    raise(StandardError, "Failed to delete the pre-curation S3 Bucket directory #{s3_object_key}: #{error}")
-  end
-
-  # This is invoked within the scope of #after_save. Attachment objects require that the parent record be persisted (hence, #before_save is not an option).
-  # However, a consequence of this is that #after_save is invoked whenever a new attached Blob or Attachment object is persisted.
-  def attach_s3_resources
-    return if approved?
-    changes = []
-    # This retrieves and adds S3 uploads if they do not exist
-    pre_curation_s3_resources.each do |s3_file|
-      if add_pre_curation_s3_object(s3_file)
-        changes << { action: :added, filename: s3_file.filename }
-      end
-    end
-
-    # Log the new files, but don't link the change to the current_user since we really don't know
-    # who added the files directly to AWS S3.
-    log_file_changes(changes, nil)
-  end
-
   def as_json(options = nil)
     if options&.present?
       raise(StandardError, "Received options #{options}, but not supported")
@@ -668,7 +627,7 @@ def add_pre_curation_s3_object(s3_file)
 
   def publish_precurated_files
     # An error is raised if there are no files to be moved
-    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads.empty? && post_curation_uploads.empty?
+    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
 
     # We need to explicitly access the post-curation services here.
     # Let's explicitly create it so the state of the work does not have any impact.
@@ -678,17 +637,12 @@ def publish_precurated_files
     raise(StandardError, "Attempting to publish a Work with an existing S3 Bucket directory for: #{s3_object_key}") unless s3_dir.nil?
 
     # Copy the pre-curation S3 Objects to the post-curation S3 Bucket...
-    transferred_files = s3_post_curation_query_service.publish_files
+    transferred_file_errors = s3_query_service.publish_files
 
     # ...check that the files are indeed now in the post-curation bucket...
-    pre_curation_uploads.each do |attachment|
-      s3_object = find_post_curation_s3_object(bucket_name: s3_post_curation_query_service.bucket_name, key: attachment.key)
-      raise(StandardError, "Failed to validate the uploaded S3 Object #{attachment.key}") if s3_object.nil?
+    if transferred_file_errors.count > 0
+      raise(StandardError, "Failed to validate the uploaded S3 Object #{transferred_file_errors.map(&:key).join(', ')}")
     end
-
-    # ...and delete them from the pre-curation bucket.
-    transferred_files.each(&:purge)
-    delete_pre_curation_s3_dir
   end
 end
 # rubocop:enable Metrics/ClassLength
32 changes: 26 additions & 6 deletions app/services/s3_query_service.rb
@@ -107,7 +107,7 @@ def model_s3_files
     return objects if model.nil?
 
     model_uploads.each do |attachment|
-      s3_file = S3File.new(work: model,
+      s3_file = S3File.new(query_service: self,
                            filename: attachment.key,
                            last_modified: attachment.created_at,
                            size: attachment.byte_size,
@@ -140,7 +140,7 @@ def find_s3_file(filename:)
 
   # Retrieve the S3 resources uploaded to the S3 Bucket
   # @return [Array<S3File>]
-  def client_s3_files(reload: false)
+  def client_s3_files(reload: false, bucket_name: self.bucket_name)
     @client_s3_files = nil if reload # force a reload
     @client_s3_files ||= begin
       start = Time.zone.now
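One caveat implied by the new signature: @client_s3_files is memoized without regard to bucket_name, so a call against a different bucket only takes effect when it also forces a reload. A usage sketch (the service and source_bucket names here are illustrative):

```ruby
# First call caches the listing for the service's default bucket.
files = service.client_s3_files
# A listing from another bucket must bypass the cache, as publish_files does below.
files = service.client_s3_files(reload: true, bucket_name: source_bucket)
```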
@@ -184,20 +184,24 @@ def data_profile
   # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
   # Returns an array with the files that were copied.
   def publish_files
-    files = []
     source_bucket = S3QueryService.pre_curation_config[:bucket]
     target_bucket = S3QueryService.post_curation_config[:bucket]
-    model.pre_curation_uploads.each do |file|
+    files = client_s3_files(reload: true, bucket_name: source_bucket)
+
+    files.each do |file|
       params = {
         copy_source: "/#{source_bucket}/#{file.key}",
         bucket: target_bucket,
         key: file.key
       }
       Rails.logger.info("Copying #{params[:copy_source]} to #{params[:bucket]}/#{params[:key]}")
       client.copy_object(params)
-      files << file
     end
-    files
+
+    error_files = check_files(target_bucket, files)
+
+    delete_files_and_directory(files) if error_files.empty?
+    error_files
   end
 
   def delete_s3_object(s3_file_key)
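Two notes on publish_files as rewritten. First, despite the older comment, it now returns the files that failed the post-copy check, not the files that were copied. Second, copy_object can only copy objects up to 5 GB; if deposits can exceed that, the higher-level SDK interface would be needed. A sketch of that variant, not part of this commit:

```ruby
# Sketch: Aws::S3::Object#copy_to switches to a multipart copy for large
# objects when multipart_copy: true is passed; source_bucket, target_bucket,
# file, and client are the same names used in publish_files above.
source = Aws::S3::Object.new(bucket_name: source_bucket, key: file.key, client: client)
source.copy_to({ bucket: target_bucket, key: file.key }, multipart_copy: true)
```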
Expand Down Expand Up @@ -237,5 +241,21 @@ def parse_continuation(resp_hash)
end
objects
end

def check_files(target_bucket, files)
error_files = []
files.each do |file|
error_files << file unless client.head_object({ bucket: target_bucket, key: file.key })
end
error_files
end

def delete_files_and_directory(files)
# ...and delete them from the pre-curation bucket.
files.each do |s3_file|
delete_s3_object(s3_file.key)
end
delete_s3_object(model.s3_object_key)
end
end
# rubocop:enable Metrics/ClassLength
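A caution on check_files: Aws::S3::Client#head_object raises Aws::S3::Errors::NotFound for a missing key rather than returning a falsy value, so the unless guard will not collect missing files; the exception would propagate instead. A rescue-based existence check, mirroring the find_post_curation_s3_object method deleted from work.rb above, would look roughly like this:

```ruby
# Sketch of a NotFound-safe existence check for use inside check_files.
def object_exists?(bucket:, key:)
  client.head_object(bucket: bucket, key: key)
  true
rescue Aws::S3::Errors::NotFound
  false
end
```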
13 changes: 7 additions & 6 deletions spec/controllers/works_controller_spec.rb
@@ -926,12 +926,16 @@
   end
 
   describe "#approve" do
+    before do
+      stub_s3
+      allow(Work).to receive(:find).with(work.id).and_return(work)
+      allow(Work).to receive(:find).with(work.id.to_s).and_return(work)
+      allow(work).to receive(:publish_precurated_files).and_return(true)
+    end
+
     it "handles approvals" do
       work.complete_submission!(user)
       stub_datacite_doi
-      file_name = uploaded_file.original_filename
-      stub_work_s3_requests(work: work, file_name: file_name)
-      work.pre_curation_uploads.attach(uploaded_file)
 
       sign_in curator
       post :approve, params: { id: work.id }
@@ -945,9 +949,6 @@
       sign_in curator
       work.complete_submission!(user)
       stub_datacite_doi(result: Failure(Faraday::Response.new(Faraday::Env.new)))
-      file_name = uploaded_file.original_filename
-      stub_work_s3_requests(work: work, file_name: file_name)
-      work.pre_curation_uploads.attach(uploaded_file)
     end
 
     it "approves and notes that it was not published" do
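The specs now lean on a stub_s3 helper instead of attaching uploads; its definition is not part of this diff. A minimal sketch of what such a helper might do, assuming it doubles out S3QueryService (the real helper may differ):

```ruby
# Hypothetical helper for illustration only; the methods stubbed here exist on
# S3QueryService per the diff above, but the project's real stub_s3 may differ.
def stub_s3(data: [])
  fake_s3_query = instance_double(S3QueryService,
                                  client_s3_files: data,
                                  publish_files: [],
                                  bucket_name: "example-bucket")
  allow(S3QueryService).to receive(:new).and_return(fake_s3_query)
  fake_s3_query
end
```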