Removing attach_works, which would slow down edits
This forces publish to use S3 directly instead of local files, which should be faster.
Many tests needed to change to mock S3 instead of attaching files to pre_curation_uploads.
carolyncole committed Feb 21, 2023
1 parent fcfb356 commit c3384ca
Showing 10 changed files with 159 additions and 242 deletions.
58 changes: 6 additions & 52 deletions app/models/work.rb
@@ -124,7 +124,6 @@ def publish_test_doi?
 
   after_save do |work|
     if work.approved?
-      work.attach_s3_resources if !work.pre_curation_uploads.empty? && work.pre_curation_uploads.length > work.post_curation_uploads.length
       work.reload
     end
   end
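For context: this callback ran on every save of an approved work, and the attach_s3_resources step it invoked (deleted further down in this diff) checked S3 and attached any new objects each time, which is the slowdown the commit title refers to.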
@@ -384,20 +383,6 @@ def s3_client
 
   delegate :bucket_name, to: :s3_query_service
 
-  # Transmit a HEAD request for an S3 Object in the post-curation Bucket
-  # @param key [String]
-  # @param bucket_name [String]
-  # @return [Aws::S3::Types::HeadObjectOutput]
-  def find_post_curation_s3_object(bucket_name:, key:)
-    s3_client.head_object({
-      bucket: bucket_name,
-      key: key
-    })
-    true
-  rescue Aws::S3::Errors::NotFound
-    nil
-  end
-
   # Generates the S3 Object key
   # @return [String]
   def s3_object_key
@@ -408,6 +393,8 @@ def s3_object_key
   # @param bucket_name [String] location to be checked
   # @return [Aws::S3::Types::HeadObjectOutput]
   def find_post_curation_s3_dir(bucket_name:)
+    # TODO: Directories do not really exist in S3;
+    # if we really need this check, we need another way to inspect the bucket
     s3_client.head_object({
       bucket: bucket_name,
       key: s3_object_key
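As the new TODO notes, S3 has no real directories, so a HEAD request against a bare prefix is not a reliable existence check. A common alternative, sketched here assuming the same s3_client, bucket_name, and s3_object_key seen above, is to list the prefix and see whether anything comes back:

```ruby
# Sketch only: treat the work's key prefix as a "directory" and check for contents.
# max_keys: 1 keeps the request cheap; key_count is part of the ListObjectsV2 response.
resp = s3_client.list_objects_v2(bucket: bucket_name, prefix: s3_object_key, max_keys: 1)
resp.key_count.positive?
```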
@@ -417,34 +404,6 @@ def find_post_curation_s3_dir(bucket_name:)
     nil
   end
 
-  # Transmit a DELETE request for the S3 directory in the pre-curation Bucket
-  # @return [Aws::S3::Types::DeleteObjectOutput]
-  def delete_pre_curation_s3_dir
-    s3_client.delete_object({
-      bucket: bucket_name,
-      key: s3_object_key
-    })
-  rescue Aws::S3::Errors::ServiceError => error
-    raise(StandardError, "Failed to delete the pre-curation S3 Bucket directory #{s3_object_key}: #{error}")
-  end
-
-  # This is invoked within the scope of #after_save. Attachment objects require that the parent record be persisted (hence, #before_save is not an option).
-  # However, a consequence of this is that #after_save is invoked whenever a new attached Blob or Attachment object is persisted.
-  def attach_s3_resources
-    return if approved?
-    changes = []
-    # This retrieves and adds S3 uploads if they do not exist
-    pre_curation_s3_resources.each do |s3_file|
-      if add_pre_curation_s3_object(s3_file)
-        changes << { action: :added, filename: s3_file.filename }
-      end
-    end
-
-    # Log the new files, but don't link the change to the current_user since we really don't know
-    # who added the files directly to AWS S3.
-    log_file_changes(changes, nil)
-  end
-
   def as_json(options = nil)
     if options&.present?
       raise(StandardError, "Received options #{options}, but not supported")
@@ -668,7 +627,7 @@ def add_pre_curation_s3_object(s3_file)
 
   def publish_precurated_files
     # An error is raised if there are no files to be moved
-    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads.empty? && post_curation_uploads.empty?
+    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
 
     # We need to explicitly access the post-curation services here.
     # Let's explicitly create it so the state of the work does not have any impact.
@@ -678,17 +637,12 @@ def publish_precurated_files
     raise(StandardError, "Attempting to publish a Work with an existing S3 Bucket directory for: #{s3_object_key}") unless s3_dir.nil?
 
     # Copy the pre-curation S3 Objects to the post-curation S3 Bucket...
-    transferred_files = s3_post_curation_query_service.publish_files
+    transferred_file_errors = s3_query_service.publish_files
 
     # ...check that the files are indeed now in the post-curation bucket...
-    pre_curation_uploads.each do |attachment|
-      s3_object = find_post_curation_s3_object(bucket_name: s3_post_curation_query_service.bucket_name, key: attachment.key)
-      raise(StandardError, "Failed to validate the uploaded S3 Object #{attachment.key}") if s3_object.nil?
+    if transferred_file_errors.count > 0
+      raise(StandardError, "Failed to validate the uploaded S3 Object #{transferred_file_errors.map(&:key).join(', ')}")
     end
-
-    # ...and delete them from the pre-curation bucket.
-    transferred_files.each(&:purge)
-    delete_pre_curation_s3_dir
   end
 end
 # rubocop:enable Metrics/ClassLength
32 changes: 26 additions & 6 deletions app/services/s3_query_service.rb
@@ -107,7 +107,7 @@ def model_s3_files
     return objects if model.nil?
 
     model_uploads.each do |attachment|
-      s3_file = S3File.new(work: model,
+      s3_file = S3File.new(query_service: self,
                            filename: attachment.key,
                            last_modified: attachment.created_at,
                            size: attachment.byte_size,
@@ -140,7 +140,7 @@ def find_s3_file(filename:)
 
   # Retrieve the S3 resources uploaded to the S3 Bucket
   # @return [Array<S3File>]
-  def client_s3_files(reload: false)
+  def client_s3_files(reload: false, bucket_name: self.bucket_name)
     @client_s3_files = nil if reload # force a reload
     @client_s3_files ||= begin
       start = Time.zone.now
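One caveat implied by the new signature: @client_s3_files is memoized without regard to bucket_name, so a call against a different bucket only takes effect when it also forces a reload. A usage sketch (the service and source_bucket names here are illustrative):

```ruby
# First call caches the listing for the service's default bucket.
files = service.client_s3_files
# A listing from another bucket must bypass the cache, as publish_files does below.
files = service.client_s3_files(reload: true, bucket_name: source_bucket)
```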
@@ -184,20 +184,24 @@ def data_profile
   # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
   # Returns an array with the files that were copied.
   def publish_files
-    files = []
     source_bucket = S3QueryService.pre_curation_config[:bucket]
     target_bucket = S3QueryService.post_curation_config[:bucket]
-    model.pre_curation_uploads.each do |file|
+    files = client_s3_files(reload: true, bucket_name: source_bucket)
+
+    files.each do |file|
       params = {
         copy_source: "/#{source_bucket}/#{file.key}",
         bucket: target_bucket,
         key: file.key
       }
       Rails.logger.info("Copying #{params[:copy_source]} to #{params[:bucket]}/#{params[:key]}")
       client.copy_object(params)
-      files << file
     end
-    files
+
+    error_files = check_files(target_bucket, files)
+
+    delete_files_and_directory(files) if error_files.empty?
+    error_files
   end
 
   def delete_s3_object(s3_file_key)
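Two notes on publish_files as rewritten. First, despite the older comment, it now returns the files that failed the post-copy check, not the files that were copied. Second, copy_object can only copy objects up to 5 GB; if deposits can exceed that, the higher-level SDK interface would be needed. A sketch of that variant, not part of this commit:

```ruby
# Sketch: Aws::S3::Object#copy_to switches to a multipart copy for large
# objects when multipart_copy: true is passed; source_bucket, target_bucket,
# file, and client are the same names used in publish_files above.
source = Aws::S3::Object.new(bucket_name: source_bucket, key: file.key, client: client)
source.copy_to({ bucket: target_bucket, key: file.key }, multipart_copy: true)
```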
Expand Down Expand Up @@ -237,5 +241,21 @@ def parse_continuation(resp_hash)
end
objects
end

def check_files(target_bucket, files)
error_files = []
files.each do |file|
error_files << file unless client.head_object({ bucket: target_bucket, key: file.key })
end
error_files
end

def delete_files_and_directory(files)
# ...and delete them from the pre-curation bucket.
files.each do |s3_file|
delete_s3_object(s3_file.key)
end
delete_s3_object(model.s3_object_key)
end
end
# rubocop:enable Metrics/ClassLength
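A caution on check_files: Aws::S3::Client#head_object raises Aws::S3::Errors::NotFound for a missing key rather than returning a falsy value, so the unless guard will not collect missing files; the exception would propagate instead. A rescue-based existence check, mirroring the find_post_curation_s3_object method deleted from work.rb above, would look roughly like this:

```ruby
# Sketch of a NotFound-safe existence check for use inside check_files.
def object_exists?(bucket:, key:)
  client.head_object(bucket: bucket, key: key)
  true
rescue Aws::S3::Errors::NotFound
  false
end
```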
13 changes: 7 additions & 6 deletions spec/controllers/works_controller_spec.rb
@@ -926,12 +926,16 @@
   end
 
   describe "#approve" do
+    before do
+      stub_s3
+      allow(Work).to receive(:find).with(work.id).and_return(work)
+      allow(Work).to receive(:find).with(work.id.to_s).and_return(work)
+      allow(work).to receive(:publish_precurated_files).and_return(true)
+    end
+
     it "handles approvals" do
       work.complete_submission!(user)
       stub_datacite_doi
-      file_name = uploaded_file.original_filename
-      stub_work_s3_requests(work: work, file_name: file_name)
-      work.pre_curation_uploads.attach(uploaded_file)
 
       sign_in curator
       post :approve, params: { id: work.id }
@@ -945,9 +949,6 @@
       sign_in curator
       work.complete_submission!(user)
       stub_datacite_doi(result: Failure(Faraday::Response.new(Faraday::Env.new)))
-      file_name = uploaded_file.original_filename
-      stub_work_s3_requests(work: work, file_name: file_name)
-      work.pre_curation_uploads.attach(uploaded_file)
     end
 
     it "approves and notes that it was not published" do
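The specs now lean on a stub_s3 helper instead of attaching uploads; its definition is not part of this diff. A minimal sketch of what such a helper might do, assuming it doubles out S3QueryService (the real helper may differ):

```ruby
# Hypothetical helper for illustration only; the methods stubbed here exist on
# S3QueryService per the diff above, but the project's real stub_s3 may differ.
def stub_s3(data: [])
  fake_s3_query = instance_double(S3QueryService,
                                  client_s3_files: data,
                                  publish_files: [],
                                  bucket_name: "example-bucket")
  allow(S3QueryService).to receive(:new).and_return(fake_s3_query)
  fake_s3_query
end
```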