Remove the ignore_directories flag from the code
Co-authored-by: Robert-Anthony Lee-Faison <leefaisonr@users.noreply.github.com>
hectorcorrea and leefaisonr committed Jun 3, 2024
1 parent 4fd3ebd commit cbca1f0
Showing 3 changed files with 8 additions and 11 deletions.
2 changes: 1 addition & 1 deletion app/services/pul_dspace_aws_connector.rb
@@ -27,7 +27,7 @@ def upload_to_s3(dspace_files)

def aws_files
return [] if ark.nil? || dspace_doi.nil?
- @aws_files ||= work.s3_query_service.client_s3_files(reload: true, bucket_name: dspace_bucket_name, prefix: dspace_doi.tr(".", "-"), ignore_directories: false)
+ @aws_files ||= work.s3_query_service.client_s3_files(reload: true, bucket_name: dspace_bucket_name, prefix: dspace_doi.tr(".", "-"))
end

private
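The only non-obvious piece of this call site is how the DOI becomes an S3 prefix: dspace_doi.tr(".", "-") replaces every period in the DOI with a hyphen before the listing query. A quick worked example (the DOI value below is made up purely for illustration):

# Hypothetical DOI, used only to show the prefix transformation above.
dspace_doi = "10.34770/abc1-2xyz"
prefix = dspace_doi.tr(".", "-")
# => "10-34770/abc1-2xyz"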
15 changes: 6 additions & 9 deletions app/services/s3_query_service.rb
@@ -160,12 +160,12 @@ def find_s3_file(filename:)

# Retrieve the S3 resources uploaded to the S3 Bucket
# @return [Array<S3File>]
- def client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix, ignore_directories: true)
+ def client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
if reload # force a reload
@client_s3_files = nil
clear_s3_responses(bucket_name:, prefix:)
end
- @client_s3_files ||= get_s3_objects(bucket_name:, prefix:, ignore_directories:)
+ @client_s3_files ||= get_s3_objects(bucket_name:, prefix:)
end

def client_s3_empty_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
@@ -174,7 +174,7 @@ def client_s3_empty_files(reload: false, bucket_name: self.bucket_name, prefix:
clear_s3_responses(bucket_name:, prefix:)
end
@client_s3_empty_files ||= begin
- files_and_directories = get_s3_objects(bucket_name:, prefix:, ignore_directories: false)
+ files_and_directories = get_s3_objects(bucket_name:, prefix:)
files_and_directories.select { |object| !object.filename.ends_with?("/") && object.empty? }
end
end
@@ -211,7 +211,6 @@ def publish_files(current_user)
source_bucket = S3QueryService.pre_curation_config[:bucket]
target_bucket = S3QueryService.post_curation_config[:bucket]
empty_files = client_s3_empty_files(reload: true, bucket_name: source_bucket)
- # See TODO below
# Do not move the empty files, however, ensure that it is noted that the
# presence of empty files is specified in the provenance log.
unless empty_files.empty?
@@ -351,26 +350,24 @@ def s3_responses(bucket_name:, prefix:)
responses
end

- def get_s3_objects(bucket_name:, prefix:, ignore_directories:)
+ def get_s3_objects(bucket_name:, prefix:)
start = Time.zone.now
responses = s3_responses(bucket_name:, prefix:)
objects = responses.reduce([]) do |all_objects, resp|
resp_hash = resp.to_h
- resp_objects = parse_objects(resp_hash, ignore_directories:)
+ resp_objects = parse_objects(resp_hash)
all_objects + resp_objects
end
elapsed = Time.zone.now - start
Rails.logger.info("Loading S3 objects. Bucket: #{bucket_name}. Prefix: #{prefix}. Elapsed: #{elapsed} seconds")
objects
end

- def parse_objects(resp, ignore_directories: true)
+ def parse_objects(resp)
objects = []
resp_hash = resp.to_h
response_objects = resp_hash[:contents]
response_objects&.each do |object|
- # TODO: Revisit this, we might need this logic
- # next if object[:size] == 0 && ignore_directories
s3_file = S3File.new(work: model, filename: object[:key], last_modified: object[:last_modified], size: object[:size], checksum: object[:etag])
objects << s3_file
end
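With the ignore_directories: parameter gone, get_s3_objects and parse_objects return an S3File for every key in the bucket listing, including the zero-byte keys that act as folder placeholders; callers such as client_s3_empty_files now do their own filtering. A minimal standalone sketch of that filtering, using invented keys and plain hashes standing in for S3File objects:

# Stand-ins for S3File objects; keys and sizes are invented for the example.
objects = [
  { filename: "10-34770/abc/",          size: 0 },  # folder placeholder key
  { filename: "10-34770/abc/empty.txt", size: 0 },  # genuinely empty file
  { filename: "10-34770/abc/data.csv",  size: 42 }  # regular file
]

# Mirrors the select in client_s3_empty_files: keep empty objects that are
# not folder placeholders (size.zero? stands in for S3File#empty?).
empty_files = objects.select do |object|
  !object[:filename].end_with?("/") && object[:size].zero?
end

empty_files.map { |object| object[:filename] }
# => ["10-34770/abc/empty.txt"]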
2 changes: 1 addition & 1 deletion spec/services/s3_query_service_spec.rb
@@ -739,7 +739,7 @@
end

it "retrieves the directories if requested" do
- files = s3_query_service.client_s3_files(reload: true, bucket_name: "other-bucket", prefix: "new-prefix", ignore_directories: false)
+ files = s3_query_service.client_s3_files(reload: true, bucket_name: "other-bucket", prefix: "new-prefix")
expect(files.count).to eq 6
expect(files[0].filename).to match(/README/)
expect(files[1].filename).to match(/SCoData_combined_v1_2020-07_datapackage.json/)
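Because directories are always included in the listing now, a companion expectation along these lines could also be added (a sketch only: it assumes the stubbed bucket response in this spec contains a key ending in "/", which may not match the actual fixtures):

it "includes zero-byte folder placeholder keys in the listing" do
  files = s3_query_service.client_s3_files(reload: true, bucket_name: "other-bucket", prefix: "new-prefix")
  # a_string_ending_with is RSpec's built-in EndWith matcher
  expect(files.map(&:filename)).to include(a_string_ending_with("/"))
end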
