From 8218c3478886dd280ed9cdb7192aa5c63e6f6cae Mon Sep 17 00:00:00 2001
From: Carolyn Cole
Date: Tue, 27 Sep 2022 11:24:03 -0400
Subject: [PATCH] S3 key already contains the DOI

Let's use the key instead of the file name. This stops an infinite loop
from occurring in tests and prevents S3 files from being added repeatedly
when S3 is queried. See
https://pdc-describe-staging.princeton.edu/describe/works/75 for an
example of a work where the files were added again each time the work was
saved.
---
 app/models/work.rb                          | 16 ++++++-----
 spec/factories/work.rb                      |  2 +-
 spec/models/work_spec.rb                    |  8 ++++--
 spec/system/view_data_in_s3_spec.rb         | 11 ++++----
 spec/system/work_upload_s3_objects_spec.rb  | 33 ++++++++++++----------
 5 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/app/models/work.rb b/app/models/work.rb
index e2922a72d..d5322d201 100644
--- a/app/models/work.rb
+++ b/app/models/work.rb
@@ -503,10 +503,14 @@ def update_ark_information
   end
 
   def generate_attachment_key(attachment)
-    key_base = "#{doi}/#{id}"
-    attachment_filename = attachment.filename.to_s
-    attachment_key = [key_base, attachment_filename].join("/")
+    attachment_key = attachment.key
+
+    # Files actually coming from S3 include the DOI and bucket as part of the file name.
+    # Files being attached in another manner may not have it, so we should include it.
+    # This is really for testing only.
+    key_base = "#{doi}/#{id}"
+    attachment_key = [key_base, attachment_filename].join("/") unless attachment_key.include?(key_base)
 
     attachment_ext = File.extname(attachment_filename)
     attachment_query = attachment_key.gsub(attachment_ext, "")
@@ -636,10 +640,8 @@ def s3_resources
   alias pre_curation_s3_resources s3_resources
 
   def s3_object_persisted?(s3_file)
-    uploads_filenames = uploads.map(&:filename)
-
-    persisted_filenames = uploads_filenames.select { |filename| filename.to_s == s3_file.filename }
-    !persisted_filenames.empty?
+    uploads_keys = uploads.map(&:key)
+    uploads_keys.include?(s3_file.key)
   end
 
   def add_pre_curation_s3_object(s3_file)
diff --git a/spec/factories/work.rb b/spec/factories/work.rb
index f47d21ac8..1592fb2c9 100644
--- a/spec/factories/work.rb
+++ b/spec/factories/work.rb
@@ -27,7 +27,7 @@
     collection { Collection.research_data }
     resource do
       PDCMetadata::Resource.new_from_json({
-        "doi": "https://doi.org/10.34770/pe9w-x904",
+        "doi": "10.34770/pe9w-x904",
         "ark": "ark:/88435/dsp01zc77st047",
         "identifier_type": "DOI",
         "titles": [{ "title": "Shakespeare and Company Project Dataset: Lending Library Members, Books, Events" }],
diff --git a/spec/models/work_spec.rb b/spec/models/work_spec.rb
index b176577e6..1a21fa79c 100644
--- a/spec/models/work_spec.rb
+++ b/spec/models/work_spec.rb
@@ -254,7 +254,7 @@
     let(:work) { FactoryBot.create(:shakespeare_and_company_work) }
     it "has a DOI" do
       expect(work.title).to eq "Shakespeare and Company Project Dataset: Lending Library Members, Books, Events"
-      expect(work.resource.doi).to eq "https://doi.org/10.34770/pe9w-x904"
+      expect(work.resource.doi).to eq "10.34770/pe9w-x904"
     end
   end
 
@@ -591,7 +591,7 @@
     let(:s3_query_service_double) { instance_double(S3QueryService) }
     let(:file1) do
       S3File.new(
-        filename: "SCoData_combined_v1_2020-07_README.txt",
+        filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
         last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
         size: 10_759,
         checksum: "abc123"
@@ -599,7 +599,7 @@
     end
     let(:file2) do
       S3File.new(
-        filename: "SCoData_combined_v1_2020-07_datapackage.json",
+        filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
         last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
         size: 12_739,
         checksum: "abc567"
@@ -628,6 +628,8 @@
       expect(work.pre_curation_uploads.first.key).to eq("#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt")
       expect(work.pre_curation_uploads.last).to be_a(ActiveStorage::Attachment)
      expect(work.pre_curation_uploads.last.key).to eq("#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json")
+      work.valid?
+      expect(work.pre_curation_uploads.length).to eq(2)
     end
 
     context "a blob already exists for one of the files" do
diff --git a/spec/system/view_data_in_s3_spec.rb b/spec/system/view_data_in_s3_spec.rb
index ee42c20a1..1214077b3 100644
--- a/spec/system/view_data_in_s3_spec.rb
+++ b/spec/system/view_data_in_s3_spec.rb
@@ -13,7 +13,7 @@
   let(:s3_query_service_double) { instance_double(S3QueryService) }
   let(:file1) do
     S3File.new(
-      filename: "SCoData_combined_v1_2020-07_README.txt",
+      filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
       last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
       size: 10_759,
       checksum: "abc123"
@@ -21,7 +21,7 @@
   end
   let(:file2) do
     S3File.new(
-      filename: "SCoData_combined_v1_2020-07_datapackage.json",
+      filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
       last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
       size: 12_739,
       checksum: "abc567"
@@ -51,12 +51,13 @@
 
       expect(page).to have_content work.title
       expect(page).to have_content "us_covid_2019.csv"
-      expect(page).to have_content file1.filename
-
-      expect(page).to have_content file2.filename
+      expect(page).to have_content ActiveStorage::Filename.new(file1.filename).to_s
+      expect(page).to have_content ActiveStorage::Filename.new(file2.filename).to_s
 
       click_on "Edit"
       expect(page).to have_content "us_covid_2019.csv"
+      expect(page).to have_content ActiveStorage::Filename.new(file1.filename).to_s
+      expect(page).to have_content ActiveStorage::Filename.new(file2.filename).to_s
     end
   end
 end
diff --git a/spec/system/work_upload_s3_objects_spec.rb b/spec/system/work_upload_s3_objects_spec.rb
index d5825bcf8..bb902aad5 100644
--- a/spec/system/work_upload_s3_objects_spec.rb
+++ b/spec/system/work_upload_s3_objects_spec.rb
@@ -9,7 +9,7 @@
   let(:s3_query_service_double) { instance_double(S3QueryService) }
   let(:file1) do
     S3File.new(
-      filename: "SCoData_combined_v1_2020-07_README.txt",
+      filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
       last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
       size: 10_759,
       checksum: "abc123"
@@ -17,12 +17,14 @@
   end
   let(:file2) do
     S3File.new(
-      filename: "SCoData_combined_v1_2020-07_datapackage.json",
+      filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
       last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
       size: 12_739,
       checksum: "abc567"
     )
   end
+  let(:filename1) { ActiveStorage::Filename.new(file1.filename).to_s }
+  let(:filename2) { ActiveStorage::Filename.new(file2.filename).to_s }
   let(:s3_data) { [file1, file2] }
   let(:bucket_url) do
     "https://example-bucket.s3.amazonaws.com/"
@@ -67,10 +69,9 @@
       expect(work.pre_curation_uploads.length).to eq(3)
       visit work_path(work)
       expect(page).to have_content work.title
-
       expect(page).to have_content upload_file_name
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
 
     it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
@@ -96,8 +97,9 @@
 
       visit work_path(work)
       expect(page).to have_content work.title
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).not_to have_content upload_file_name
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
 
     it "renders only the S3 Bucket Objects on the edit page", js: true do
@@ -113,6 +115,7 @@
     let(:collection) { approved_work.collection }
     let(:user) { FactoryBot.create(:user, collections_to_admin: [collection]) }
     let(:approved_work) { FactoryBot.create(:shakespeare_and_company_work) }
+    let(:work) { approved_work } # make sure the id in the file key matches the work
 
     before do
       # approved_work.pre_curation_uploads.attach(upload_file)
@@ -129,8 +132,8 @@
       expect(page).to have_content approved_work.title
 
       expect(page).to have_content upload_file_name
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
 
     it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
@@ -140,8 +143,8 @@
       click_on "Edit"
 
       expect(page).to have_content upload_file_name
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
 
     context "when files are deleted from a Work" do
@@ -157,8 +160,8 @@
       visit work_path(approved_work)
       expect(page).to have_content approved_work.title
 
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
 
     it "renders only the S3 Bucket Objects on the edit page", js: true do
@@ -167,8 +170,8 @@
       click_on "Edit"
 
       expect(page).not_to have_content upload_file_name
-      expect(page).to have_content file1.filename
-      expect(page).to have_content file2.filename
+      expect(page).to have_content filename1
+      expect(page).to have_content filename2
     end
   end
 end
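The sketch below is not part of the patch; it is a minimal, standalone illustration of the key-based check this change introduces in Work#s3_object_persisted?. The S3Entry struct and the free-standing method are stand-ins for S3File and the ActiveStorage attachments, not the application's real API, and the key values are example data taken from the specs above.

    # Stand-in for an S3 object / attachment that exposes its full S3 key.
    S3Entry = Struct.new(:key, keyword_init: true)

    # Mirrors the new check: a file counts as persisted when an upload with the
    # same full key ("doi/id/filename") already exists, so it is not re-attached.
    def s3_object_persisted?(uploads, s3_file)
      uploads.map(&:key).include?(s3_file.key)
    end

    key_base = "10.34770/pe9w-x904/75" # "#{doi}/#{id}", example values only
    readme   = S3Entry.new(key: "#{key_base}/SCoData_combined_v1_2020-07_README.txt")

    puts s3_object_persisted?([readme], readme) # => true  (already attached, skipped)
    puts s3_object_persisted?([], readme)       # => false (attached exactly once)

Under the old filename comparison, an attachment stored under the full "doi/id/..." key never matched the bare filename check against the S3 listing, so each query re-attached the same objects; comparing full keys, as above, makes the check idempotent.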