Skip to content

Commit

Permalink
S3 key already contains the DOI
Browse files Browse the repository at this point in the history
Lets utilize the key instead of the file name.  This stops an infinite loop from accuring in tests and for S3 files to not get added repeatedly when s3 is queried.
See https://pdc-describe-staging.princeton.edu/describe/works/75 for an example of a work where the files got added each time the work was saved
  • Loading branch information
carolyncole committed Sep 28, 2022
1 parent 85cb196 commit 567ab81
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 32 deletions.
16 changes: 9 additions & 7 deletions app/models/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -503,10 +503,14 @@ def update_ark_information
end

def generate_attachment_key(attachment)
key_base = "#{doi}/#{id}"

attachment_filename = attachment.filename.to_s
attachment_key = [key_base, attachment_filename].join("/")
attachment_key = attachment.key

# Files actually coming from S3 include the DOI and bucket as part of the file name
# Files being attached in another manner may not have it, so we should include it.
# This is really for testing only.
key_base = "#{doi}/#{id}"
attachment_key = [key_base, attachment_filename].join("/") unless attachment_key.include?(key_base)

attachment_ext = File.extname(attachment_filename)
attachment_query = attachment_key.gsub(attachment_ext, "")
Expand Down Expand Up @@ -636,10 +640,8 @@ def s3_resources
alias pre_curation_s3_resources s3_resources

def s3_object_persisted?(s3_file)
uploads_filenames = uploads.map(&:filename)

persisted_filenames = uploads_filenames.select { |filename| filename.to_s == s3_file.filename }
!persisted_filenames.empty?
uploads_keys = uploads.map(&:key)
uploads_keys.include?(s3_file.key)
end

def add_pre_curation_s3_object(s3_file)
Expand Down
2 changes: 1 addition & 1 deletion spec/factories/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
collection { Collection.research_data }
resource do
PDCMetadata::Resource.new_from_json({
"doi": "https://doi.org/10.34770/pe9w-x904",
"doi": "10.34770/pe9w-x904",
"ark": "ark:/88435/dsp01zc77st047",
"identifier_type": "DOI",
"titles": [{ "title": "Shakespeare and Company Project Dataset: Lending Library Members, Books, Events" }],
Expand Down
8 changes: 5 additions & 3 deletions spec/models/work_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@
let(:work) { FactoryBot.create(:shakespeare_and_company_work) }
it "has a DOI" do
expect(work.title).to eq "Shakespeare and Company Project Dataset: Lending Library Members, Books, Events"
expect(work.resource.doi).to eq "https://doi.org/10.34770/pe9w-x904"
expect(work.resource.doi).to eq "10.34770/pe9w-x904"
end
end

Expand Down Expand Up @@ -591,15 +591,15 @@
let(:s3_query_service_double) { instance_double(S3QueryService) }
let(:file1) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_README.txt",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
size: 10_759,
checksum: "abc123"
)
end
let(:file2) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_datapackage.json",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
size: 12_739,
checksum: "abc567"
Expand Down Expand Up @@ -628,6 +628,8 @@
expect(work.pre_curation_uploads.first.key).to eq("#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt")
expect(work.pre_curation_uploads.last).to be_a(ActiveStorage::Attachment)
expect(work.pre_curation_uploads.last.key).to eq("#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json")
work.valid?
expect(work.pre_curation_uploads.length).to eq(2)
end

context "a blob already exists for one of the files" do
Expand Down
13 changes: 7 additions & 6 deletions spec/system/view_data_in_s3_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@

describe "when a dataset has a DOI and its data is in S3", mock_s3_query_service: false do
let(:user) { FactoryBot.create :princeton_submitter }
let(:work) { FactoryBot.create(:shakespeare_and_company_work) }
let(:work) { FactoryBot.create(:shakespeare_and_company_work, created_by_user_id: user.id) }
let(:s3_query_service_double) { instance_double(S3QueryService) }
let(:file1) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_README.txt",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
size: 10_759,
checksum: "abc123"
)
end
let(:file2) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_datapackage.json",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
size: 12_739,
checksum: "abc567"
Expand Down Expand Up @@ -51,12 +51,13 @@
expect(page).to have_content work.title
expect(page).to have_content "us_covid_2019.csv"

expect(page).to have_content file1.filename

expect(page).to have_content file2.filename
expect(page).to have_content ActiveStorage::Filename.new(file1.filename).to_s
expect(page).to have_content ActiveStorage::Filename.new(file2.filename).to_s

click_on "Edit"
expect(page).to have_content "us_covid_2019.csv"
expect(page).to have_content ActiveStorage::Filename.new(file1.filename).to_s
expect(page).to have_content ActiveStorage::Filename.new(file2.filename).to_s
end
end
end
33 changes: 18 additions & 15 deletions spec/system/work_upload_s3_objects_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,22 @@
let(:s3_query_service_double) { instance_double(S3QueryService) }
let(:file1) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_README.txt",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_README.txt",
last_modified: Time.parse("2022-04-21T18:29:40.000Z"),
size: 10_759,
checksum: "abc123"
)
end
let(:file2) do
S3File.new(
filename: "SCoData_combined_v1_2020-07_datapackage.json",
filename: "#{work.doi}/#{work.id}/SCoData_combined_v1_2020-07_datapackage.json",
last_modified: Time.parse("2022-04-21T18:30:07.000Z"),
size: 12_739,
checksum: "abc567"
)
end
let(:filename1) { ActiveStorage::Filename.new(file1.filename).to_s }
let(:filename2) { ActiveStorage::Filename.new(file1.filename).to_s }
let(:s3_data) { [file1, file2] }
let(:bucket_url) do
"https://example-bucket.s3.amazonaws.com/"
Expand Down Expand Up @@ -67,10 +69,9 @@
expect(work.pre_curation_uploads.length).to eq(3)
visit work_path(work)
expect(page).to have_content work.title

expect(page).to have_content upload_file_name
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).to have_content filename1
expect(page).to have_content filename2
end

it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
Expand All @@ -96,8 +97,9 @@
visit work_path(work)

expect(page).to have_content work.title
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).not_to have_content upload_file_name
expect(page).to have_content filename1
expect(page).to have_content filename2
end

it "renders only the S3 Bucket Objects on the edit page", js: true do
Expand All @@ -113,6 +115,7 @@
let(:collection) { approved_work.collection }
let(:user) { FactoryBot.create(:user, collections_to_admin: [collection]) }
let(:approved_work) { FactoryBot.create(:shakespeare_and_company_work) }
let(:work) { approved_work } # make sure the id in the file key matches the work

before do
# approved_work.pre_curation_uploads.attach(upload_file)
Expand All @@ -129,8 +132,8 @@
expect(page).to have_content approved_work.title

expect(page).to have_content upload_file_name
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).to have_content filename1
expect(page).to have_content filename2
end

it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
Expand All @@ -140,8 +143,8 @@
click_on "Edit"

expect(page).to have_content upload_file_name
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).to have_content filename1
expect(page).to have_content filename2
end

context "when files are deleted from a Work" do
Expand All @@ -157,8 +160,8 @@
visit work_path(approved_work)

expect(page).to have_content approved_work.title
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).to have_content filename1
expect(page).to have_content filename2
end

it "renders only the S3 Bucket Objects on the edit page", js: true do
Expand All @@ -167,8 +170,8 @@
click_on "Edit"

expect(page).not_to have_content upload_file_name
expect(page).to have_content file1.filename
expect(page).to have_content file2.filename
expect(page).to have_content filename1
expect(page).to have_content filename2
end
end
end
Expand Down

0 comments on commit 567ab81

Please sign in to comment.