diff --git a/nmdc_schema/migrators/migrator_from_X_to_PR192.py b/nmdc_schema/migrators/migrator_from_X_to_PR192.py new file mode 100644 index 0000000000..df53a71243 --- /dev/null +++ b/nmdc_schema/migrators/migrator_from_X_to_PR192.py @@ -0,0 +1,31 @@ +from typing import List +from nmdc_schema.migrators.migrator_base import MigratorBase + + +class Migrator(MigratorBase): + """ + Migrates data from X to PR192: makes the existing extraction_target slot multivalued and renames it to extraction_targets. + + Should be run after migrator_from_X_to_PR2_and_PR24.py. + """ + + def upgrade(self): + r"""Migrates the database from conforming to the original schema, to conforming to the new schema.""" + + self.adapter.process_each_document("extraction_set", [self.rename_extraction_target]) + + def rename_extraction_target(self, extraction: dict) -> dict: + r""" + Renames the `extraction_target` field to `extraction_targets` and makes it multivalued. + + >>> m = Migrator() + >>> m.rename_extraction_target({'id': 123}) # no `extraction_target` field + {'id': 123} + >>> m.rename_extraction_target({'id': 123, 'extraction_target': 'DNA'}) # test: renames field and casts it as a list value + {'id': 123, 'extraction_targets': ['DNA']} + """ + + self.logger.info(f"Starting migration of {extraction['id']}") + if "extraction_target" in extraction: + extraction["extraction_targets"] = [extraction.pop("extraction_target")] + return extraction \ No newline at end of file diff --git a/src/data/invalid/Database-Extraction-extraction_method-slot-retired.yaml b/src/data/invalid/Database-Extraction-extraction_method-slot-retired.yaml index 5c36e11f0f..a1f6207bd2 100644 --- a/src/data/invalid/Database-Extraction-extraction_method-slot-retired.yaml +++ b/src/data/invalid/Database-Extraction-extraction_method-slot-retired.yaml @@ -16,5 +16,6 @@ extraction_set: end_date: "2019-11-08" qc_status: pass extraction_method: phenol/chloroform extraction # not allowed anymore - extraction_target: DNA + 
extraction_targets: + - DNA diff --git a/src/data/invalid/Database-Extraction-sample_mass-now_input_mass.yaml b/src/data/invalid/Database-Extraction-sample_mass-now_input_mass.yaml index cbb55c71b2..6b187fd58e 100644 --- a/src/data/invalid/Database-Extraction-sample_mass-now_input_mass.yaml +++ b/src/data/invalid/Database-Extraction-sample_mass-now_input_mass.yaml @@ -19,4 +19,5 @@ extraction_set: has_numeric_value: 0.25 has_unit: gram qc_status: pass - extraction_target: DNA + extraction_targets: + - DNA diff --git a/src/data/invalid/Database-plannedprocess-non-string-end_datet.yaml b/src/data/invalid/Database-plannedprocess-non-string-end_datet.yaml index afa8cb6ffb..0c6eae9937 100644 --- a/src/data/invalid/Database-plannedprocess-non-string-end_datet.yaml +++ b/src/data/invalid/Database-plannedprocess-non-string-end_datet.yaml @@ -6,7 +6,8 @@ extraction_set: - bare:pool_out_1 has_output: - bare:dna_extract_1 - extraction_target: DNA + extraction_targets: + - DNA end_date: 2021-01-15 library_preparation_set: diff --git a/src/data/invalid/Extraction-invalid_enum.yaml b/src/data/invalid/Extraction-invalid_enum.yaml index 3c95e517d1..5f3299d09e 100644 --- a/src/data/invalid/Extraction-invalid_enum.yaml +++ b/src/data/invalid/Extraction-invalid_enum.yaml @@ -6,7 +6,8 @@ has_output: start_date: "2020-06-24T22:06Z" end_date: "2021-08-19" qc_status: "pass" -extraction_target: "phenol/chloroform extraction" +extraction_targets: + - "phenol/chloroform extraction" input_mass: has_numeric_value: 0.25 has_unit: "g" diff --git a/src/data/valid/Database-extraction_set-exhaustive.yaml b/src/data/valid/Database-extraction_set-exhaustive.yaml index dca4f9eaaf..5617040518 100644 --- a/src/data/valid/Database-extraction_set-exhaustive.yaml +++ b/src/data/valid/Database-extraction_set-exhaustive.yaml @@ -35,7 +35,8 @@ extraction_set: #extraction_type should have range of dna_extraction_enum with possible values of 'DNA extraction', 'RNA extraction', 'protein extraction' 
#http://purl.obolibrary.org/obo/OBI_0000257 - extraction_target: DNA + extraction_targets: + - DNA #not needed at this time #protocol_materials: # - { material_name : DNeasy 96 PowerSoil Pro Kit, material_vendor: Qiagen, catalog_number: 47017 } diff --git a/src/data/valid/Database-interleaved.yaml b/src/data/valid/Database-interleaved.yaml index 121a8e8b39..f579103d70 100644 --- a/src/data/valid/Database-interleaved.yaml +++ b/src/data/valid/Database-interleaved.yaml @@ -3325,7 +3325,8 @@ extraction_set: start_date: '2019-99-08' end_date: '2019-99-08' qc_status: pass - extraction_target: DNA + extraction_targets: + - DNA input_mass: type: nmdc:QuantityValue has_numeric_value: 0.25 @@ -3337,7 +3338,8 @@ extraction_set: - nmdc:bsm-99-74d83z has_output: - nmdc:procsm-99-k4m6j - extraction_target: DNA + extraction_targets: + - DNA input_mass: type: nmdc:QuantityValue has_numeric_value: 0.25 @@ -3349,7 +3351,8 @@ extraction_set: - nmdc:bsm-99-49z38t has_output: - nmdc:procsm-99-29dl - extraction_target: DNA + extraction_targets: + - DNA volume: type: nmdc:QuantityValue has_numeric_value: 100 @@ -3361,7 +3364,8 @@ extraction_set: - nmdc:bsm-99-bbbbbb has_output: - nmdc:procsm-99-3z8t - extraction_target: DNA + extraction_targets: + - DNA field_research_site_set: - id: nmdc:frsite-99-SPreao type: nmdc:FieldResearchSite diff --git a/src/data/valid/Database-neon-story.yaml b/src/data/valid/Database-neon-story.yaml index a17a838e6b..76eef31ac9 100644 --- a/src/data/valid/Database-neon-story.yaml +++ b/src/data/valid/Database-neon-story.yaml @@ -33,7 +33,8 @@ extraction_set: - nmdc:procsm-99-pooled has_output: - nmdc:procsm-99-extract - extraction_target: DNA + extraction_targets: + - DNA start_date: "2021-01-15" end_date: "2021-01-15" type: nmdc:Extraction diff --git a/src/data/valid/Database-neon_Biosample_to_DataObject_NEON.yaml b/src/data/valid/Database-neon_Biosample_to_DataObject_NEON.yaml index 6de84b64ca..c591b4251a 100644 --- 
a/src/data/valid/Database-neon_Biosample_to_DataObject_NEON.yaml +++ b/src/data/valid/Database-neon_Biosample_to_DataObject_NEON.yaml @@ -28,7 +28,8 @@ extraction_set: - nmdc:procsm-99-xyz2 start_date: '2021-01-15' end_date: '2021-01-15' - extraction_target: DNA + extraction_targets: + - DNA type: nmdc:Extraction biosample_set: - id: nmdc:bsm-99-abcdef1 diff --git a/src/data/valid/Database-nucleic-extraction.yaml b/src/data/valid/Database-nucleic-extraction.yaml index e0b8d7a4fc..f5e6b8a716 100644 --- a/src/data/valid/Database-nucleic-extraction.yaml +++ b/src/data/valid/Database-nucleic-extraction.yaml @@ -47,7 +47,8 @@ extraction_set: #extraction_type should have range of dna_extraction_enum with possible values of 'DNA extraction', 'RNA extraction', 'protein extraction' #http://purl.obolibrary.org/obo/OBI_0000257 - extraction_target: DNA + extraction_targets: + - DNA #not needed at this time #protocol_materials: diff --git a/src/data/valid/Extraction-NEON.yaml b/src/data/valid/Extraction-NEON.yaml index c512733ed5..2526471fa8 100644 --- a/src/data/valid/Extraction-NEON.yaml +++ b/src/data/valid/Extraction-NEON.yaml @@ -9,7 +9,8 @@ start_date: "2020-06-24T22:06Z" end_date: "2021-08-19" processing_institution: Battelle qc_status: pass -extraction_target: DNA +extraction_targets: + - DNA input_mass: type: nmdc:QuantityValue has_numeric_value: 0.25 diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 3081073b73..6df847003d 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -466,9 +466,10 @@ slots: minimum_value: 0 maximum_value: 2000 - extraction_target: + extraction_targets: description: Provides the target biomolecule that has been separated from a sample during an extraction process. 
rank: 1000 + multivalued: true range: ExtractionTargetEnum notes: - todos, remove nucl_acid_ext from OmicsProcessing (DataGeneration) @@ -1057,6 +1058,7 @@ enums: RNA: { } metabolite: { } protein: { } + lipid: { } ProcessingInstitutionEnum: notes: diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index cc11661e3d..93a9f218f5 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -319,7 +319,7 @@ classes: - OBI:0302884 slots: - substances_used - - extraction_target + - extraction_targets - input_mass - volume slot_usage: