Skip to content

Commit

Permalink
Merge pull request #192 from microbiomedata/2043_extraction_target_slot
Browse files Browse the repository at this point in the history
Make `extraction_target` slot multivalued and implement migrator
  • Loading branch information
turbomam committed Jun 5, 2024
2 parents 110777a + ad39841 commit ca304e4
Show file tree
Hide file tree
Showing 13 changed files with 61 additions and 15 deletions.
31 changes: 31 additions & 0 deletions nmdc_schema/migrators/migrator_from_X_to_PR192.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from nmdc_schema.migrators.migrator_base import MigratorBase


class Migrator(MigratorBase):
    """
    Migrates data from X to PR192.

    Renames the existing `extraction_target` slot to `extraction_targets` and makes it multivalued.
    Should be run after migrator_from_X_to_PR2_and_PR24.py.
    """

    def upgrade(self):
        r"""Migrates the database from conforming to the original schema, to conforming to the new schema."""

        # Hand the per-document transformation to the adapter, targeting the `extraction_set` collection.
        self.adapter.process_each_document("extraction_set", [self.rename_extraction_target])

    def rename_extraction_target(self, extraction: dict) -> dict:
        r"""
        Renames the `extraction_target` field to `extraction_targets` and makes it multivalued.

        The dictionary is modified in place and the same object is returned.
        Documents that lack an `extraction_target` field are returned unchanged.

        >>> m = Migrator()
        >>> m.rename_extraction_target({'id': 123})  # no `extraction_target` field
        {'id': 123}
        >>> m.rename_extraction_target({'id': 123, 'extraction_target': 'DNA'})  # test: renames field and casts it as a list value
        {'id': 123, 'extraction_targets': ['DNA']}
        >>> m.rename_extraction_target({'id': 123, 'extraction_targets': ['DNA']})  # already migrated: left untouched
        {'id': 123, 'extraction_targets': ['DNA']}
        >>> m.rename_extraction_target({'extraction_target': 'RNA'})  # no `id` field: still migrated
        {'extraction_targets': ['RNA']}
        """

        # Read the id with `.get()` so that a document lacking `id` cannot abort the
        # migration just because of a log statement; `%s` defers string formatting
        # to the logger.
        self.logger.info("Starting migration of %s", extraction.get("id"))

        if "extraction_target" in extraction:
            # Move the old single value under the new key, wrapped in a one-element list.
            extraction["extraction_targets"] = [extraction.pop("extraction_target")]
        return extraction
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ extraction_set:
end_date: "2019-11-08"
qc_status: pass
extraction_method: phenol/chloroform extraction # not allowed anymore
extraction_target: DNA
extraction_targets:
- DNA

Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ extraction_set:
has_numeric_value: 0.25
has_unit: gram
qc_status: pass
extraction_target: DNA
extraction_targets:
- DNA
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ extraction_set:
- bare:pool_out_1
has_output:
- bare:dna_extract_1
extraction_target: DNA
extraction_targets:
- DNA
end_date: 2021-01-15

library_preparation_set:
Expand Down
3 changes: 2 additions & 1 deletion src/data/invalid/Extraction-invalid_enum.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ has_output:
start_date: "2020-06-24T22:06Z"
end_date: "2021-08-19"
qc_status: "pass"
extraction_target: "phenol/chloroform extraction"
extraction_targets:
- "phenol/chloroform extraction"
input_mass:
has_numeric_value: 0.25
has_unit: "g"
Expand Down
3 changes: 2 additions & 1 deletion src/data/valid/Database-extraction_set-exhaustive.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ extraction_set:

#extraction_type should have range of dna_extraction_enum with possible values of 'DNA extraction', 'RNA extraction', 'protein extraction'
#http://purl.obolibrary.org/obo/OBI_0000257
extraction_target: DNA
extraction_targets:
- DNA
#not needed at this time
#protocol_materials:
# - { material_name : DNeasy 96 PowerSoil Pro Kit, material_vendor: Qiagen, catalog_number: 47017 }
Expand Down
12 changes: 8 additions & 4 deletions src/data/valid/Database-interleaved.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3325,7 +3325,8 @@ extraction_set:
start_date: '2019-99-08'
end_date: '2019-99-08'
qc_status: pass
extraction_target: DNA
extraction_targets:
- DNA
input_mass:
type: nmdc:QuantityValue
has_numeric_value: 0.25
Expand All @@ -3337,7 +3338,8 @@ extraction_set:
- nmdc:bsm-99-74d83z
has_output:
- nmdc:procsm-99-k4m6j
extraction_target: DNA
extraction_targets:
- DNA
input_mass:
type: nmdc:QuantityValue
has_numeric_value: 0.25
Expand All @@ -3349,7 +3351,8 @@ extraction_set:
- nmdc:bsm-99-49z38t
has_output:
- nmdc:procsm-99-29dl
extraction_target: DNA
extraction_targets:
- DNA
volume:
type: nmdc:QuantityValue
has_numeric_value: 100
Expand All @@ -3361,7 +3364,8 @@ extraction_set:
- nmdc:bsm-99-bbbbbb
has_output:
- nmdc:procsm-99-3z8t
extraction_target: DNA
extraction_targets:
- DNA
field_research_site_set:
- id: nmdc:frsite-99-SPreao
type: nmdc:FieldResearchSite
Expand Down
3 changes: 2 additions & 1 deletion src/data/valid/Database-neon-story.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ extraction_set:
- nmdc:procsm-99-pooled
has_output:
- nmdc:procsm-99-extract
extraction_target: DNA
extraction_targets:
- DNA
start_date: "2021-01-15"
end_date: "2021-01-15"
type: nmdc:Extraction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ extraction_set:
- nmdc:procsm-99-xyz2
start_date: '2021-01-15'
end_date: '2021-01-15'
extraction_target: DNA
extraction_targets:
- DNA
type: nmdc:Extraction
biosample_set:
- id: nmdc:bsm-99-abcdef1
Expand Down
3 changes: 2 additions & 1 deletion src/data/valid/Database-nucleic-extraction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ extraction_set:

#extraction_type should have range of dna_extraction_enum with possible values of 'DNA extraction', 'RNA extraction', 'protein extraction'
#http://purl.obolibrary.org/obo/OBI_0000257
extraction_target: DNA
extraction_targets:
- DNA

#not needed at this time
#protocol_materials:
Expand Down
3 changes: 2 additions & 1 deletion src/data/valid/Extraction-NEON.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ start_date: "2020-06-24T22:06Z"
end_date: "2021-08-19"
processing_institution: Battelle
qc_status: pass
extraction_target: DNA
extraction_targets:
- DNA
input_mass:
type: nmdc:QuantityValue
has_numeric_value: 0.25
Expand Down
4 changes: 3 additions & 1 deletion src/schema/basic_slots.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,9 +466,10 @@ slots:
minimum_value: 0
maximum_value: 2000

extraction_target:
extraction_targets:
description: Provides the target biomolecule that has been separated from a sample during an extraction process.
rank: 1000
multivalued: true
range: ExtractionTargetEnum
notes:
- todos, remove nucl_acid_ext from OmicsProcessing (DataGeneration)
Expand Down Expand Up @@ -1057,6 +1058,7 @@ enums:
RNA: { }
metabolite: { }
protein: { }
lipid: { }

ProcessingInstitutionEnum:
notes:
Expand Down
2 changes: 1 addition & 1 deletion src/schema/nmdc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ classes:
- OBI:0302884
slots:
- substances_used
- extraction_target
- extraction_targets
- input_mass
- volume
slot_usage:
Expand Down

0 comments on commit ca304e4

Please sign in to comment.