Skip to content

Commit

Permalink
inference of some Submission and BioProject fields from existing metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
sujaypatil96 committed Jun 14, 2024
1 parent 80c4339 commit adcd8e6
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 32 deletions.
26 changes: 18 additions & 8 deletions nmdc_runtime/site/export/ncbi_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import xml.etree.ElementTree as ET
import xml.dom.minidom

from typing import Any
from urllib.parse import urlparse
from nmdc_runtime.site.export.ncbi_xml_utils import (
handle_controlled_identified_term_value,
Expand All @@ -19,10 +20,19 @@


class NCBISubmissionXML:
def __init__(self, nmdc_study_id: str, ncbi_submission_metadata: dict):
def __init__(self, nmdc_study: Any, ncbi_submission_metadata: dict):
self.root = ET.Element("Submission")

self.nmdc_study_id = nmdc_study_id
self.nmdc_study_id = nmdc_study.get("id")
self.nmdc_study_title = nmdc_study.get("title")
self.nmdc_study_description = nmdc_study.get("description")
self.ncbi_bioproject_id = nmdc_study.get("insdc_bioproject_identifiers")
self.nmdc_pi_email = nmdc_study.get("principal_investigator", {}).get("email")
nmdc_study_pi_name = (
nmdc_study.get("principal_investigator", {}).get("name").split()
)
self.first_name = nmdc_study_pi_name[0]
self.last_name = nmdc_study_pi_name[1] if len(nmdc_study_pi_name) > 1 else None

self.nmdc_ncbi_attribute_mapping_file_url = ncbi_submission_metadata.get(
"nmdc_ncbi_attribute_mapping_file_url"
Expand Down Expand Up @@ -357,18 +367,18 @@ def set_fastq(

def get_submission_xml(self, biosamples_list: list, data_objects_list: list):
self.set_description(
email=self.ncbi_submission_metadata.get("email", ""),
user=self.ncbi_submission_metadata.get("user", ""),
first=self.ncbi_submission_metadata.get("first", ""),
last=self.ncbi_submission_metadata.get("last", ""),
email=self.nmdc_pi_email,
user="National Microbiome Data Collaborative (NMDC)",
first=self.first_name,
last=self.last_name,
org=self.ncbi_submission_metadata.get("organization", ""),
)

if not self.ncbi_bioproject_metadata.get("exists"):
self.set_bioproject(
title=self.ncbi_bioproject_metadata.get("title", ""),
title=self.nmdc_study_title,
project_id=self.ncbi_bioproject_metadata.get("project_id", ""),
description=self.ncbi_bioproject_metadata.get("description", ""),
description=self.nmdc_study_description,
data_type=self.ncbi_bioproject_metadata.get("data_type", ""),
org=self.ncbi_submission_metadata.get("organization", ""),
)
Expand Down
4 changes: 2 additions & 2 deletions nmdc_runtime/site/export/study_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def export_study_biosamples_metadata():


@op(required_resource_keys={"runtime_api_site_client"})
def get_biosamples_by_study_id(context: OpExecutionContext, nmdc_study_id: str):
def get_biosamples_by_study_id(context: OpExecutionContext, nmdc_study: dict):
client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
biosamples = get_all_docs(client, "biosamples", f"part_of:{nmdc_study_id}")
biosamples = get_all_docs(client, "biosamples", f"part_of:{nmdc_study['id']}")
return biosamples
8 changes: 4 additions & 4 deletions nmdc_runtime/site/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
get_neon_pipeline_inputs,
get_df_from_url,
site_code_mapping,
get_ncbi_export_pipeline_study_id,
get_ncbi_export_pipeline_study,
get_data_objects_from_biosamples,
get_ncbi_export_pipeline_inputs,
ncbi_submission_xml_from_nmdc_study,
Expand Down Expand Up @@ -391,11 +391,11 @@ def ingest_neon_surface_water_metadata():

@graph
def nmdc_study_to_ncbi_submission_export():
nmdc_study_id = get_ncbi_export_pipeline_study_id()
nmdc_study = get_ncbi_export_pipeline_study()
ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
biosamples = get_biosamples_by_study_id(nmdc_study_id)
biosamples = get_biosamples_by_study_id(nmdc_study)
data_objects = get_data_objects_from_biosamples(biosamples)
xml_data = ncbi_submission_xml_from_nmdc_study(
nmdc_study_id, ncbi_submission_metadata, biosamples, data_objects
nmdc_study, ncbi_submission_metadata, biosamples, data_objects
)
ncbi_submission_xml_asset(xml_data)
20 changes: 9 additions & 11 deletions nmdc_runtime/site/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
)
from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
from nmdc_runtime.api.endpoints.find import find_study_by_id
from nmdc_runtime.api.models.job import Job, JobOperationMetadata
from nmdc_runtime.api.models.metadata import ChangesheetIn
from nmdc_runtime.api.models.operation import (
Expand Down Expand Up @@ -1014,9 +1015,12 @@ def site_code_mapping() -> dict:
)


@op(config_schema={"nmdc_study_id": str})
def get_ncbi_export_pipeline_study_id(context: OpExecutionContext) -> str:
return context.op_config["nmdc_study_id"]
@op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
nmdc_study = find_study_by_id(
context.op_config["nmdc_study_id"], context.resources.mongo.db
)
return nmdc_study


@op(
Expand All @@ -1025,10 +1029,6 @@ def get_ncbi_export_pipeline_study_id(context: OpExecutionContext) -> str:
"ncbi_submission_metadata": Field(
Permissive(
{
"email": String,
"first": String,
"last": String,
"user": String,
"organization": String,
}
),
Expand All @@ -1038,9 +1038,7 @@ def get_ncbi_export_pipeline_study_id(context: OpExecutionContext) -> str:
"ncbi_bioproject_metadata": Field(
Permissive(
{
"title": String,
"project_id": String,
"description": String,
"data_type": String,
"exists": Bool,
}
Expand Down Expand Up @@ -1090,11 +1088,11 @@ def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: li
@op
def ncbi_submission_xml_from_nmdc_study(
context: OpExecutionContext,
nmdc_study_id: str,
nmdc_study: Any,
ncbi_exporter_metadata: dict,
biosamples: list,
data_objects: list,
) -> str:
ncbi_exporter = NCBISubmissionXML(nmdc_study_id, ncbi_exporter_metadata)
ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
ncbi_xml = ncbi_exporter.get_submission_xml(biosamples, data_objects)
return ncbi_xml
8 changes: 1 addition & 7 deletions nmdc_runtime/site/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ def biosample_export():
},
),
"ops": {
"get_ncbi_export_pipeline_study_id": {
"get_ncbi_export_pipeline_study": {
"config": {
"nmdc_study_id": "",
}
Expand All @@ -891,16 +891,10 @@ def biosample_export():
"config": {
"nmdc_ncbi_attribute_mapping_file_url": "",
"ncbi_submission_metadata": {
"email": "",
"first": "",
"last": "",
"user": "",
"organization": "",
},
"ncbi_bioproject_metadata": {
"title": "",
"project_id": "",
"description": "",
"data_type": "",
"exists": False,
},
Expand Down

0 comments on commit adcd8e6

Please sign in to comment.