Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add modular genotype & variant ingests #590

Merged
merged 4 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/after_download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ tar xfO data/monarch/kg-phenio.tar.gz merged-kg_nodes.tsv | grep ^NCBITaxon | cu

# Repair Orphanet prefixes in MONDO sssom rows as necessary
sed -i 's/\torphanet.ordo\:/\tOrphanet\:/g' data/monarch/mondo.sssom.tsv

# Normalize lowercase mesh: prefixes to MESH: throughout the MONDO sssom file
sed -i 's@mesh:@MESH:@g' data/monarch/mondo.sssom.tsv
14 changes: 14 additions & 0 deletions src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from biolink_model.datamodel import model # import the pythongen biolink model to get the version
from linkml_runtime import SchemaView
from linkml.utils.helpers import convert_to_snake_case
import requests

# from loguru import logger
import pandas
Expand Down Expand Up @@ -46,6 +47,19 @@ def transform_one(
# if log: logger.removeHandler(fh)
raise ValueError(f"{ingest} is not a valid ingest - see ingests.yaml for a list of options")

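# if the ingest lists urls instead of a config, download each file straight into {output_dir}/transform_output (files already present are skipped unless force is set), then return without running a transform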
if "url" in ingests[ingest]:
for url in ingests[ingest]["url"]:
filename = url.split("/")[-1]

if Path(f"{output_dir}/transform_output/{filename}").is_file() and not force:
continue

response = requests.get(url, allow_redirects=True)
with open(f"{output_dir}/transform_output/{filename}", "wb") as f:
f.write(response.content)
return

source_file = Path(Path(__file__).parent, ingests[ingest]["config"])

if not Path(source_file).is_file():
Expand Down
27 changes: 25 additions & 2 deletions src/monarch_ingest/ingests.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
## Pass-through modular ingests
alliance_genotype:
url:
- 'https://github.com/monarch-initiative/alliance-genotype-ingest/releases/latest/download/alliance_genotype_nodes.tsv'
alliance_phenotype:
url:
- 'https://github.com/monarch-initiative/alliance-phenotype-association-ingest/releases/latest/download/alliance_phenotype_edges.tsv'
alliance_disease_association:
url:
- 'https://github.com/monarch-initiative/alliance-disease-association-ingest/releases/latest/download/alliance_disease_edges.tsv'
zfin_genotype_to_phenotype:
url:
- 'https://github.com/monarch-initiative/zfin-genotype-to-phenotype-ingest/releases/latest/download/zfin_genotype_to_phenotype_edges.tsv'
clingen_variant:
url:
- 'https://github.com/monarch-initiative/clingen-ingest/releases/latest/download/clingen_variant_nodes.tsv'
- 'https://github.com/monarch-initiative/clingen-ingest/releases/latest/download/clingen_variant_edges.tsv'
clinvar_variant:
url:
- 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_nodes.tsv'
- 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_edges.tsv'

## Ingests within this repository

alliance_gene:
config: 'ingests/alliance/gene.yaml'
alliance_gene_to_expression:
config: 'ingests/alliance/gene_to_expression.yaml'
alliance_gene_to_phenotype:
config: 'ingests/alliance/gene_to_phenotype.yaml'
# alliance_publication:
# config: 'ingests/alliance/publication.yaml'
bgee_gene_to_expression:
Expand Down Expand Up @@ -64,3 +86,4 @@ zfin_gene_to_phenotype:
config: 'ingests/zfin/gene_to_phenotype.yaml'
# zfin_publication_to_gene:
# config: 'ingests/zfin/publication_to_gene.yaml'

22 changes: 17 additions & 5 deletions src/monarch_ingest/qc_expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ nodes:
provided_by:
alliance_gene_nodes:
min: 290000
alliance_genotype_nodes:
min: 130000
dictybase_gene_nodes:
min: 14000
hgnc_gene_nodes:
Expand All @@ -14,12 +16,14 @@ nodes:
min: 5000
reactome_pathway_nodes:
min: 21000
clinvar_variant_nodes:
min: 1280000
clingen_variant_nodes:
min: 7000
edges:
provided_by:
alliance_gene_to_expression_edges:
min: 1870000
alliance_gene_to_phenotype_edges:
min: 300000
bgee_gene_to_expression_edges:
min: 430000
biogrid_edges:
Expand Down Expand Up @@ -49,8 +53,16 @@ edges:
reactome_gene_to_pathway_edges:
min: 200000
string_protein_links_edges:
min: 1490000
min: 1470000
xenbase_gene_to_phenotype_edges:
min: 2000
zfin_gene_to_phenotype_edges:
min: 148000
alliance_phenotype_edges:
min: 650000
alliance_disease_edges:
min: 10000
zfin_genotype_to_phenotype_edges:
min: 125000
clinvar_variant_edges:
min: 1400000
clingen_variant_edges:
min: 5000
Loading