Skip to content

Commit

Permalink
Merge pull request #590 from monarch-initiative/merge-modulars
Browse files Browse the repository at this point in the history
Add modular genotype & variant ingests
  • Loading branch information
kevinschaper committed Jun 12, 2024
2 parents 47c57ce + bffde88 commit 8840883
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 7 deletions.
3 changes: 3 additions & 0 deletions scripts/after_download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ tar xfO data/monarch/kg-phenio.tar.gz merged-kg_nodes.tsv | grep ^NCBITaxon | cu

# Repair Orphanet prefixes in MONDO sssom rows as necessary
# Rewrites a tab followed by "orphanet.ordo:" to a tab followed by "Orphanet:", editing the file in place.
# NOTE(review): `sed -i` with no backup suffix and `\t` inside the pattern are GNU sed behaviours;
# BSD/macOS sed handles both differently — confirm this script only runs under GNU sed.
sed -i 's/\torphanet.ordo\:/\tOrphanet\:/g' data/monarch/mondo.sssom.tsv

# Repair mesh: prefixes in MONDO sssom rows as necessary
# Upper-cases every "mesh:" to "MESH:" in the same file, using "@" as the sed delimiter.
# NOTE(review): unlike the Orphanet rule, this pattern is not anchored to a leading tab, so any
# "mesh:" substring anywhere on a line is rewritten — confirm that is intended.
sed -i 's@mesh:@MESH:@g' data/monarch/mondo.sssom.tsv
14 changes: 14 additions & 0 deletions src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from biolink_model.datamodel import model # import the pythongen biolink model to get the version
from linkml_runtime import SchemaView
from linkml.utils.helpers import convert_to_snake_case
import requests

# from loguru import logger
import pandas
Expand Down Expand Up @@ -46,6 +47,19 @@ def transform_one(
# if log: logger.removeHandler(fh)
raise ValueError(f"{ingest} is not a valid ingest - see ingests.yaml for a list of options")

# if a url is provided instead of a config, just download the file and copy it to the output dir
# Each entry under the ingest's "url" key is fetched and saved as
# {output_dir}/transform_output/<last path segment of the URL>.
if "url" in ingests[ingest]:
for url in ingests[ingest]["url"]:
filename = url.split("/")[-1]

# Keep an existing download unless the caller passed `force`.
if Path(f"{output_dir}/transform_output/{filename}").is_file() and not force:
continue

# NOTE(review): the response status is never checked, so an HTTP error body (e.g. a 404 page)
# would be written out as the .tsv and then kept by the is_file() check above on later runs —
# consider response.raise_for_status(). The request also has no timeout.
# NOTE(review): open() below assumes {output_dir}/transform_output already exists — confirm
# the caller creates it before this point.
response = requests.get(url, allow_redirects=True)
with open(f"{output_dir}/transform_output/{filename}", "wb") as f:
f.write(response.content)
# Pass-through ingests stop here; presumably this return belongs to the `if "url"` branch so the
# config-driven transform that follows is skipped — TODO confirm against the indented source.
return

source_file = Path(Path(__file__).parent, ingests[ingest]["config"])

if not Path(source_file).is_file():
Expand Down
27 changes: 25 additions & 2 deletions src/monarch_ingest/ingests.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
## Pass-through modular ingests
alliance_genotype:
url:
- 'https://github.com/monarch-initiative/alliance-genotype-ingest/releases/latest/download/alliance_genotype_nodes.tsv'
alliance_phenotype:
url:
- 'https://github.com/monarch-initiative/alliance-phenotype-association-ingest/releases/latest/download/alliance_phenotype_edges.tsv'
alliance_disease_association:
url:
- 'https://github.com/monarch-initiative/alliance-disease-association-ingest/releases/latest/download/alliance_disease_edges.tsv'
zfin_genotype_to_phenotype:
url:
- 'https://github.com/monarch-initiative/zfin-genotype-to-phenotype-ingest/releases/latest/download/zfin_genotype_to_phenotype_edges.tsv'
clingen_variant:
url:
- 'https://github.com/monarch-initiative/clingen-ingest/releases/latest/download/clingen_variant_nodes.tsv'
- 'https://github.com/monarch-initiative/clingen-ingest/releases/latest/download/clingen_variant_edges.tsv'
clinvar_variant:
url:
- 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_nodes.tsv'
- 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_edges.tsv'

## Ingests within this repository

alliance_gene:
config: 'ingests/alliance/gene.yaml'
alliance_gene_to_expression:
config: 'ingests/alliance/gene_to_expression.yaml'
alliance_gene_to_phenotype:
config: 'ingests/alliance/gene_to_phenotype.yaml'
# alliance_publication:
# config: 'ingests/alliance/publication.yaml'
bgee_gene_to_expression:
Expand Down Expand Up @@ -64,3 +86,4 @@ zfin_gene_to_phenotype:
config: 'ingests/zfin/gene_to_phenotype.yaml'
# zfin_publication_to_gene:
# config: 'ingests/zfin/publication_to_gene.yaml'

22 changes: 17 additions & 5 deletions src/monarch_ingest/qc_expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ nodes:
provided_by:
alliance_gene_nodes:
min: 290000
alliance_genotype_nodes:
min: 130000
dictybase_gene_nodes:
min: 14000
hgnc_gene_nodes:
Expand All @@ -14,12 +16,14 @@ nodes:
min: 5000
reactome_pathway_nodes:
min: 21000
clinvar_variant_nodes:
min: 1280000
clingen_variant_nodes:
min: 7000
edges:
provided_by:
alliance_gene_to_expression_edges:
min: 1870000
alliance_gene_to_phenotype_edges:
min: 300000
bgee_gene_to_expression_edges:
min: 430000
biogrid_edges:
Expand Down Expand Up @@ -49,8 +53,16 @@ edges:
reactome_gene_to_pathway_edges:
min: 200000
string_protein_links_edges:
min: 1490000
min: 1470000
xenbase_gene_to_phenotype_edges:
min: 2000
zfin_gene_to_phenotype_edges:
min: 148000
alliance_phenotype_edges:
min: 650000
alliance_disease_edges:
min: 10000
zfin_genotype_to_phenotype_edges:
min: 125000
clinvar_variant_edges:
min: 1400000
clingen_variant_edges:
min: 5000

0 comments on commit 8840883

Please sign in to comment.