Skip to content

Commit

Permalink
Merge pull request #584 from monarch-initiative/issue-657-add-klat
Browse files Browse the repository at this point in the history
Add knowledge level & agent type, plus constraint improvements
  • Loading branch information
kevinschaper committed Apr 17, 2024
2 parents b1d2ea2 + efdea05 commit 055b188
Show file tree
Hide file tree
Showing 60 changed files with 297 additions and 100 deletions.
53 changes: 15 additions & 38 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ packages = [
python = ">=3.10,<3.12"
# biolink-model = "^4.2.0"
# When 4.2.0 (or any release after 4.1.6) is released, we can remove the git dependency
biolink-model = { git = "https://github.com/biolink/biolink-model", tag = "v4.2.0-rc.2" }
biolink-model = { git = "https://github.com/biolink/biolink-model", branch = "master" }
bmt = "^1.0.15"
cat-merge = "0.2.1"
closurizer = "0.5.1"
kghub-downloader = "^0.3.2"
kgx = { git = "https://github.com/biolink/kgx", branch = "master" } # ">=2.1"
koza = ">=0.5.2"
linkml = "^1.6.3"
linkml-runtime = "1.6.2"
linkml = "1.6.3"
linkml-solr = "0.1.5" # "^0.1.3"
multi-indexer = "0.0.5"
# Other Dependencies
Expand All @@ -38,6 +37,7 @@ sh = "^1.14.3"
typer = "^0.7"
typer-cli = "^0.0.13"
yamllint = "^1.35.1"
linkml-runtime = "1.6.3"

[tool.poetry.group.dev]
optional = true
Expand Down
4 changes: 2 additions & 2 deletions scripts/load_solr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ echo "Download the schema from monarch-py"

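# retrieve the schema from monarch-app (normally the main branch; a feature branch may be pinned while schema changes are pending — NOTE(review): confirm the branch in the URLs below before release)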

curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/model.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/similarity.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/issue-675-add-kl-at/backend/src/monarch_py/datamodels/model.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/issue-675-add-kl-at/backend/src/monarch_py/datamodels/similarity.yaml

echo "Starting the server"
poetry run lsolr start-server
Expand Down
16 changes: 15 additions & 1 deletion src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
import gc
import os
import pkgutil
import sys
import tarfile
import yaml
Expand Down Expand Up @@ -188,15 +189,28 @@ def transform_phenio(
"predicate",
"object",
"category",
"relation",
"primary_knowledge_source",
"aggregator_knowledge_source",
"knowledge_level",
"agent_type"
]
),
axis=1,
inplace=True,
)

# if the knowledge_level column doesn't exist, add it and default every edge to knowledge_assertion
if "knowledge_level" not in edges_df.columns:
edges_df["knowledge_level"] = "knowledge_assertion"
# if the agent_type column doesn't exist, add it and default every edge to manual_agent
if "agent_type" not in edges_df.columns:
edges_df["agent_type"] = "manual_agent"

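# prepend "infores:monarchinitiative|" (pipe-delimited) to aggregator_knowledge_source values that don't already start with infores:monarchinitiative
# NOTE(review): .startswith assumes every value is a string — a missing/NaN value would raise here; confirm the column is fully populated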
edges_df["aggregator_knowledge_source"] = edges_df["aggregator_knowledge_source"].apply(
lambda x: f"infores:monarchinitiative|{x}" if not x.startswith("infores:monarchinitiative") else x
)

edges_df = edges_df[edges_df["predicate"].str.contains(":")]

# assign level association category if edge category is empty
Expand Down
5 changes: 3 additions & 2 deletions src/monarch_ingest/ingests/alliance/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
id=gene_id,
symbol=row["symbol"],
name=row["symbol"],
full_name=row["name"],
full_name=row["name"].replace("\r",""), # Replacement to remove stray carriage returns in XenBase files
# No place in the schema for gene type (SO term) right now
# type=row["soTermId"],
in_taxon=[in_taxon],
Expand All @@ -62,6 +62,7 @@
for xref in row["basicGeneticEntity"]["crossReferences"]
]
if "synonyms" in row["basicGeneticEntity"].keys():
gene.synonym = row["basicGeneticEntity"]["synonyms"]
# strip errant carriage returns from each synonym as well
gene.synonym = [synonym.replace("\r","") for synonym in row["basicGeneticEntity"]["synonyms"] ]

koza_app.write(gene)
10 changes: 7 additions & 3 deletions src/monarch_ingest/ingests/alliance/gene_to_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from koza.cli_runner import get_koza_app
from source_translation import source_map

from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation
from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation, KnowledgeLevelEnum, AgentTypeEnum

from monarch_ingest.ingests.alliance.utils import get_data

Expand Down Expand Up @@ -54,7 +54,9 @@
qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None),
publications=publication_ids,
aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
primary_knowledge_source=source
primary_knowledge_source=source,
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)
)

Expand All @@ -71,7 +73,9 @@
qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None),
publications=publication_ids,
aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
primary_knowledge_source=source
primary_knowledge_source=source,
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)
)
else:
Expand Down
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/alliance/gene_to_expression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,7 @@ edge_properties:
- 'publications'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
7 changes: 5 additions & 2 deletions src/monarch_ingest/ingests/alliance/gene_to_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from koza.cli_runner import get_koza_app
from source_translation import source_map

from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation
from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, AgentTypeEnum


from loguru import logger
Expand Down Expand Up @@ -41,7 +41,10 @@
object=phenotypic_feature_id,
publications=[row["evidence"]["publicationId"]],
aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
primary_knowledge_source=source
primary_knowledge_source=source,
knowledge_level = KnowledgeLevelEnum.knowledge_assertion,
agent_type = AgentTypeEnum.manual_agent

)

if "conditionRelations" in row.keys() and row["conditionRelations"] is not None:
Expand Down
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/alliance/gene_to_phenotype.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,7 @@ edge_properties:
- 'publications'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/bgee/gene_to_expression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,7 @@ edge_properties:
- 'object'
- 'primary_knowledge_source'
- 'aggregator_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
6 changes: 4 additions & 2 deletions src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
from typing import Dict, List, Union
from koza.app import KozaApp
from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation
from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation, KnowledgeLevelEnum, AgentTypeEnum


def filter_group_by_rank(rows: List, col: str, largest_n: int = 0, smallest_n: int = 0) -> List[Dict]:
Expand Down Expand Up @@ -42,7 +42,9 @@ def write_group(rows: List, koza_app: KozaApp):
predicate='biolink:expressed_in',
object=row['Anatomical entity ID'],
primary_knowledge_source="infores:bgee",
aggregator_knowledge_source=["infores:monarchinitiative"])
aggregator_knowledge_source=["infores:monarchinitiative"],
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.not_provided)

koza_app.write(association)

Expand Down
6 changes: 4 additions & 2 deletions src/monarch_ingest/ingests/biogrid/biogrid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid
from koza.cli_runner import get_koza_app
from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction
from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction, KnowledgeLevelEnum, AgentTypeEnum
from biogrid_util import get_gene_id, get_evidence, get_publication_ids

koza_app = get_koza_app("biogrid")
Expand All @@ -25,7 +25,9 @@
has_evidence=evidence,
publications=publications,
primary_knowledge_source="infores:biogrid",
aggregator_knowledge_source=["infores:monarchinitiative"]
aggregator_knowledge_source=["infores:monarchinitiative"],
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.not_provided
)

koza_app.write(association)
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/biogrid/biogrid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,7 @@ edge_properties:
- 'publications'
- 'primary_knowledge_source'
- 'aggregator_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
2 changes: 1 addition & 1 deletion src/monarch_ingest/ingests/biogrid/biogrid_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_evidence(methods: str) -> Optional[List[str]]:
method = method.rstrip(")").split('(')[-1]
if method not in EVIDENCE_CODE_MAPPINGS.keys():
err_msg = f"Unknown interaction detection method '{method}'. " +\
"Assigning default code ECO:0000006 == 'experimental evidence'."
"Assigning default code ECO:0000006 == 'experimental evidence', the ECO root."
logger.warning(err_msg)
EVIDENCE_CODE_MAPPINGS[method] = "ECO:0000006"

Expand Down
7 changes: 5 additions & 2 deletions src/monarch_ingest/ingests/ctd/chemical_to_disease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from koza.cli_runner import get_koza_app

from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToDiseaseOrPhenotypicFeatureAssociation
from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToDiseaseOrPhenotypicFeatureAssociation, \
KnowledgeLevelEnum, AgentTypeEnum
from monarch_ingest.constants import BIOLINK_TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT

koza_app = get_koza_app("ctd_chemical_to_disease")
Expand All @@ -25,7 +26,9 @@
object=disease_id,
publications=["PMID:" + p for p in row['PubMedIDs'].split("|")],
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source="infores:ctd"
primary_knowledge_source="infores:ctd",
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)

koza_app.write(association)
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/ctd/chemical_to_disease.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@ edge_properties:
- 'publications'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'
7 changes: 5 additions & 2 deletions src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from koza.cli_runner import get_koza_app
from monarch_ingest.ingests.dictybase.utils import parse_gene_id, parse_phenotypes

from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation
from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \
AgentTypeEnum

koza_app = get_koza_app("dictybase_gene_to_phenotype")

Expand All @@ -29,7 +30,9 @@
predicate='biolink:has_phenotype',
object=phenotype_id,
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source="infores:dictybase"
primary_knowledge_source="infores:dictybase",
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)

koza_app.write(association)
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/dictybase/gene_to_phenotype.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,7 @@ edge_properties:
- 'object'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
5 changes: 4 additions & 1 deletion src/monarch_ingest/ingests/flybase/publication_to_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from koza.cli_runner import get_koza_app

from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation
from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \
KnowledgeLevelEnum

koza_app = get_koza_app("flybase_publication_to_gene")

Expand All @@ -24,6 +25,8 @@
object=publication_id,
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source="infores:flybase",
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)

koza_app.write(association)
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/flybase/publication_to_gene.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,7 @@ edge_properties:
- 'object'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
5 changes: 4 additions & 1 deletion src/monarch_ingest/ingests/go/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import uuid

from biolink_model.datamodel.pydanticmodel_v2 import KnowledgeLevelEnum, AgentTypeEnum
from koza.cli_runner import get_koza_app

from monarch_ingest.ingests.go.annotation_utils import (
Expand Down Expand Up @@ -119,7 +120,9 @@
has_evidence=[eco_term],
# subject_context_qualifier=ncbitaxa, # Biolink Pydantic model support missing for this slot
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source=assigned_by
primary_knowledge_source=assigned_by,
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)

# Write the captured Association out
Expand Down
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/go/annotation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,7 @@ edge_properties:
- 'object'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'

transform_mode: 'flat'
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@

from koza.cli_runner import get_koza_app

from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation

from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation, \
KnowledgeLevelEnum, AgentTypeEnum

from loguru import logger

Expand Down Expand Up @@ -71,7 +71,9 @@
publications=publications,
has_evidence=[evidence_curie],
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source="infores:hpo-annotations"
primary_knowledge_source="infores:hpo-annotations",
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent
)
koza_app.write(association)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@ edge_properties:
- 'has_evidence'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'
8 changes: 6 additions & 2 deletions src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@

from koza.cli_runner import get_koza_app

from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation
from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \
AgentTypeEnum
from monarch_ingest.ingests.hpoa.hpoa_utils import phenotype_frequency_to_hpo_term, FrequencyHpoTerm, Frequency

from loguru import logger
Expand Down Expand Up @@ -96,6 +97,9 @@
has_count=frequency.has_count,
has_total=frequency.has_total,
aggregator_knowledge_source=["infores:monarchinitiative"],
primary_knowledge_source="infores:hpo-annotations"
primary_knowledge_source="infores:hpo-annotations",
knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
agent_type=AgentTypeEnum.manual_agent

)
koza_app.write(association)
2 changes: 2 additions & 0 deletions src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,5 @@ edge_properties:
- 'has_evidence'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
- 'knowledge_level'
- 'agent_type'
Loading

0 comments on commit 055b188

Please sign in to comment.