Skip to content

Commit

Permalink
Merge pull request #506 from monarch-initiative/exclude-phenio-edges-with-nulls
Browse files Browse the repository at this point in the history

Exclude phenio edges with null subject/object
  • Loading branch information
kevinschaper committed Aug 25, 2023
2 parents dde2168 + 99cabad commit 6b91005
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
8 changes: 3 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,15 @@ packages = [
python = ">=3.10,<3.12"
kghub-downloader = "^0.3.2"
koza = "^0.3.0"
cat-merge = "^0.1.19"
cat-merge = "0.1.20"
closurizer = "0.3.0"
#monarch-py = "^0.13.2"
monarch-py = {git = "https://github.com/monarch-initiative/monarch-app.git", branch = "taxon-labels", subdirectory = "backend"}
monarch-py = "0.15.1"
kgx = "^2.1.0"
multi-indexer = "*"
biolink-model = "3.5.0"
linkml = "^1.5.5"
linkml = "^1.5.7"
#linkml-solr = "^0.1.3"
linkml-solr = {git = "https://github.com/linkml/linkml-solr.git", branch = "top_class"}
greenlet = "^1.1.2"
importlib-metadata = "^4.6.1"
sh = "^1.14.3"
typer = "^0.7"
Expand Down
3 changes: 3 additions & 0 deletions src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ def transform_phenio(
edges_df = edges_df[~edges_df["subject"].str.startswith(tuple(exclude_prefixes))
& ~edges_df["object"].str.startswith(tuple(exclude_prefixes))]

# Remove edges where the subject or object is NA
edges_df = edges_df[~edges_df["subject"].isna() & ~edges_df["object"].isna()]

valid_predicates = {f"biolink:{convert_to_snake_case(pred)}" for pred in biolink_model_schema.slot_descendants("related to")}
phenio_predicates = set(edges_df['predicate'].unique())
invalid_predicates = phenio_predicates - valid_predicates
Expand Down

0 comments on commit 6b91005

Please sign in to comment.