format, update linting rule

monarch-initiative · May 16, 2024 · 4544837
1 parent e5084f1
commit 4544837
Show file tree

Hide file tree

Showing 81 changed files with 480 additions and 445 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -63,4 +63,7 @@ skip-string-normalization = true
 line-length = 120
 ignore = [
     "F541", # f-strings with no placeholders
-]
+
+]
+[tool.ruff.lint.per-file-ignores]
+"tests/**.py" = ["F811"] # redefinition of unused imports (mock_koza)
diff --git a/src/monarch_ingest/__init__.py b/src/monarch_ingest/__init__.py
@@ -1,2 +1,3 @@
 from importlib import metadata
-__version__ = metadata.version("monarch_ingest")
+
+__version__ = metadata.version("monarch_ingest")
diff --git a/src/monarch_ingest/cli_utils.py b/src/monarch_ingest/cli_utils.py
@@ -1,7 +1,6 @@
 import csv
 import gc
 import os
-import pkgutil
 import sys
 import tarfile
 import yaml
@@ -192,7 +191,7 @@ def transform_phenio(
                 "primary_knowledge_source",
                 "aggregator_knowledge_source",
                 "knowledge_level",
-                "agent_type"
+                "agent_type",
             ]
         ),
         axis=1,
@@ -524,15 +523,9 @@ def do_release(dir: str = OUTPUT_DIR, kghub: bool = False):
             )
             # index files on s3 after upload
             sh.multi_indexer(
-                *f"-v --prefix https://kghub.io/kg-monarch/ -b kg-hub-public-data -r kg-monarch -x".split(
-                    " "
-                )
-            )
-            sh.gsutil(
-                *f"-q -m cp -a public-read ./index.html s3://kg-hub-public-data/kg-monarch".split(
-                    " "
-                )
+                *f"-v --prefix https://kghub.io/kg-monarch/ -b kg-hub-public-data -r kg-monarch -x".split(" ")
             )
+            sh.gsutil(*f"-q -m cp -a public-read ./index.html s3://kg-hub-public-data/kg-monarch".split(" "))
 
         logger.debug("Cleaning up files...")
         sh.rm(f"output/{release_ver}")

diff --git a/src/monarch_ingest/ingests/alliance/gene.py b/src/monarch_ingest/ingests/alliance/gene.py
@@ -48,21 +48,18 @@
         id=gene_id,
         symbol=row["symbol"],
         name=row["symbol"],
-        full_name=row["name"].replace("\r",""), # Replacement to remove stray carriage returns in XenBase files
+        full_name=row["name"].replace("\r", ""),  # Replacement to remove stray carriage returns in XenBase files
         # No place in the schema for gene type (SO term) right now
         # type=row["soTermId"],
         in_taxon=[in_taxon],
         in_taxon_label=in_taxon_label,
-        provided_by=[source]
+        provided_by=[source],
     )
 
     if row["basicGeneticEntity"]["crossReferences"]:
-        gene.xref = [
-            koza_app.curie_cleaner.clean(xref["id"])
-            for xref in row["basicGeneticEntity"]["crossReferences"]
-        ]
+        gene.xref = [koza_app.curie_cleaner.clean(xref["id"]) for xref in row["basicGeneticEntity"]["crossReferences"]]
     if "synonyms" in row["basicGeneticEntity"].keys():
         # more handling for errant carriage returns
-        gene.synonym = [synonym.replace("\r","") for synonym in row["basicGeneticEntity"]["synonyms"] ]
+        gene.synonym = [synonym.replace("\r", "") for synonym in row["basicGeneticEntity"]["synonyms"]]
 
     koza_app.write(gene)
diff --git a/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py b/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py
@@ -5,7 +5,11 @@
 from koza.cli_utils import get_koza_app
 from source_translation import source_map
 
-from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, AgentTypeEnum
+from biolink_model.datamodel.pydanticmodel_v2 import (
+    GeneToPhenotypicFeatureAssociation,
+    KnowledgeLevelEnum,
+    AgentTypeEnum,
+)
 
 
 from loguru import logger
@@ -42,9 +46,8 @@
             publications=[row["evidence"]["publicationId"]],
             aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
             primary_knowledge_source=source,
-            knowledge_level = KnowledgeLevelEnum.knowledge_assertion,
-            agent_type = AgentTypeEnum.manual_agent
-
+            knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
+            agent_type=AgentTypeEnum.manual_agent,
         )
 
         if "conditionRelations" in row.keys() and row["conditionRelations"] is not None:

diff --git a/src/monarch_ingest/ingests/alliance/publication.py b/src/monarch_ingest/ingests/alliance/publication.py
@@ -9,9 +9,7 @@
 while (row := koza_app.get_row()) is not None:
 
     # TODO: remove DOI exclusion once curie regex can handle them
-    xrefs = [
-        xref["id"] for xref in row["crossReferences"] if not xref["id"].startswith("DOI:")
-    ]
+    xrefs = [xref["id"] for xref in row["crossReferences"] if not xref["id"].startswith("DOI:")]
 
     # Parse creation date for different time formats
     creation_date = row["datePublished"]
@@ -21,10 +19,12 @@
         creation_date = None
 
     source: str
-    if 'MODReferenceTypes' in row and \
-            len(row['MODReferenceTypes']) > 0 and \
-            'source' in row['MODReferenceTypes'][0] and \
-            row['MODReferenceTypes'][0]['source'] in source_map:
+    if (
+        'MODReferenceTypes' in row
+        and len(row['MODReferenceTypes']) > 0
+        and 'source' in row['MODReferenceTypes'][0]
+        and row['MODReferenceTypes'][0]['source'] in source_map
+    ):
         source = source_map[row['MODReferenceTypes'][0]['source']]
     else:  # default source
         source = "infores:alliancegenome"
@@ -36,7 +36,7 @@
         xref=xrefs,
         type=[koza_app.translation_table.resolve_term("publication")],
         creation_date=creation_date,
-        provided_by=[source]
+        provided_by=[source],
     )
 
     if "authors" in row.keys():

diff --git a/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py b/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py
@@ -8,17 +8,17 @@
 def filter_group_by_rank(rows: List, col: str, largest_n: int = 0, smallest_n: int = 0) -> List[Dict]:
     """Function to filter a group of Koza rows by values largest or smallest values in column:
 
-        Get the top and/or bottom n rows ranked based on column:
+    Get the top and/or bottom n rows ranked based on column:
 
-        Args:
-            rows (List): The Koza object to read rows from.
-            col (str): The column to perform ranking and filtering.
-            largest_n (int): The number of rows to return from the largest ranking.
-            smallest_n (int): The number of rows to return from the smallest ranking.
+    Args:
+        rows (List): The Koza object to read rows from.
+        col (str): The column to perform ranking and filtering.
+        largest_n (int): The number of rows to return from the largest ranking.
+        smallest_n (int): The number of rows to return from the smallest ranking.
 
 
-        Returns:
-            List[Dict]: Returns a list of n rows in Koza dict format sorted by rank in column.
+    Returns:
+        List[Dict]: Returns a list of n rows in Koza dict format sorted by rank in column.
     """
     df = pd.DataFrame(rows)
     largest_df = df.nlargest(largest_n, col, keep="first")
@@ -29,11 +29,11 @@ def filter_group_by_rank(rows: List, col: str, largest_n: int = 0, smallest_n: i
 def write_group(rows: List, koza_app: KozaApp):
     """Function to write a group of Koza rows to KozaApp object output:
 
-        Write list of rows in Koza format to KozaApp output:
+    Write list of rows in Koza format to KozaApp output:
 
-        Args:
-            rows (List): A list of rows to output to KozaApp.
-            koza_app (KozaApp): The KozaApp to use for output of rows.
+    Args:
+        rows (List): A list of rows to output to KozaApp.
+        koza_app (KozaApp): The KozaApp to use for output of rows.
     """
     for row in rows:
         association = GeneToExpressionSiteAssociation(
@@ -44,23 +44,24 @@ def write_group(rows: List, koza_app: KozaApp):
             primary_knowledge_source="infores:bgee",
             aggregator_knowledge_source=["infores:monarchinitiative"],
             knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
-            agent_type=AgentTypeEnum.not_provided)
+            agent_type=AgentTypeEnum.not_provided,
+        )
 
         koza_app.write(association)
 
 
 def get_row_group(koza_app: KozaApp, col: str = 'Gene ID') -> Union[List, None]:
     """Function to read a group of Koza rows from a KozaApp:
 
-        Get a group of rows from KozaApp grouped on column:
+    Get a group of rows from KozaApp grouped on column:
 
-        Args:
-            koza_app (KozaApp): The Koza object to read rows from.
-            col (str): The column to group rows based on.
+    Args:
+        koza_app (KozaApp): The Koza object to read rows from.
+        col (str): The column to group rows based on.
 
 
-        Returns:
-            List/None: Returns a list of rows in Koza dict format grouped by column.
+    Returns:
+        List/None: Returns a list of rows in Koza dict format grouped by column.
     """
     if not hasattr(koza_app, 'previous_row'):
         koza_app.previous_row = koza_app.get_row()
@@ -81,11 +82,11 @@ def get_row_group(koza_app: KozaApp, col: str = 'Gene ID') -> Union[List, None]:
 def process_koza_source(koza_app: KozaApp):
     """Function to filter a group of Koza rows by values largest or smallest values in column:
 
-        Get the top and/or bottom n rows ranked based on column:
+    Get the top and/or bottom n rows ranked based on column:
 
-        Args:
-            koza_app (KozaApp): The Koza object to process for ingest.
+    Args:
+        koza_app (KozaApp): The Koza object to process for ingest.
     """
-    while(row_group := get_row_group(koza_app)) is not None:
+    while (row_group := get_row_group(koza_app)) is not None:
         rank_filtered_rows = filter_group_by_rank(row_group, col='Expression rank', smallest_n=10)
         write_group(rank_filtered_rows, koza_app)
diff --git a/src/monarch_ingest/ingests/biogrid/biogrid.py b/src/monarch_ingest/ingests/biogrid/biogrid.py
@@ -15,8 +15,12 @@
     publications = get_publication_ids(row['Publication Identifiers'])
 
     # Only keep interactions using NCBIGene or UniProtKB identifiers, could also filter on taxid
-    if gid_a.startswith("NCBIGene:") or gid_a.startswith("UniProtKB:") \
-            and gid_b.startswith("NCBIGene:") or gid_b.startswith("UniProtKB:"):
+    if (
+        gid_a.startswith("NCBIGene:")
+        or gid_a.startswith("UniProtKB:")
+        and gid_b.startswith("NCBIGene:")
+        or gid_b.startswith("UniProtKB:")
+    ):
         association = PairwiseGeneToGeneInteraction(
             id="uuid:" + str(uuid.uuid1()),
             subject=gid_a,
@@ -27,7 +31,7 @@
             primary_knowledge_source="infores:biogrid",
             aggregator_knowledge_source=["infores:monarchinitiative"],
             knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
-            agent_type=AgentTypeEnum.not_provided
+            agent_type=AgentTypeEnum.not_provided,
         )
 
         koza_app.write(association)
diff --git a/src/monarch_ingest/ingests/biogrid/biogrid_util.py b/src/monarch_ingest/ingests/biogrid/biogrid_util.py
@@ -1,7 +1,7 @@
 """
 Some functions to assist parsing of BioGRID fields.
 """
-from sys import stderr
+
 from typing import List, Optional
 
 from loguru import logger
@@ -14,9 +14,7 @@ def get_gene_id(raw_id: str) -> str:
     :param raw_id: str, raw BioGRID input string (a pseudo-CURIE)
     :return:
     """
-    gid = (raw_id
-           .replace("entrez gene/locuslink:", "NCBIGene:")
-           .replace("uniprot/swiss-prot:", "UniProtKB:"))
+    gid = raw_id.replace("entrez gene/locuslink:", "NCBIGene:").replace("uniprot/swiss-prot:", "UniProtKB:")
 
     return gid
 
@@ -27,14 +25,14 @@ def get_gene_id(raw_id: str) -> str:
     "two hybrid": "ECO:0000024",
     "affinity chromatography technology": "ECO:0000079",
     "genetic interference": "ECO:0000011",
-    "pull down": "ECO:0000025",   # not totally sure about this one
+    "pull down": "ECO:0000025",  # not totally sure about this one
     "enzymatic study": "ECO:0000005",
     "x-ray crystallography": "ECO:0001823",
     "far western blotting": "ECO:0000076",
     "fluorescent resonance energy transfer": "ECO:0001048",
-    "imaging technique": "ECO:0000324",   # not totally sure about this one
+    "imaging technique": "ECO:0000324",  # not totally sure about this one
     "protein complementation assay": "ECO:0006256",  # not totally sure about this one
-    "biochemical": "ECO:0000172"  # not totally sure about this one
+    "biochemical": "ECO:0000172",  # not totally sure about this one
 }
 
 
@@ -54,11 +52,13 @@ def get_evidence(methods: str) -> Optional[List[str]]:
             # databaseName:identifier(methodName)
             method = method.rstrip(")").split('(')[-1]
             if method not in EVIDENCE_CODE_MAPPINGS.keys():
-                err_msg = f"Unknown interaction detection method '{method}'. " +\
-                          "Assigning default code ECO:0000006 == 'experimental evidence', the ECO root."
+                err_msg = (
+                    f"Unknown interaction detection method '{method}'. "
+                    + "Assigning default code ECO:0000006 == 'experimental evidence', the ECO root."
+                )
                 logger.warning(err_msg)
                 EVIDENCE_CODE_MAPPINGS[method] = "ECO:0000006"
-                
+
             evidence_codes.append(EVIDENCE_CODE_MAPPINGS[method])
 
     return evidence_codes if evidence_codes else None

diff --git a/src/monarch_ingest/ingests/ctd/chemical_to_disease.py b/src/monarch_ingest/ingests/ctd/chemical_to_disease.py
@@ -2,8 +2,11 @@
 
 from koza.cli_utils import get_koza_app
 
-from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToDiseaseOrPhenotypicFeatureAssociation, \
-    KnowledgeLevelEnum, AgentTypeEnum
+from biolink_model.datamodel.pydanticmodel_v2 import (
+    ChemicalToDiseaseOrPhenotypicFeatureAssociation,
+    KnowledgeLevelEnum,
+    AgentTypeEnum,
+)
 from monarch_ingest.constants import BIOLINK_TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT
 
 koza_app = get_koza_app("ctd_chemical_to_disease")
@@ -28,7 +31,7 @@
             aggregator_knowledge_source=["infores:monarchinitiative"],
             primary_knowledge_source="infores:ctd",
             knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
-            agent_type=AgentTypeEnum.manual_agent
+            agent_type=AgentTypeEnum.manual_agent,
         )
 
         koza_app.write(association)
diff --git a/src/monarch_ingest/ingests/dictybase/gene.py b/src/monarch_ingest/ingests/dictybase/gene.py
@@ -9,12 +9,11 @@
 in_taxon_label = taxon_labels[in_taxon]['label'] if in_taxon in taxon_labels else "Dictyostelium discoideum"
 
 while (row := koza_app.get_row()) is not None:
-    
+
     synonyms = []
     if row['Synonyms'] is not None:
         synonyms = row['Synonyms'].split(", ")
 
-
     gene = Gene(
         id='dictyBase:' + row['GENE ID'],
         symbol=row['Gene Name'],
@@ -23,7 +22,7 @@
         synonym=synonyms,
         in_taxon=[in_taxon],
         in_taxon_label=in_taxon_label,
-        provided_by=["infores:dictybase"]
+        provided_by=["infores:dictybase"],
     )
 
     koza_app.write(gene)
diff --git a/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py b/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py
@@ -1,11 +1,13 @@
 import uuid
-from typing import Optional, Tuple
 
 from koza.cli_utils import get_koza_app
-from monarch_ingest.ingests.dictybase.utils import parse_gene_id, parse_phenotypes
+from monarch_ingest.ingests.dictybase.utils import parse_phenotypes
 
-from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \
-    AgentTypeEnum
+from biolink_model.datamodel.pydanticmodel_v2 import (
+    GeneToPhenotypicFeatureAssociation,
+    KnowledgeLevelEnum,
+    AgentTypeEnum,
+)
 
 koza_app = get_koza_app("dictybase_gene_to_phenotype")
 
@@ -32,7 +34,7 @@
                 aggregator_knowledge_source=["infores:monarchinitiative"],
                 primary_knowledge_source="infores:dictybase",
                 knowledge_level=KnowledgeLevelEnum.knowledge_assertion,
-                agent_type=AgentTypeEnum.manual_agent
+                agent_type=AgentTypeEnum.manual_agent,
             )
 
             koza_app.write(association)
diff --git a/src/monarch_ingest/ingests/dictybase/utils.py b/src/monarch_ingest/ingests/dictybase/utils.py
@@ -1,6 +1,7 @@
 """
 A few Dictybase parse utility functions
 """
+
 from typing import Optional, Tuple, Dict, List