From 1d0b7b51189a9f13746b05145ff8a61ecba0c50a Mon Sep 17 00:00:00 2001 From: Roselyn Huynh Date: Tue, 8 Apr 2025 10:32:07 -0400 Subject: [PATCH 1/6] replace Union with | --- polaris/dataset/converters/_base.py | 4 ++-- polaris/dataset/converters/_pdb.py | 4 ++-- polaris/evaluate/metrics/docking_metrics.py | 4 ++-- polaris/hub/settings.py | 3 +-- polaris/utils/types.py | 10 +++++----- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/polaris/dataset/converters/_base.py b/polaris/dataset/converters/_base.py index dbd836c0..8ed58f4d 100644 --- a/polaris/dataset/converters/_base.py +++ b/polaris/dataset/converters/_base.py @@ -1,5 +1,5 @@ import abc -from typing import Dict, Tuple, TypeAlias, Union +from typing import Dict, Tuple, TypeAlias import pandas as pd @@ -17,7 +17,7 @@ def convert(self, path: str, append: bool = False) -> FactoryProduct: raise NotImplementedError @staticmethod - def get_pointer(column: str, index: Union[int, slice]) -> str: + def get_pointer(column: str, index: int | slice) -> str: """ Creates a pointer. diff --git a/polaris/dataset/converters/_pdb.py b/polaris/dataset/converters/_pdb.py index db9f4df0..665aa00e 100644 --- a/polaris/dataset/converters/_pdb.py +++ b/polaris/dataset/converters/_pdb.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import TYPE_CHECKING, Optional, Sequence, Union +from typing import TYPE_CHECKING, Optional, Sequence import fastpdb import numpy as np @@ -156,7 +156,7 @@ def _load_pdb(self, path: str, pdb_pointer=None) -> dict: return pdb_dict def _convert_pdb( - self, path: str, factory: "DatasetFactory", pdb_pointer: Union[str, int], append: bool = False + self, path: str, factory: "DatasetFactory", pdb_pointer: str | int, append: bool = False ) -> FactoryProduct: """ Convert a single pdb to zarr file diff --git a/polaris/evaluate/metrics/docking_metrics.py b/polaris/evaluate/metrics/docking_metrics.py index afa5d3fa..26b5b6d5 100644 --- a/polaris/evaluate/metrics/docking_metrics.py +++ b/polaris/evaluate/metrics/docking_metrics.py @@ -1,6 +1,6 @@ # This script includes docking related evaluation metrics. -from typing import Union, List +from typing import List import numpy as np from rdkit.Chem.rdMolAlign import CalcRMS @@ -36,7 +36,7 @@ def _rmsd(mol_probe: dm.Mol, mol_ref: dm.Mol) -> float: ) -def rmsd_coverage(y_pred: Union[str, List[dm.Mol]], y_true: Union[str, list[dm.Mol]], max_rsmd: float = 2): +def rmsd_coverage(y_pred: str | List[dm.Mol], y_true: str | list[dm.Mol], max_rsmd: float = 2): """ Calculate the coverage of molecules with an RMSD less than a threshold (2 Å by default) compared to the reference molecule conformer. diff --git a/polaris/hub/settings.py b/polaris/hub/settings.py index f52693cc..042b7d59 100644 --- a/polaris/hub/settings.py +++ b/polaris/hub/settings.py @@ -1,4 +1,3 @@ -from typing import Union from urllib.parse import urljoin from pydantic import ValidationInfo, field_validator @@ -57,7 +56,7 @@ class PolarisHubSettings(BaseSettings): client_id: str = "agQP2xVM6JqMHvGc" # Networking settings - ca_bundle: Union[str, bool, None] = None + ca_bundle: str | bool | None = None default_timeout: TimeoutTypes = (10, 200) @field_validator("api_url", mode="before") diff --git a/polaris/utils/types.py b/polaris/utils/types.py index fcdf196f..34c26bde 100644 --- a/polaris/utils/types.py +++ b/polaris/utils/types.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Annotated, Any, Literal, Optional, Tuple, Union +from typing import Annotated, Any, Literal, Optional, Tuple import numpy as np from pydantic import ( @@ -19,7 +19,7 @@ A split is defined by a sequence of integers. """ -SplitType: TypeAlias = tuple[SplitIndicesType, Union[SplitIndicesType, dict[str, SplitIndicesType]]] +SplitType: TypeAlias = tuple[SplitIndicesType, SplitIndicesType | dict[str, SplitIndicesType]] """ A split is a pair of which the first item is always assumed to be the train set. The second item can either be a single test set or a dictionary with multiple, named test sets. @@ -47,7 +47,7 @@ that looks like {"test_set_name": {"target_name": np.ndarray}}. """ -DatapointPartType = Union[Any, tuple[Any], dict[str, Any]] +DatapointPartType = Any | tuple[Any] | dict[str, Any] DatapointType: TypeAlias = tuple[DatapointPartType, DatapointPartType] """ A datapoint has: @@ -109,7 +109,7 @@ Type to specify access to a dataset, benchmark or result in the Hub. """ -TimeoutTypes = Union[Tuple[int, int], Literal["timeout", "never"]] +TimeoutTypes = Tuple[int, int] | Literal["timeout", "never"] """ Timeout types for specifying maximum wait times. """ @@ -150,7 +150,7 @@ - A single row, e.g. dataset[0] - Specify a specific value, e.g. dataset[0, "col1"] -There are more exciting options we could implement, such as slicing, +There are more exciting options we could implement, such as slicing, but this gets complex. """ From e7e349f9e02b27face57d7f0940427be7e3db7fb Mon Sep 17 00:00:00 2001 From: roselyn! <48699438+roselynh100@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:07:00 -0400 Subject: [PATCH 2/6] Update polaris/evaluate/metrics/docking_metrics.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Honoré Hounwanou --- polaris/evaluate/metrics/docking_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/evaluate/metrics/docking_metrics.py b/polaris/evaluate/metrics/docking_metrics.py index 26b5b6d5..5fad3193 100644 --- a/polaris/evaluate/metrics/docking_metrics.py +++ b/polaris/evaluate/metrics/docking_metrics.py @@ -36,7 +36,7 @@ def _rmsd(mol_probe: dm.Mol, mol_ref: dm.Mol) -> float: ) -def rmsd_coverage(y_pred: str | List[dm.Mol], y_true: str | list[dm.Mol], max_rsmd: float = 2): +def rmsd_coverage(y_pred: str | list[dm.Mol], y_true: str | list[dm.Mol], max_rsmd: float = 2): """ Calculate the coverage of molecules with an RMSD less than a threshold (2 Å by default) compared to the reference molecule conformer. From 243159eb74437b6a1e8c5cdd0aa5c9d4e225c0a4 Mon Sep 17 00:00:00 2001 From: roselyn! <48699438+roselynh100@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:07:11 -0400 Subject: [PATCH 3/6] Update polaris/evaluate/metrics/docking_metrics.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Honoré Hounwanou --- polaris/evaluate/metrics/docking_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polaris/evaluate/metrics/docking_metrics.py b/polaris/evaluate/metrics/docking_metrics.py index 5fad3193..4d3905c0 100644 --- a/polaris/evaluate/metrics/docking_metrics.py +++ b/polaris/evaluate/metrics/docking_metrics.py @@ -1,6 +1,5 @@ # This script includes docking related evaluation metrics. -from typing import List import numpy as np from rdkit.Chem.rdMolAlign import CalcRMS From 07a14d22ee22e7c74366a2ddb2f2ee39b2e01624 Mon Sep 17 00:00:00 2001 From: roselyn! <48699438+roselynh100@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:07:21 -0400 Subject: [PATCH 4/6] Update polaris/utils/types.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Honoré Hounwanou --- polaris/utils/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/utils/types.py b/polaris/utils/types.py index 34c26bde..805d020a 100644 --- a/polaris/utils/types.py +++ b/polaris/utils/types.py @@ -109,7 +109,7 @@ Type to specify access to a dataset, benchmark or result in the Hub. """ -TimeoutTypes = Tuple[int, int] | Literal["timeout", "never"] +TimeoutTypes = tuple[int, int] | Literal["timeout", "never"] """ Timeout types for specifying maximum wait times. """ From 3d4384df556602b070bea7c4e3d6a91ffa1a38cb Mon Sep 17 00:00:00 2001 From: roselyn! <48699438+roselynh100@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:07:27 -0400 Subject: [PATCH 5/6] Update polaris/utils/types.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Honoré Hounwanou --- polaris/utils/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/utils/types.py b/polaris/utils/types.py index 805d020a..e7964c2e 100644 --- a/polaris/utils/types.py +++ b/polaris/utils/types.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Annotated, Any, Literal, Optional, Tuple +from typing import Annotated, Any, Literal, Optional import numpy as np from pydantic import ( From 2dce8e39fbc0165c191c4d86b0f16ad7e72274f2 Mon Sep 17 00:00:00 2001 From: Roselyn Huynh Date: Tue, 8 Apr 2025 11:12:47 -0400 Subject: [PATCH 6/6] remove other deprecated typing: Dict, List, Tuple --- polaris/dataset/_dataset.py | 4 ++-- polaris/dataset/_subset.py | 4 ++-- polaris/dataset/converters/_base.py | 4 ++-- polaris/dataset/zarr/_checksum.py | 3 +-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index 71c6223c..e9c46e66 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -2,7 +2,7 @@ from hashlib import md5 from os import PathLike from pathlib import Path -from typing import Any, ClassVar, List, Literal +from typing import Any, ClassVar, Literal import fsspec import numpy as np @@ -148,7 +148,7 @@ def load_zarr_root_from_hub(self): @computed_field @property - def zarr_md5sum_manifest(self) -> List[ZarrFileChecksum]: + def zarr_md5sum_manifest(self) -> list[ZarrFileChecksum]: """ The Zarr Checksum manifest stores the checksums of all files in a Zarr archive. If the dataset doesn't use Zarr, this will simply return an empty list. diff --git a/polaris/dataset/_subset.py b/polaris/dataset/_subset.py index 6d1efda3..fa1d4e75 100644 --- a/polaris/dataset/_subset.py +++ b/polaris/dataset/_subset.py @@ -1,5 +1,5 @@ from copy import deepcopy -from typing import Callable, Iterable, List, Literal, Sequence +from typing import Callable, Iterable, Literal, Sequence import numpy as np import pandas as pd @@ -239,7 +239,7 @@ def extend_inputs(self, input_cols: Iterable[str] | str) -> Self: copy.input_cols = list(set(self.input_cols + input_cols)) return copy - def filter_targets(self, target_cols: List[str] | str) -> Self: + def filter_targets(self, target_cols: list[str] | str) -> Self: """ Filter the subset to only include the specified target columns. diff --git a/polaris/dataset/converters/_base.py b/polaris/dataset/converters/_base.py index 8ed58f4d..5424794f 100644 --- a/polaris/dataset/converters/_base.py +++ b/polaris/dataset/converters/_base.py @@ -1,5 +1,5 @@ import abc -from typing import Dict, Tuple, TypeAlias +from typing import TypeAlias import pandas as pd @@ -7,7 +7,7 @@ from polaris.dataset._adapters import Adapter from polaris.dataset._dataset import _INDEX_SEP -FactoryProduct: TypeAlias = Tuple[pd.DataFrame, Dict[str, ColumnAnnotation], Dict[str, Adapter]] +FactoryProduct: TypeAlias = tuple[pd.DataFrame, dict[str, ColumnAnnotation], dict[str, Adapter]] class Converter(abc.ABC): diff --git a/polaris/dataset/zarr/_checksum.py b/polaris/dataset/zarr/_checksum.py index ccf5d9fd..4f58c9bd 100644 --- a/polaris/dataset/zarr/_checksum.py +++ b/polaris/dataset/zarr/_checksum.py @@ -38,7 +38,6 @@ from functools import total_ordering from json import dumps from pathlib import Path -from typing import List, Tuple import fsspec import zarr @@ -52,7 +51,7 @@ ZARR_DIGEST_PATTERN = "([0-9a-f]{32})-([0-9]+)-([0-9]+)" -def compute_zarr_checksum(zarr_root_path: str) -> Tuple["_ZarrDirectoryDigest", List["ZarrFileChecksum"]]: +def compute_zarr_checksum(zarr_root_path: str) -> tuple["_ZarrDirectoryDigest", list["ZarrFileChecksum"]]: r""" Implements an algorithm to compute the Zarr checksum.