polaris-hub · roselynh100 · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025
@@ -2,7 +2,7 @@
 from hashlib import md5
 from os import PathLike
 from pathlib import Path
-from typing import Any, ClassVar, List, Literal
+from typing import Any, ClassVar, Literal
 
 import fsspec
 import numpy as np
@@ -148,7 +148,7 @@ def load_zarr_root_from_hub(self):
 
     @computed_field
     @property
-    def zarr_md5sum_manifest(self) -> List[ZarrFileChecksum]:
+    def zarr_md5sum_manifest(self) -> list[ZarrFileChecksum]:
         """
         The Zarr Checksum manifest stores the checksums of all files in a Zarr archive.
         If the dataset doesn't use Zarr, this will simply return an empty list.

@@ -1,5 +1,5 @@
 from copy import deepcopy
-from typing import Callable, Iterable, List, Literal, Sequence
+from typing import Callable, Iterable, Literal, Sequence
 
 import numpy as np
 import pandas as pd
@@ -239,7 +239,7 @@ def extend_inputs(self, input_cols: Iterable[str] | str) -> Self:
         copy.input_cols = list(set(self.input_cols + input_cols))
         return copy
 
-    def filter_targets(self, target_cols: List[str] | str) -> Self:
+    def filter_targets(self, target_cols: list[str] | str) -> Self:
         """
         Filter the subset to only include the specified target columns.
 

@@ -1,13 +1,13 @@
 import abc
-from typing import Dict, Tuple, TypeAlias, Union
+from typing import TypeAlias
 
 import pandas as pd
 
 from polaris.dataset import ColumnAnnotation
 from polaris.dataset._adapters import Adapter
 from polaris.dataset._dataset import _INDEX_SEP
 
-FactoryProduct: TypeAlias = Tuple[pd.DataFrame, Dict[str, ColumnAnnotation], Dict[str, Adapter]]
+FactoryProduct: TypeAlias = tuple[pd.DataFrame, dict[str, ColumnAnnotation], dict[str, Adapter]]
 
 
 class Converter(abc.ABC):
@@ -17,7 +17,7 @@ def convert(self, path: str, append: bool = False) -> FactoryProduct:
         raise NotImplementedError
 
     @staticmethod
-    def get_pointer(column: str, index: Union[int, slice]) -> str:
+    def get_pointer(column: str, index: int | slice) -> str:
         """
         Creates a pointer.
 

@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import TYPE_CHECKING, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Optional, Sequence
 
 import fastpdb
 import numpy as np
@@ -156,7 +156,7 @@ def _load_pdb(self, path: str, pdb_pointer=None) -> dict:
         return pdb_dict
 
     def _convert_pdb(
-        self, path: str, factory: "DatasetFactory", pdb_pointer: Union[str, int], append: bool = False
+        self, path: str, factory: "DatasetFactory", pdb_pointer: str | int, append: bool = False
     ) -> FactoryProduct:
         """
         Convert a single pdb to zarr file

@@ -38,7 +38,6 @@
 from functools import total_ordering
 from json import dumps
 from pathlib import Path
-from typing import List, Tuple
 
 import fsspec
 import zarr
@@ -52,7 +51,7 @@
 ZARR_DIGEST_PATTERN = "([0-9a-f]{32})-([0-9]+)-([0-9]+)"
 
 
-def compute_zarr_checksum(zarr_root_path: str) -> Tuple["_ZarrDirectoryDigest", List["ZarrFileChecksum"]]:
+def compute_zarr_checksum(zarr_root_path: str) -> tuple["_ZarrDirectoryDigest", list["ZarrFileChecksum"]]:
     r"""
     Implements an algorithm to compute the Zarr checksum.
 

@@ -1,6 +1,5 @@
 # This script includes docking related evaluation metrics.
 
-from typing import Union, List
 
 import numpy as np
 from rdkit.Chem.rdMolAlign import CalcRMS
@@ -36,7 +35,7 @@ def _rmsd(mol_probe: dm.Mol, mol_ref: dm.Mol) -> float:
     )
 
 
-def rmsd_coverage(y_pred: Union[str, List[dm.Mol]], y_true: Union[str, list[dm.Mol]], max_rsmd: float = 2):
+def rmsd_coverage(y_pred: str | list[dm.Mol], y_true: str | list[dm.Mol], max_rsmd: float = 2):
     """
     Calculate the coverage of molecules with an RMSD less than a threshold (2 Å by default) compared to the reference molecule conformer.
 

@@ -1,4 +1,3 @@
-from typing import Union
 from urllib.parse import urljoin
 
 from pydantic import ValidationInfo, field_validator
@@ -57,7 +56,7 @@ class PolarisHubSettings(BaseSettings):
     client_id: str = "agQP2xVM6JqMHvGc"
 
     # Networking settings
-    ca_bundle: Union[str, bool, None] = None
+    ca_bundle: str | bool | None = None
     default_timeout: TimeoutTypes = (10, 200)
 
     @field_validator("api_url", mode="before")

@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Annotated, Any, Literal, Optional, Tuple, Union
+from typing import Annotated, Any, Literal, Optional
 
 import numpy as np
 from pydantic import (
@@ -19,7 +19,7 @@
 A split is defined by a sequence of integers.
 """
 
-SplitType: TypeAlias = tuple[SplitIndicesType, Union[SplitIndicesType, dict[str, SplitIndicesType]]]
+SplitType: TypeAlias = tuple[SplitIndicesType, SplitIndicesType | dict[str, SplitIndicesType]]
 """
 A split is a pair of which the first item is always assumed to be the train set.
 The second item can either be a single test set or a dictionary with multiple, named test sets.
@@ -47,7 +47,7 @@
 that looks like {"test_set_name": {"target_name": np.ndarray}}.
 """
 
-DatapointPartType = Union[Any, tuple[Any], dict[str, Any]]
+DatapointPartType = Any | tuple[Any] | dict[str, Any]
 DatapointType: TypeAlias = tuple[DatapointPartType, DatapointPartType]
 """
 A datapoint has:
@@ -109,7 +109,7 @@
 Type to specify access to a dataset, benchmark or result in the Hub.
 """
 
-TimeoutTypes = Union[Tuple[int, int], Literal["timeout", "never"]]
+TimeoutTypes = tuple[int, int] | Literal["timeout", "never"]
 """
 Timeout types for specifying maximum wait times.
 """
@@ -150,7 +150,7 @@
 - A single row, e.g. dataset[0]
 - Specify a specific value, e.g. dataset[0, "col1"]
 
-There are more exciting options we could implement, such as slicing, 
+There are more exciting options we could implement, such as slicing,
 but this gets complex.
 """