diff --git a/pyproject.toml b/pyproject.toml
index 4d094a94b..df71eef65 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ dependencies = [
     "matplotlib>=3.5.0,<3.7.2",
     "networkx>=2.2",
     "numba>=0.51.0,!=0.57.0",
-    "numpy>=1.17.0",
+    "numpy>=1.22.0",
     "pandas>=1.5.0",
     "pygam>=0.8.0",
     "pygpcca>=1.0.4",
diff --git a/src/cellrank/_utils/_colors.py b/src/cellrank/_utils/_colors.py
index 8b8560622..3ca195e0d 100644
--- a/src/cellrank/_utils/_colors.py
+++ b/src/cellrank/_utils/_colors.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 import scipy.stats as st
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 from matplotlib import cm, colors
 
@@ -175,9 +175,9 @@ def _map_names_and_colors(
         Series with updated category names and a corresponding array of colors.
     """
     # checks: dtypes, matching indices, make sure colors match the categories
-    if not is_categorical_dtype(series_reference):
+    if not isinstance(series_reference.dtype, pd.CategoricalDtype):
        raise TypeError(f"Reference series must be `categorical`, found `{infer_dtype(series_reference)}`.")
-    if not is_categorical_dtype(series_query):
+    if not isinstance(series_query.dtype, pd.CategoricalDtype):
        raise TypeError(f"Query series must be `categorical`, found `{infer_dtype(series_query)}`.")
     if len(series_reference) != len(series_query):
        raise ValueError(
diff --git a/src/cellrank/_utils/_lineage.py b/src/cellrank/_utils/_lineage.py
index 1531a9b4c..c4776a9d9 100644
--- a/src/cellrank/_utils/_lineage.py
+++ b/src/cellrank/_utils/_lineage.py
@@ -21,7 +21,7 @@
 import numpy as np
 import pandas as pd
 import scipy.stats as st
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 import matplotlib.pyplot as plt
 from matplotlib import colors
@@ -102,7 +102,7 @@ def wrap(numpy_func: Callable) -> Callable:
     """
 
     @functools.wraps(numpy_func)
-    def decorator(array, *args, **kwargs):
+    def decorator(array: "Lineage", *args, **kwargs):
         if not isinstance(array, Lineage):
             raise TypeError(f"Expected array to be of type `Lineage`, found `{type(array).__name__}`.")
         if fname == "squeeze":
@@ -172,16 +172,21 @@ def decorator(array, *args, **kwargs):
 
 
 def _register_handled_functions():
+    # adapted from:
+    # https://github.com/numpy/numpy/blob/v1.26.0/numpy/testing/overrides.py#L50
+    try:
+        from numpy.core.overrides import ARRAY_FUNCTIONS
+    except ImportError:
+        ARRAY_FUNCTIONS = [getattr(np, attr) for attr in dir(np)]
+
     handled_fns = {}
-    for attrname in dir(np):
-        fn = getattr(np, attrname)
-        if isinstance(fn, types.FunctionType):
-            try:
-                sig = inspect.signature(fn)
-                if "axis" in sig.parameters:
-                    handled_fns[fn] = wrap(fn)
-            except ValueError:
-                pass
+    for fn in ARRAY_FUNCTIONS:
+        try:
+            sig = inspect.signature(fn)
+            if "axis" in sig.parameters:
+                handled_fns[fn] = wrap(fn)
+        except Exception:  # noqa: BLE001
+            pass
 
     handled_fns.pop(np.expand_dims, None)
 
@@ -289,7 +294,7 @@ def __array_function__(self, func, types, args, kwargs):
            return NotImplemented
         # Note: this allows subclasses that don't override
         # __array_function__ to handle MyArray objects
-        if not all(issubclass(t, type(self)) for t in types):
+        if not all(issubclass(t, self.__class__) for t in types):
            return NotImplemented
         return _HANDLED_FUNCTIONS[func](*args, **kwargs)
@@ -648,7 +653,7 @@ def reduce(
            "cosine_sim", "wasserstein_dist", "kl_div", "js_div", "mutual_info", "equal"
         ] = DistanceMeasure.MUTUAL_INFO,
         normalize_weights: Literal["scale", "softmax"] = NormWeights.SOFTMAX,
-        softmax_scale: float = 1,
+        softmax_scale: float = 1.0,
         return_weights: bool = False,
     ) -> Union["Lineage", Tuple["Lineage", Optional[pd.DataFrame]]]:
         """Subset states and normalize them so that they again sum to :math:`1`.
@@ -854,7 +859,7 @@ def from_adata(
         states = adata.obs.get(nkey, None)
         if states is None:
             logg.warning(f"Unable to find states in `adata.obs[{nkey!r}]`. Using default names")
-        elif not is_categorical_dtype(states):
+        elif not isinstance(states.dtype, pd.CategoricalDtype):
             logg.warning(
                 f"Expected `adata.obs[{key!r}]` to be `categorical`, "
                 f"found `{infer_dtype(adata.obs[nkey])}`. Using default names"
             )
@@ -1129,10 +1134,10 @@ def _softmax(X, beta: float = 1):
     return np.exp(X * beta) / np.expand_dims(np.sum(np.exp(X * beta), axis=1), -1)
 
 
-def _row_normalize(X):
+def _row_normalize(X: Union[np.ndarray, Lineage]) -> Union[np.ndarray, Lineage]:
     if isinstance(X, Lineage):
-        return X / X.sum(1)  # Lineage is shape-preserving
-    return X / np.expand_dims(X.sum(1), -1)
+        return X / X.sum(1)  # lineage is shape-preserving
+    return X / X.sum(1, keepdims=True)
 
 
 def _col_normalize(X, norm_ord=2):
diff --git a/src/cellrank/_utils/_linear_solver.py b/src/cellrank/_utils/_linear_solver.py
index 303f92e7f..b9b32c26c 100644
--- a/src/cellrank/_utils/_linear_solver.py
+++ b/src/cellrank/_utils/_linear_solver.py
@@ -149,8 +149,10 @@ def _(
         b.setArray(mat_b.squeeze())
 
         ksp.solve(b, x)
+        # `is_converged` in PETSc >= 3.20
+        converged = ksp.is_converged if hasattr(ksp, "is_converged") else ksp.converged
 
-    return np.atleast_1d(x.getArray().copy().squeeze()), int(ksp.converged)
+    return np.atleast_1d(x.getArray().copy().squeeze()), int(converged)
 
 
 @_solve_many_sparse_problems_petsc.register(sp.csc_matrix)
@@ -174,7 +176,8 @@ def _(
         ksp.solve(b, x)
         xs.append(np.atleast_1d(x.getArray().copy().squeeze()))
-        converged += ksp.converged
+        # `is_converged` in PETSc >= 3.20
+        converged += ksp.is_converged if hasattr(ksp, "is_converged") else ksp.converged
 
         if queue is not None:
             queue.put(1)
@@ -306,8 +309,10 @@ def _petsc_direct_solve(
     factored_matrix.matSolve(B, x)
 
     res = np.array(x.getDenseArray(), copy=True)
+    # `is_converged` in PETSc >= 3.20
+    converged = ksp.is_converged if hasattr(ksp, "is_converged") else ksp.converged
 
-    if not ksp.converged:
+    if not converged:
        logg.debug(
            f"The solution for system "
            f"`A{list(A.getSize())} * X{list(x.getSize())} = B{list(B.getSize())}` "
diff --git a/src/cellrank/_utils/_utils.py b/src/cellrank/_utils/_utils.py
index fb1cf18af..4f0831ccf 100644
--- a/src/cellrank/_utils/_utils.py
+++ b/src/cellrank/_utils/_utils.py
@@ -26,7 +26,7 @@
 import pandas as pd
 import scipy.sparse as sp
 import scipy.stats as st
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 from sklearn.cluster import KMeans
 from statsmodels.stats.multitest import multipletests
 
@@ -183,7 +183,7 @@ def _process_series(
     process_colors = cols is not None
 
     # assert dtype of the series
-    if not is_categorical_dtype(series):
+    if not isinstance(series.dtype, pd.CategoricalDtype):
        raise TypeError(f"Series must be `categorical`, found `{infer_dtype(series)}`.")
 
     # if keys is None, just return
@@ -530,7 +530,7 @@ def perm_test_extractor(res: Sequence[Tuple[np.ndarray, np.ndarray]]) -> Tuple[n
 
 def _filter_cells(distances: sp.spmatrix, rc_labels: pd.Series, n_matches_min: int) -> pd.Series:
     """Filter out some cells that look like transient states based on their neighbors."""
-    if not is_categorical_dtype(rc_labels):
+    if not isinstance(rc_labels.dtype, pd.CategoricalDtype):
        raise TypeError(f"Expected `categories` be `categorical`, found `{infer_dtype(rc_labels)}`.")
 
     # retrieve knn graph
@@ -886,7 +886,7 @@ def _convert_to_categorical_series(
            "that there are no conflicting keys, such as `0` and `'0'`."
        )
 
-    term_states = pd.Series([np.nan] * len(cell_names), index=cell_names)
+    term_states = pd.Series([None] * len(cell_names), index=cell_names, dtype=str)
     for ts, cells in mapper.items():
         term_states[cells] = ts
 
@@ -947,10 +947,10 @@ def get_color_mapper(
 
         return cols
 
-    if not is_categorical_dtype(old):
+    if not isinstance(old.dtype, pd.CategoricalDtype):
        raise TypeError(f"Expected old approx. recurrent classes to be categorical, found "
                        f"`{infer_dtype(old)}`.")
-    if not is_categorical_dtype(new):
+    if not isinstance(new.dtype, pd.CategoricalDtype):
        raise TypeError(f"Expected new approx. recurrent classes to be categorical, found "
                        f"`{infer_dtype(new)}`.")
     if (old.index != new.index).any():
@@ -1158,7 +1158,7 @@ def _series_from_one_hot_matrix(
     target_series = pd.Series(index=index, dtype="category")
     for vec, name in zip(membership.T, names):
         target_series = target_series.cat.add_categories(name)
-        target_series[np.where(vec)[0]] = name
+        target_series.iloc[np.where(vec)[0]] = name
 
     return target_series
diff --git a/src/cellrank/estimators/mixins/_fate_probabilities.py b/src/cellrank/estimators/mixins/_fate_probabilities.py
index c61007ec5..b14326ad9 100644
--- a/src/cellrank/estimators/mixins/_fate_probabilities.py
+++ b/src/cellrank/estimators/mixins/_fate_probabilities.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 from anndata import AnnData
 
@@ -386,7 +386,7 @@ def compute_lineage_priming(
         key = next(iter(early_cells.keys()))
         if key not in self.adata.obs:
             raise KeyError(f"Unable to find clusters in `adata.obs[{key!r}]`.")
-        if not is_categorical_dtype(self.adata.obs[key]):
+        if not isinstance(self.adata.obs[key].dtype, pd.CategoricalDtype):
             raise TypeError(
                 f"Expected `adata.obs[{key!r}]` to be categorical, " f"found `{infer_dtype(self.adata.obs[key])}`."
             )
diff --git a/src/cellrank/estimators/terminal_states/_gpcca.py b/src/cellrank/estimators/terminal_states/_gpcca.py
index 007313fcc..1dbc077a0 100644
--- a/src/cellrank/estimators/terminal_states/_gpcca.py
+++ b/src/cellrank/estimators/terminal_states/_gpcca.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 import matplotlib.pyplot as plt
 from matplotlib.axes import Axes
@@ -367,7 +367,7 @@ def predict_initial_states(self, n_states: int = 1, n_cells: int = 30, allow_ove
         if stat_dist is None:
             raise RuntimeError("No coarse-grained stationary distribution found.")
 
-        states = list(stat_dist[np.argsort(stat_dist)][:n_states].index)
+        states = list(stat_dist.iloc[np.argsort(stat_dist)][:n_states].index)
         return self.set_initial_states(states, n_cells=n_cells, allow_overlap=allow_overlap)
 
     @d.dedent
@@ -876,7 +876,7 @@ def plot_macrostate_composition(
             raise RuntimeError("Compute macrostates first as `.compute_macrostates()`.")
         if key not in self.adata.obs:
             raise KeyError(f"Data not found in `adata.obs[{key!r}]`.")
-        if not is_categorical_dtype(self.adata.obs[key]):
+        if not isinstance(self.adata.obs[key].dtype, pd.CategoricalDtype):
             raise TypeError(
                 f"Expected `adata.obs[{key!r}]` to be `categorical`, " f"found `{infer_dtype(self.adata.obs[key])}`."
             )
@@ -893,7 +893,7 @@ def plot_macrostate_composition(
         cats_colors = _create_categorical_colors(len(self.adata.obs[key].cat.categories))
         cat_color_mapper = dict(zip(self.adata.obs[key].cat.categories, cats_colors))
         x_indices = np.arange(len(macrostates.cat.categories))
-        bottom = np.zeros_like(x_indices, dtype=np.float32)
+        bottom = np.zeros_like(x_indices, dtype=float)
         width = min(1, max(0, width))
 
         fig, ax = plt.subplots(figsize=figsize, dpi=dpi, tight_layout=True)
@@ -1155,7 +1155,7 @@ def _write_macrostates(
             self._set("_coarse_stat_dist", value=stat_dist, shadow_only=True)
             self._set(
                 obj=self.adata.uns, key=Key.uns.coarse(self.backward),
-                value=AnnData(tmat, obs=dists, dtype=float)
+                value=AnnData(tmat, obs=dists),
             )
         else:
             for attr in ["_schur_vectors", "_schur_matrix", "_coarse_tmat", "_coarse_init_dist", "_coarse_stat_dist"]:
diff --git a/src/cellrank/estimators/terminal_states/_term_states_estimator.py b/src/cellrank/estimators/terminal_states/_term_states_estimator.py
index 6871a8e55..67fc35027 100644
--- a/src/cellrank/estimators/terminal_states/_term_states_estimator.py
+++ b/src/cellrank/estimators/terminal_states/_term_states_estimator.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 from matplotlib.colors import to_hex
 
@@ -411,7 +411,7 @@ def _plot_discrete(
     ) -> None:
         if not isinstance(_data, pd.Series):
             raise TypeError(f"Expected `data` to be of type `pandas.Series`, found `{type(_data)}`.")
-        if not is_categorical_dtype(_data):
+        if not isinstance(_data.dtype, pd.CategoricalDtype):
             raise TypeError(f"Expected `data` to be `categorical`, found `{infer_dtype(_data)}`.")
 
         names = list(_data.cat.categories)
@@ -564,7 +564,9 @@ def _set_categorical_labels(
         # fmt: off
         if isinstance(categories, dict):
             key = next(iter(categories.keys()))
-            if len(categories) == 1 and is_categorical_dtype(self.adata.obs.get(key, None)):
+            data = self.adata.obs.get(key, None)
+            is_categorical = data is not None and isinstance(data.dtype, pd.CategoricalDtype)
+            if len(categories) == 1 and is_categorical:
                 vals = categories[key]
                 if isinstance(vals, str) or not isinstance(vals, Sequence):
                     vals = (categories[key],)
@@ -575,7 +577,7 @@ def _set_categorical_labels(
            categories = {cat: self.adata[clusters == cat].obs_names for cat in vals}
 
         categories = _convert_to_categorical_series(categories, list(self.adata.obs_names))
-        if not is_categorical_dtype(categories):
+        if not isinstance(categories.dtype, pd.CategoricalDtype):
             raise TypeError(f"Expected object to be `categorical`, found `{infer_dtype(categories)}`.")
 
         if existing is not None:
diff --git a/src/cellrank/kernels/_precomputed_kernel.py b/src/cellrank/kernels/_precomputed_kernel.py
index adbf3fca0..c5d97b4e0 100644
--- a/src/cellrank/kernels/_precomputed_kernel.py
+++ b/src/cellrank/kernels/_precomputed_kernel.py
@@ -142,7 +142,7 @@ def _from_matrix(
         # fmt: off
         if adata is None:
             logg.warning(f"Creating empty `AnnData` object of shape `{matrix.shape[0], 1}`")
-            adata = AnnData(sp.csr_matrix((matrix.shape[0], 1), dtype=np.float64))
+            adata = AnnData(sp.csr_matrix((matrix.shape[0], 1)))
 
         super().__init__(adata)
         self._backward: Optional[bool] = backward
         self.transition_matrix = matrix.copy() if copy else matrix
diff --git a/src/cellrank/kernels/_real_time_kernel.py b/src/cellrank/kernels/_real_time_kernel.py
index 494ef3a7a..cc4849f3c 100644
--- a/src/cellrank/kernels/_real_time_kernel.py
+++ b/src/cellrank/kernels/_real_time_kernel.py
@@ -20,7 +20,7 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 
 import scanpy as sc
 from anndata import AnnData
 
@@ -105,7 +105,7 @@ def _read_from_adata(
     ) -> None:
         super()._read_from_adata(**kwargs)
         self._time = self.adata.obs[time_key].copy()
-        if not is_categorical_dtype(self._time):
+        if not isinstance(self._time.dtype, pd.CategoricalDtype):
             raise TypeError(f"Expected `adata.obs[{time_key!r}]` to be categorical, found `{infer_dtype(self._time)}`.")
         self._time = self._time.cat.remove_unused_categories()
         cats = self._time.cat.categories
@@ -448,7 +448,7 @@ def _restich_couplings(
         for ix in range(len(blocks)):
             index.extend(obs_names[ix])
 
-        tmp = AnnData(sp.bmat(blocks, format="csr"), dtype="float64")
+        tmp = AnnData(sp.bmat(blocks, format="csr"))
         tmp.obs_names = index
         tmp.var_names = index
         tmp = tmp[self.adata.obs_names, :][:, self.adata.obs_names]
@@ -513,14 +513,14 @@ def _sparsify_couplings(
             if threshold == "auto_local":
                 thresh = min(tmat[i].max() for i in range(tmat.shape[0]))
                 logg.debug(f"Using `threshold={thresh}` at `{key}`")
-            elif isinstance(threshold, (int, float)):
+            elif isinstance(threshold, (int, float, np.number)):
                 thresh = np.percentile(tmat.data, threshold)
                 logg.debug(f"Using `threshold={thresh}` at `{key}`")
 
             tmat = sp.csr_matrix(tmat, dtype=tmat.dtype)
             tmat.data[tmat.data < thresh] = 0.0
             tmat.eliminate_zeros()
-            couplings[key] = AnnData(tmat, obs=adata.obs, var=adata.var, dtype=tmat.dtype)
+            couplings[key] = AnnData(tmat, obs=adata.obs, var=adata.var)
 
         return couplings if copy else None
@@ -579,9 +579,9 @@ def assert_same(expected: Sequence[Any], actual: Sequence[Any], msg: Optional[st
     def _coupling_to_adata(self, src: Any, tgt: Any, coupling: Coupling_t) -> AnnData:
         """Convert the coupling to :class:`~anndata.AnnData`."""
         if not isinstance(coupling, AnnData):
-            coupling = AnnData(X=coupling, dtype=coupling.dtype)
-            coupling.obs_names = self.adata[self._time == src].obs_names
-            coupling.var_names = self.adata[self._time == tgt].obs_names
+            coupling = AnnData(X=coupling)
+            coupling.obs_names = np.asarray(self.adata.obs_names)[self.time == src]
+            coupling.var_names = np.asarray(self.adata.obs_names)[self.time == tgt]
 
         if sp.issparse(coupling.X) and not sp.isspmatrix_csr(coupling.X):
             coupling.X = coupling.X.tocsr()
diff --git a/src/cellrank/kernels/_utils.py b/src/cellrank/kernels/_utils.py
index 1d663c401..e53bbd8b6 100644
--- a/src/cellrank/kernels/_utils.py
+++ b/src/cellrank/kernels/_utils.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 from numba import prange
-from pandas.api.types import infer_dtype, is_categorical_dtype, is_numeric_dtype
+from pandas.api.types import infer_dtype
 
 from anndata import AnnData
 
@@ -130,19 +130,19 @@ def _ensure_numeric_ordered(adata: AnnData, key: str) -> pd.Series:
        raise KeyError(f"Unable to find data in `adata.obs[{key!r}]`.")
 
     exp_time = adata.obs[key].copy()
-    if not is_numeric_dtype(np.asarray(exp_time)):
+    if not np.issubdtype(np.asarray(exp_time).dtype, np.number):
         try:
             exp_time = np.asarray(exp_time).astype(float)
-        except ValueError as e:
+        except Exception as e:  # noqa: BLE001/Cannot interpret
             raise TypeError(
                 f"Unable to convert `adata.obs[{key!r}]` of type `{infer_dtype(adata.obs[key])}` to `float`."
             ) from e
 
-    if not is_categorical_dtype(exp_time):
+    if not isinstance(exp_time.dtype, pd.CategoricalDtype):
         logg.debug(f"Converting `adata.obs[{key!r}]` to `categorical`")
         exp_time = np.asarray(exp_time)
         categories = sorted(set(exp_time[~np.isnan(exp_time)]))
-        if len(categories) > 100:
+        if len(categories) > 100:  # arbitrary threshold
             raise ValueError(
                 f"Unable to convert `adata.obs[{key!r}]` to `categorical` since it "
                 f"would create `{len(categories)}` categories."
diff --git a/src/cellrank/kernels/utils/_random_walk.py b/src/cellrank/kernels/utils/_random_walk.py
index 34c751bd0..665a0da6b 100644
--- a/src/cellrank/kernels/utils/_random_walk.py
+++ b/src/cellrank/kernels/utils/_random_walk.py
@@ -5,8 +5,9 @@
 import scvelo as scv
 
 import numpy as np
+import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype, is_numeric_dtype
+from pandas.api.types import infer_dtype
 
 import matplotlib.pyplot as plt
 from matplotlib.collections import LineCollection
 
@@ -294,9 +295,9 @@ def _normalize_ixs(self, ixs: Indices_t, *, kind: Literal["start", "stop"]) -> O
                raise KeyError(f"Unable to find data in `adata.obs[{key!r}]`.")
 
             vals = self._adata.obs[key]
-            if is_categorical_dtype(vals):
+            if isinstance(vals.dtype, pd.CategoricalDtype):
                 ixs = np.where(np.isin(vals, ixs[key]))[0]
-            elif is_numeric_dtype(vals):
+            elif np.issubdtype(vals.dtype, np.number):
                 if len(ixs[key]) != 2:
                     raise ValueError(f"Expected range to be of length `2`, found `{len(ixs[key])}`")
                 minn, maxx = sorted(ixs[key])
diff --git a/src/cellrank/kernels/utils/_tmat_flow.py b/src/cellrank/kernels/utils/_tmat_flow.py
index 5b081e5e3..610b45ec9 100644
--- a/src/cellrank/kernels/utils/_tmat_flow.py
+++ b/src/cellrank/kernels/utils/_tmat_flow.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import scipy.sparse as sp
 import scipy.stats as st
-from pandas.api.types import infer_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype
 from scipy.interpolate import interp1d
 from statsmodels.nonparametric.smoothers_lowess import lowess
 
@@ -71,7 +71,7 @@ def __init__(
 
         if self._ckey not in self._adata.obs:
             raise KeyError(f"Unable to find clusters in `adata.obs[{self._ckey!r}]`.")
-        if not is_categorical_dtype(self._adata.obs[self._ckey]):
+        if not isinstance(self._adata.obs[self._ckey].dtype, pd.CategoricalDtype):
             raise TypeError(
                 f"Expected `adata.obs[{self._ckey!r}]` to be categorical, "
                 f"found `{infer_dtype(self._adata.obs[self._ckey])}`."
diff --git a/src/cellrank/models/_base_model.py b/src/cellrank/models/_base_model.py
index c2c4ce45c..da4efb965 100644
--- a/src/cellrank/models/_base_model.py
+++ b/src/cellrank/models/_base_model.py
@@ -9,8 +9,9 @@
 import wrapt
 
 import numpy as np
+import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import infer_dtype, is_categorical_dtype, is_numeric_dtype
+from pandas.api.types import infer_dtype
 from scipy.ndimage import convolve
 
 import matplotlib as mpl
 
@@ -399,7 +400,7 @@ def prepare(
         else:
             val_start, val_end = None, time_range
 
-        if isinstance(weight_threshold, (int, float)):
+        if isinstance(weight_threshold, (int, float, np.number)):
             weight_threshold = (weight_threshold, weight_threshold)
         if len(weight_threshold) != 2:
             raise ValueError(f"Expected `weight_threshold` to be of size `2`, found `{len(weight_threshold)}`.")
@@ -1076,7 +1077,7 @@ def _get_colors(
             return None, key, ColorType.STR, None
 
         if key in self.adata.obs:
-            if is_categorical_dtype(self.adata.obs[key]):
+            if isinstance(self.adata.obs[key].dtype, pd.CategoricalDtype):
                 add_colors_for_categorical_sample_annotation(
                     self.adata,
                     key=key,
@@ -1097,7 +1098,7 @@ def _get_colors(
                     col_dict,
                 )
 
-            if is_numeric_dtype(self.adata.obs[key]):
+            if np.issubdtype(self.adata.obs[key].dtype, np.number):
                 return key, self.adata.obs[key].values, ColorType.CONT, None
 
             logg.debug(f"Unable to interpret cell color from type `{infer_dtype(self.adata.obs[key])}`")
diff --git a/src/cellrank/pl/_aggregate_fate_probs.py b/src/cellrank/pl/_aggregate_fate_probs.py
index 83a05e5fa..652aa08ef 100644
--- a/src/cellrank/pl/_aggregate_fate_probs.py
+++ b/src/cellrank/pl/_aggregate_fate_probs.py
@@ -328,7 +328,7 @@ def plot_violin_no_cluster_key():
         kwargs["rotation"] = xrot
 
         data = np.ravel(probs.X.T)[..., None]
-        tmp = AnnData(sp.csr_matrix(data.shape, dtype=data.dtype), dtype=data.dtype)
+        tmp = AnnData(sp.csr_matrix(data.shape, dtype=data.dtype))
         tmp.obs["fate probability"] = data
         tmp.obs[term_states] = (
             pd.Series(np.concatenate([[f"{direction.lower()} {n}"] * adata.n_obs for n in probs.names]))
diff --git a/src/cellrank/pl/_circular_projection.py b/src/cellrank/pl/_circular_projection.py
index 307c79391..6604070af 100644
--- a/src/cellrank/pl/_circular_projection.py
+++ b/src/cellrank/pl/_circular_projection.py
@@ -288,7 +288,7 @@ def circular_projection(
 
     # clockwise
     for color, text in zip(probs.colors[::-1], texts):
-        if isinstance(label_rot, (int, float)):
+        if isinstance(label_rot, (int, float, np.number)):
             text.set_rotation(label_rot)
         elif label_rot == LabelRot.BEST:
             rot = text.get_rotation()
diff --git a/src/cellrank/pl/_heatmap.py b/src/cellrank/pl/_heatmap.py
index e458c8974..be49a7b84 100644
--- a/src/cellrank/pl/_heatmap.py
+++ b/src/cellrank/pl/_heatmap.py
@@ -178,7 +178,7 @@ def find_nearest(array: np.ndarray, value: float) -> int:
             return int(ix)
 
         series = series.sort_values(ascending=True)
-        return list(series[[find_nearest(series.values, v) for v in values]].index)
+        return list(series.iloc[[find_nearest(series.values, v) for v in values]].index)
 
     def subset_lineage(lname: str, rng: np.ndarray) -> np.ndarray:
         time_series = adata.obs[time_key]
diff --git a/src/cellrank/pl/_utils.py b/src/cellrank/pl/_utils.py
index 0d1120b3f..a58f286af 100644
--- a/src/cellrank/pl/_utils.py
+++ b/src/cellrank/pl/_utils.py
@@ -18,7 +18,7 @@
 import numpy as np
 import pandas as pd
-from pandas.api.types import infer_dtype, is_categorical_dtype, is_numeric_dtype
+from pandas.api.types import infer_dtype
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 
@@ -342,7 +342,8 @@ def is_valid(x: Union[BaseModel, BulkRes]) -> bool:
         return x.x_test is not None and x.y_test is not None and np.all(np.isfinite(x.y_test))
 
     modelmat = pd.DataFrame(models).T
-    modelmask = modelmat.applymap(is_valid)
+    # `DataFrame.map` in `pandas>=2.1.0`
+    modelmask = modelmat.map(is_valid) if hasattr(modelmat, "map") else modelmat.applymap(is_valid)
     to_keep = modelmask[modelmask.any(axis=1)]
     to_keep = to_keep.loc[:, to_keep.any(axis=0)].T
 
@@ -892,7 +893,7 @@ def composition(
     """
     if key not in adata.obs:
        raise KeyError(f"Data not found in `adata.obs[{key!r}]`.")
-    if not is_categorical_dtype(adata.obs[key]):
+    if not isinstance(adata.obs[key].dtype, pd.CategoricalDtype):
        raise TypeError(f"Expected `adata.obs[{key!r}]` is not `categorical`, found `{infer_dtype(adata.obs[key])}`.")
 
     colors = adata.uns.get(f"{key}_colors", None)
@@ -986,7 +987,7 @@ def _held_karp(dists: np.ndarray) -> Tuple[float, np.ndarray]:
 
 def _get_categorical_colors(adata: AnnData, cluster_key: str) -> Tuple[np.ndarray, Mapping[str, str]]:
     if cluster_key not in adata.obs:
        raise KeyError(f"Unable to find data in `adata.obs[{cluster_key!r}].`")
-    if not is_categorical_dtype(adata.obs[cluster_key]):
+    if not isinstance(adata.obs[cluster_key].dtype, pd.CategoricalDtype):
        raise TypeError(
            f"Expected `adata.obs[{cluster_key!r}]` to be categorical, "
            f"found `{infer_dtype(adata.obs[cluster_key])}`."
@@ -1030,7 +1031,7 @@ def _get_sorted_colors(
                cols, mapper = _get_categorical_colors(adata, ck)
                res.append(np.array([colors.to_hex(mapper[v]) for v in adata.obs[ck].values[order]]))
             except TypeError:
                if not is_numeric_dtype(adata.obs[ck]):
-                if not is_numeric_dtype(adata.obs[ck]):
+                if not np.issubdtype(adata.obs[ck].dtype, np.number):
                    raise TypeError(
                        f"Expected `adata.obs[{cluster_key!r}]` to be numeric, "
                        f"found `{infer_dtype(adata.obs[cluster_key])}`."
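The recurring change in the library code above swaps the deprecated `pandas.api.types.is_categorical_dtype` helper for a direct `isinstance` check against `pd.CategoricalDtype`. A minimal, runnable sketch of that check; the `ensure_categorical` helper is illustrative and not part of the diff:

import pandas as pd
from pandas.api.types import infer_dtype


def ensure_categorical(series: pd.Series, name: str = "series") -> pd.Series:
    """Raise a TypeError unless ``series`` has a categorical dtype."""
    # equivalent to the deprecated `is_categorical_dtype(series)` check used before this diff
    if not isinstance(series.dtype, pd.CategoricalDtype):
        raise TypeError(f"Expected `{name}` to be `categorical`, found `{infer_dtype(series)}`.")
    return series


ensure_categorical(pd.Series(["a", "b", "a"], dtype="category"))  # passes
try:
    ensure_categorical(pd.Series([1, 2, 3]), name="clusters")
except TypeError as err:
    print(err)  # Expected `clusters` to be `categorical`, found `integer`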
diff --git a/tests/_helpers.py b/tests/_helpers.py
index 37cb8b8d3..e5a114078 100644
--- a/tests/_helpers.py
+++ b/tests/_helpers.py
@@ -56,7 +56,7 @@ def _rpy2_mgcv_not_installed() -> bool:
 
 def bias_knn(
     conn: sp.csr_matrix,
-    pseudotime: np.ndarray,
+    pseudotime: pd.Series,
     n_neighbors: int,
     k: int = 3,
     frac_to_keep: Optional[float] = None,
@@ -72,7 +72,7 @@ def bias_knn(
         # get indices, values and current pseudo t
         row_data = conn[i, :].data
         row_ixs = conn[i, :].indices
-        current_t = pseudotime[i]
+        current_t = pseudotime.iloc[i]
 
         if frac_to_keep is not None:
             k_thresh = max(0, min(30, int(np.floor(len(row_data) * frac_to_keep))))
@@ -83,7 +83,7 @@ def bias_knn(
             cand_ixs = sorted_ixs[k_thresh:]
 
         # compare pseudotimes and set indices to zero
-        cand_t = pseudotime[cand_ixs]
+        cand_t = pseudotime.iloc[cand_ixs]
         rem_ixs = cand_ixs[cand_t < current_t]
         conn_biased[i, rem_ixs] = 0
diff --git a/tests/test_cflare.py b/tests/test_cflare.py
index aa304fe38..b43c0ca48 100644
--- a/tests/test_cflare.py
+++ b/tests/test_cflare.py
@@ -6,7 +6,6 @@
 
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_categorical_dtype
 
 from anndata import AnnData
 
@@ -66,7 +65,7 @@ def test_compute_approx_normal_run(self, adata_large: AnnData):
         mc.compute_eigendecomposition(k=5)
         mc.predict(use=2)
 
-        assert is_categorical_dtype(mc.terminal_states)
+        assert isinstance(mc.terminal_states.dtype, pd.CategoricalDtype)
         assert mc.terminal_states_probabilities is not None
 
         key = Key.obs.term_states(mc.backward)
@@ -416,7 +415,7 @@ def test_compare_fate_probabilities_with_reference(self):
 
         c = cr.estimators.CFLARE(cr.kernels.PrecomputedKernel(transition_matrix))
 
-        state_annotation = pd.Series(index=range(len(c)))
+        state_annotation = pd.Series(index=range(len(c)), dtype=str)
         state_annotation[7] = "terminal_1"
         state_annotation[10] = "terminal_2"
         state_annotation = state_annotation.astype("category")
@@ -425,7 +424,7 @@ def test_compare_fate_probabilities_with_reference(self):
         c.compute_fate_probabilities()
         fate_probabilities_query = c.fate_probabilities[state_annotation.isna()]
 
-        np.allclose(fate_probabilities_query, fate_probabilities_reference)
+        np.allclose(fate_probabilities_query.X, fate_probabilities_reference)
 
     def test_manual_approx_rc_set(self, adata_large):
         adata = adata_large
diff --git a/tests/test_colors.py b/tests/test_colors.py
index d483eac3f..8915cd89e 100644
--- a/tests/test_colors.py
+++ b/tests/test_colors.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_categorical_dtype
 
 from matplotlib.colors import is_color_like
 
@@ -87,7 +86,7 @@ def test_mapping_colors_simple_2(self):
 
         assert isinstance(res, pd.Series)
         assert len(res) == 3
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
 
     def test_mapping_colors_simple_colors(self):
         query = pd.Series(["foo", "bar", "baz"], dtype="category")
@@ -97,7 +96,7 @@ def test_mapping_colors_simple_colors(self):
 
         assert isinstance(res, pd.Series)
         assert len(res) == 3
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         assert isinstance(c, list)
         assert c == ["#ff0000", "#008000", "#0000ff"]
 
@@ -110,7 +109,7 @@ def test_mapping_colors_too_many_colors(self):
 
         assert isinstance(res, pd.Series)
         assert len(res) == 3
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         assert isinstance(c, list)
         assert c == ["#ff0000", "#008000", "#0000ff"]
 
@@ -123,7 +122,7 @@ def test_mapping_colors_different_color_representation(self):
 
         assert isinstance(res, pd.Series)
         assert len(res) == 3
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         assert isinstance(c, list)
         assert c == ["#ff0000", "#008000", "#0000ff"]
 
@@ -136,7 +135,7 @@ def test_mapping_colors_non_unique_colors(self):
 
         assert isinstance(res, pd.Series)
         assert len(res) == 3
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         assert isinstance(c, list)
         assert c == ["#ff0000", "#ff0000", "#ff0000"]
 
@@ -168,7 +167,7 @@ def test_mapping_colors_empty(self):
         r = _map_names_and_colors(reference, query)
 
         assert isinstance(r, pd.Series)
-        assert is_categorical_dtype(r)
+        assert isinstance(r.dtype, pd.CategoricalDtype)
 
     def test_mapping_colors_empty_with_color(self):
         query = pd.Series([], dtype="category")
@@ -177,7 +176,7 @@ def test_mapping_colors_empty_with_color(self):
         r, c = _map_names_and_colors(reference, query, colors_reference=[])
 
         assert isinstance(r, pd.Series)
-        assert is_categorical_dtype(r)
+        assert isinstance(r.dtype, pd.CategoricalDtype)
         assert isinstance(c, list)
         assert len(c) == 0
 
@@ -196,7 +195,7 @@ def test_mapping_colors_0_en_cutoff(self):
         r = _map_names_and_colors(reference, query, en_cutoff=0)
 
         assert isinstance(r, pd.Series)
-        assert is_categorical_dtype(r)
+        assert isinstance(r.dtype, pd.CategoricalDtype)
         assert list(r.index) == ["bar"]
         assert list(r.values) == ["bar"]
 
@@ -229,7 +228,7 @@ def test_mapping_colors_name_order_same_as_cat_order(self):
         res = _map_names_and_colors(x, y)
 
         assert isinstance(res, pd.Series)
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         np.testing.assert_array_equal(res.values, expected.values)
         np.testing.assert_array_equal(res.index.values, expected_index.values)
         np.testing.assert_array_equal(res.cat.categories.values, res.values)
diff --git a/tests/test_gpcca.py b/tests/test_gpcca.py
index 98315430c..7de29217c 100644
--- a/tests/test_gpcca.py
+++ b/tests/test_gpcca.py
@@ -160,7 +160,7 @@ def _check_compute_macro(mc: cr.estimators.GPCCA) -> None:
     else:
         assert isinstance(mc.macrostates_memberships, cr.Lineage)
         if mc.macrostates_memberships.shape[1] > 1:
-            np.testing.assert_allclose(mc.macrostates_memberships.sum(1), 1.0)
+            np.testing.assert_allclose(mc.macrostates_memberships.X.sum(1), 1.0)
 
     assert isinstance(mc.schur_matrix, np.ndarray)
     assert isinstance(mc.schur_vectors, np.ndarray)
diff --git a/tests/test_kernels.py b/tests/test_kernels.py
index a96528a0b..d6b476c8f 100644
--- a/tests/test_kernels.py
+++ b/tests/test_kernels.py
@@ -18,7 +18,7 @@
 from pandas.core.dtypes.common import is_bool_dtype, is_integer_dtype
 
 import scanpy as sc
-from anndata import AnnData
+from anndata import AnnData, read_h5ad
 from scanpy import Neighbors
 
 import cellrank as cr
 
@@ -603,9 +603,9 @@ def test_read_correct_from_layers(
         adata.obsm[vkey] = adata.layers[vkey][:, : adata.obsm[xkey].shape[1]]
         nans_v = np.isnan(np.sum(adata.obsm[vkey], axis=0))
 
-        gene_subset = adata.var[f"{vkey}_genes"]
+        gene_subset = adata.var[f"{vkey}_genes"].copy()
         if use_gene_subset:
-            gene_subset[10:] = False
+            gene_subset.iloc[10:] = False
         else:
             gene_subset = None
 
@@ -658,7 +658,7 @@ def test_addition_3_kernels(self, adata: AnnData):
         k = (vk + ck + vk1).compute_transition_matrix()
         expected = (
             np.eye(adata.n_obs) * (1 / 3 + 1 / 6 + 1 / 6)
-            + np.eye(adata._n_obs, k=1) * 1 / 6
+            + np.eye(adata.n_obs, k=1) * 1 / 6
             + np.eye(adata.n_obs, k=-1) * 1 / 6
         )
         expected[0, 0] = expected[-1, -1] = 2 / 3 + 1 / 3 * 0.5
 
@@ -925,7 +925,7 @@ def test_save_to_anndata(self, adata: AnnData, tmpdir):
 
         sc.write(path, adata)
 
-        bdata = sc.read(path)
+        bdata = read_h5ad(path)
 
         assert vk.params == bdata.uns[f"{key}_params"]["params"]
 
@@ -1151,7 +1151,7 @@ def test_explicit_initialization(self, adata: AnnData, correct_shape: bool):
             val = np.abs(rng.normal(size=(n, m)))
             if not correct_shape:
                 val = AnnData(val)
-                val.obs_names = list(adata.obs_names[col == src]) + [adata.obs_names[-1]]
+                val.obs_names = list(adata.obs_names[col == src]) + ["foo"]
                 val.var_names = adata.obs_names[col == tgt]
 
             couplings[src, tgt] = val
diff --git a/tests/test_lineage.py b/tests/test_lineage.py
index 039bcf513..a63178808 100644
--- a/tests/test_lineage.py
+++ b/tests/test_lineage.py
@@ -13,7 +13,7 @@
 
 from anndata import AnnData, read_h5ad, read_zarr
 
-from cellrank._utils import Lineage
+from cellrank import Lineage
 from cellrank._utils._colors import _compute_mean_color, _create_categorical_colors
 from cellrank._utils._lineage import _HT_CELLS, LineageView, PrimingDegree
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index a882956cb..1b32f4052 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_categorical_dtype
 
 from anndata import AnnData
 
@@ -13,7 +12,7 @@ def _assert_has_all_keys(adata: AnnData, bwd: bool = False) -> None:
     # fmt: off
     # term states
     key = Key.obs.term_states(bwd)
-    assert is_categorical_dtype(adata.obs[key])
+    assert isinstance(adata.obs[key].dtype, pd.CategoricalDtype)
     assert Key.obs.probs(key) in adata.obs
     assert Key.uns.colors(key) in adata.uns
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index 0940b58e3..cc5c8b2cb 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -14,7 +14,6 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import is_categorical_dtype
 
 import matplotlib.cm as cm
 import matplotlib.pyplot as plt
 
@@ -678,7 +677,7 @@ def test_cluster_lineage_writes(self, adata_cflare: AnnData):
 
         assert isinstance(adata_cflare.uns["lineage_0_trend"], AnnData)
         assert adata_cflare.uns["lineage_0_trend"].shape == (10, 200)
-        assert is_categorical_dtype(adata_cflare.uns["lineage_0_trend"].obs["clusters"])
+        assert isinstance(adata_cflare.uns["lineage_0_trend"].obs["clusters"].dtype, pd.CategoricalDtype)
 
     def test_cluster_lineage_key(self, adata_cflare: AnnData):
         model = create_model(adata_cflare)
@@ -694,7 +693,7 @@ def test_cluster_lineage_key(self, adata_cflare: AnnData):
 
         assert isinstance(adata_cflare.uns["foobar"], AnnData)
         assert adata_cflare.uns["foobar"].shape == (10, 200)
-        assert is_categorical_dtype(adata_cflare.uns["foobar"].obs["clusters"])
+        assert isinstance(adata_cflare.uns["foobar"].obs["clusters"].dtype, pd.CategoricalDtype)
 
     @compare()
     def test_cluster_lineage_covariates(self, adata: AnnData, fpath: str):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 459cc21d6..ee729a6c0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,7 +7,6 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-from pandas.api.types import is_categorical_dtype
 
 import scanpy as sc
 from anndata import AnnData
 
@@ -263,7 +262,7 @@ def test_return_colors(self):
         res, colors = _process_series(x, keys=["b, a", "d, c"], cols=["red", "green", "blue", "white"])
 
         assert isinstance(res, pd.Series)
-        assert is_categorical_dtype(res)
+        assert isinstance(res.dtype, pd.CategoricalDtype)
         assert isinstance(colors, list)
         np.testing.assert_array_equal(res.values, expected.values)