Removing Master Changes
Sakshi-2797 committed Mar 27, 2023 · 1 parent 686e6a9 · commit 0cf93d9
Showing 3 changed files with 92 additions and 148 deletions.
45 changes: 38 additions & 7 deletions scanpy/preprocessing/_simple.py
@@ -10,6 +10,7 @@
 import numba
 import numpy as np
 import scipy as sp
+import time
 from scipy.sparse import issparse, isspmatrix_csr, csr_matrix, spmatrix
 from sklearn.utils import sparsefuncs, check_array
 from pandas.api.types import is_categorical_dtype
@@ -827,6 +828,35 @@ def scale_sparse(
     )
 
 
+@numba.njit(cache=True, parallel=True)
+def do_scale(X, maxv, nthr):
+    # nthr = numba.get_num_threads()
+    # t0 = time.time()
+    s = np.zeros((nthr, X.shape[1]))
+    ss = np.zeros((nthr, X.shape[1]))
+    mean = np.zeros(X.shape[1])
+    std = np.zeros(X.shape[1])
+    n = X.shape[0]
+    for i in numba.prange(nthr):
+        for r in range(i, n, nthr):
+            for c in range(X.shape[1]):
+                v = X[r, c]
+                s[i, c] += v
+                ss[i, c] += v * v
+    for c in numba.prange(X.shape[1]):
+        s0 = s[:, c].sum()
+        mean[c] = s0 / n
+        std[c] = np.sqrt((ss[:, c].sum() - s0 * s0 / n) / (n - 1))
+
+    # with numba.objmode():
+    #     print("finished getting means, stddev", time.time() - t0)
+    for r, c in numba.pndindex(X.shape):
+        v = (X[r, c] - mean[c]) / std[c]
+        X[r, c] = maxv if v > maxv else v
+    # with numba.objmode():
+    #     print("finished scaling values", time.time() - t0)
+
+
 @scale.register(AnnData)
 def scale_anndata(
     adata: AnnData,
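
For orientation: do_scale accumulates per-thread partial sums and sums of squares to get each column's mean and sample standard deviation, then z-scores the matrix in place in a second parallel pass, clipping values above maxv. A minimal usage sketch, assuming the kernel can be imported from scanpy.preprocessing._simple as added above, the input is a dense float64 array, no column is constant (a zero standard deviation would divide by zero), and maxv is a finite float rather than None:

import numba
import numpy as np

from scanpy.preprocessing._simple import do_scale  # path per the diff above

rng = np.random.default_rng(0)
X = rng.random((1000, 50))  # cells x genes, dense float64, no constant columns

do_scale(X, 10.0, numba.get_num_threads())  # in place: z-score each column, clip at 10

print(X.mean(axis=0)[:3])  # each column mean is ~0
print(X.max())             # never exceeds 10.0
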
@@ -840,13 +870,14 @@ def scale_anndata(
     adata = adata.copy() if copy else adata
     view_to_actual(adata)
     X = _get_obs_rep(adata, layer=layer, obsm=obsm)
-    X, adata.var["mean"], adata.var["std"] = scale(
-        X,
-        zero_center=zero_center,
-        max_value=max_value,
-        copy=False,  # because a copy has already been made, if it were to be made
-        return_mean_std=True,
-    )
+    # X, adata.var["mean"], adata.var["std"] = scale(
+    #     X,
+    #     zero_center=zero_center,
+    #     max_value=max_value,
+    #     copy=False,  # because a copy has already been made, if it were to be made
+    #     return_mean_std=True,
+    # )
+    do_scale(adata.X, max_value, numba.get_num_threads())
     _set_obs_rep(adata, X, layer=layer, obsm=obsm)
     if copy:
         return adata
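The replacement call path feeds adata.X straight to the numba kernel: it scales in place, no longer records adata.var["mean"] / adata.var["std"], always centers regardless of the zero_center flag, operates on adata.X even when a layer or obsm is requested, and assumes max_value is not None. A quick sanity check of the kernel against a plain NumPy z-score, assuming do_scale is importable as above and a max_value large enough that the clip never fires:

import numba
import numpy as np

from scanpy.preprocessing._simple import do_scale  # path per the diff above

rng = np.random.default_rng(0)
X = rng.random((500, 20))
X0 = X.copy()

do_scale(X, 1e9, numba.get_num_threads())  # max_value so large that clipping never fires

# do_scale standardizes with the sample standard deviation (ddof=1)
expected = (X0 - X0.mean(axis=0)) / X0.std(axis=0, ddof=1)
np.testing.assert_allclose(X, expected, atol=1e-8)
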
147 changes: 50 additions & 97 deletions scanpy/tools/_leiden.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Sequence, Type, Literal
+from typing import Optional, Tuple, Sequence, Type
 
 import numpy as np
 import pandas as pd
@@ -36,7 +36,6 @@ def leiden(
     neighbors_key: Optional[str] = None,
     obsp: Optional[str] = None,
     copy: bool = False,
-    flavor: Literal['default', 'katana'] = 'default',
     **partition_kwargs,
 ) -> Optional[AnnData]:
     """\
@@ -105,103 +104,57 @@
     A dict with the values for the parameters `resolution`, `random_state`,
     and `n_iterations`.
     """
-    if flavor == 'default':
-        try:
-            import leidenalg
-        except ImportError:
-            raise ImportError(
-                'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
-            )
-        partition_kwargs = dict(partition_kwargs)
-
-        start = logg.info('running Leiden clustering')
-        adata = adata.copy() if copy else adata
-        # are we clustering a user-provided graph or the default AnnData one?
-        if adjacency is None:
-            adjacency = _utils._choose_graph(adata, obsp, neighbors_key)
-        if restrict_to is not None:
-            restrict_key, restrict_categories = restrict_to
-            adjacency, restrict_indices = restrict_adjacency(
-                adata,
-                restrict_key,
-                restrict_categories,
-                adjacency,
-            )
-        # convert it to igraph
-        g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
-        # flip to the default partition type if not overriden by the user
-        if partition_type is None:
-            partition_type = leidenalg.RBConfigurationVertexPartition
-        # Prepare find_partition arguments as a dictionary,
-        # appending to whatever the user provided. It needs to be this way
-        # as this allows for the accounting of a None resolution
-        # (in the case of a partition variant that doesn't take it on input)
-        if use_weights:
-            partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
-        partition_kwargs['n_iterations'] = n_iterations
-        partition_kwargs['seed'] = random_state
-        if resolution is not None:
-            partition_kwargs['resolution_parameter'] = resolution
-        # clustering proper
-        part = leidenalg.find_partition(g, partition_type, **partition_kwargs)
-        # store output into adata.obs
-        groups = np.array(part.membership)
-        if restrict_to is not None:
-            if key_added == 'leiden':
-                key_added += '_R'
-            groups = rename_groups(
-                adata,
-                key_added,
-                restrict_key,
-                restrict_categories,
-                restrict_indices,
-                groups,
-            )
-
-    elif flavor == 'katana':
-        from scanpy._utils import _choose_graph
-
-        adjacency = _choose_graph(adata, obsp=None, neighbors_key=None)
-
-        sources, targets = adjacency.nonzero()
-        weights = adjacency[sources, targets]
-        if isinstance(weights, np.matrix):
-            weights = weights.A1
-        start = logg.info('running Leiden clustering')
-
-        from katana.local.analytics import (
-            leiden_clustering,
-            LeidenClusteringStatistics,
-            LeidenClusteringPlan,
-        )
-        from katana.local.import_data import from_edge_list_arrays
-
-        property_dict = {"value": weights}
-        graph = from_edge_list_arrays(sources, targets, property_dict)
-
-        enable_vf = True
-        modularity_threshold_per_round = 0.0001
-        modularity_threshold_total = 0.0001
-        max_iterations = 1000000
-        min_graph_size = 0
-        resolution = 1.0
-        randomness = 0
-
-        leiden_plan = LeidenClusteringPlan.deterministic(
-            enable_vf,
-            modularity_threshold_per_round,
-            modularity_threshold_total,
-            max_iterations,
-            min_graph_size,
-            resolution,
-            randomness,
-        )
-        # leiden_plan = LeidenClusteringPlan.do_all(enable_vf, modularity_threshold_per_round, modularity_threshold_total, max_iterations, min_graph_size, resolution, randomness)
-        leiden_clustering(graph, "value", "leiden_output", plan=leiden_plan)
-        stats = LeidenClusteringStatistics(graph, "value", "leiden_output")
-        print(stats)
-        groups = graph.get_node_property("leiden_output").to_numpy().astype('int')
+    try:
+        import leidenalg
+    except ImportError:
+        raise ImportError(
+            'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
+        )
+    partition_kwargs = dict(partition_kwargs)
+
+    start = logg.info('running Leiden clustering')
+    adata = adata.copy() if copy else adata
+    # are we clustering a user-provided graph or the default AnnData one?
+    if adjacency is None:
+        adjacency = _utils._choose_graph(adata, obsp, neighbors_key)
+    if restrict_to is not None:
+        restrict_key, restrict_categories = restrict_to
+        adjacency, restrict_indices = restrict_adjacency(
+            adata,
+            restrict_key,
+            restrict_categories,
+            adjacency,
+        )
+    # convert it to igraph
+    g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
+    # flip to the default partition type if not overriden by the user
+    if partition_type is None:
+        partition_type = leidenalg.RBConfigurationVertexPartition
+    # Prepare find_partition arguments as a dictionary,
+    # appending to whatever the user provided. It needs to be this way
+    # as this allows for the accounting of a None resolution
+    # (in the case of a partition variant that doesn't take it on input)
+    if use_weights:
+        partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
+    partition_kwargs['n_iterations'] = n_iterations
+    partition_kwargs['seed'] = random_state
+    if resolution is not None:
+        partition_kwargs['resolution_parameter'] = resolution
+    # clustering proper
+    part = leidenalg.find_partition(g, partition_type, **partition_kwargs)
+    # store output into adata.obs
+    groups = np.array(part.membership)
+    if restrict_to is not None:
+        if key_added == 'leiden':
+            key_added += '_R'
+        groups = rename_groups(
+            adata,
+            key_added,
+            restrict_key,
+            restrict_categories,
+            restrict_indices,
+            groups,
+        )
 
     adata.obs[key_added] = pd.Categorical(
         values=groups.astype('U'),
         categories=natsorted(map(str, np.unique(groups))),
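With the katana flavor gone, leiden() always takes the leidenalg path kept above: build an igraph graph from the neighbor adjacency, default to RBConfigurationVertexPartition, and hand everything to find_partition. A self-contained sketch of that path, assuming python-igraph and leidenalg are installed; the toy random graph stands in for the kNN graph scanpy derives from adata:

import igraph as ig
import leidenalg
import numpy as np

rng = np.random.default_rng(0)

# toy weighted graph standing in for adata's neighbor graph
g = ig.Graph.Erdos_Renyi(n=100, p=0.05)
g.es['weight'] = rng.random(g.ecount()).tolist()

part = leidenalg.find_partition(
    g,
    leidenalg.RBConfigurationVertexPartition,  # scanpy's default partition type
    weights=np.array(g.es['weight']).astype(np.float64),
    n_iterations=-1,  # iterate until no improvement, scanpy's default
    seed=0,
    resolution_parameter=1.0,
)
groups = np.array(part.membership)  # one cluster label per node
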
48 changes: 4 additions & 44 deletions scanpy/tools/_louvain.py
@@ -1,5 +1,5 @@
 from types import MappingProxyType
-from typing import Optional, Tuple, Sequence, Type, Mapping, Any, Literal
+from typing import Optional, Tuple, Sequence, Type, Mapping, Any
 
 import numpy as np
 import pandas as pd
@@ -10,6 +10,7 @@

 from ._utils_clustering import rename_groups, restrict_adjacency
 from .. import _utils, logging as logg
+from .._compat import Literal
 from .._utils import _choose_graph
 
 try:
@@ -29,7 +30,7 @@ def louvain(
     restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
     key_added: str = 'louvain',
     adjacency: Optional[spmatrix] = None,
-    flavor: Literal['vtraag', 'igraph', 'rapids', 'katana'] = 'vtraag',
+    flavor: Literal['vtraag', 'igraph', 'rapids'] = 'vtraag',
     directed: bool = True,
     use_weights: bool = False,
     partition_type: Optional[Type[MutableVertexPartition]] = None,
@@ -200,49 +201,8 @@ def louvain(
         groups = np.zeros(len(partition), dtype=int)
         for k, v in partition.items():
             groups[k] = v
-    elif flavor == 'katana':
-        # from scanpy._utils import _choose_graph
-
-        adjacency = _choose_graph(adata, obsp=None, neighbors_key=None)
-        sources, targets = adjacency.nonzero()
-        weights = adjacency[sources, targets]
-        if isinstance(weights, np.matrix):
-            weights = weights.A1
-
-        # import pandas as pd
-        # from natsort import natsorted
-        from katana.local import Graph
-        from katana.local.analytics import (
-            louvain_clustering,
-            LouvainClusteringStatistics,
-            LouvainClusteringPlan,
-        )
-        from katana.local.import_data import from_edge_list_arrays
-        import katana.local
-
-        katana.local.initialize()
-        property_dict = {"value": weights}
-        graph = from_edge_list_arrays(sources, targets, property_dict)
-        enable_vf = False
-        modularity_threshold_per_round = 0.0001
-        modularity_threshold_total = 0.0001
-        max_iterations = 100000
-        min_graph_size = 0
-        louvain_plan = LouvainClusteringPlan.do_all(
-            enable_vf,
-            modularity_threshold_per_round,
-            modularity_threshold_total,
-            max_iterations,
-            min_graph_size,
-        )
-        louvain_clustering(graph, "value", "output", plan=louvain_plan)
-        stats = LouvainClusteringStatistics(graph, "value", "output")
-        print(stats)
-        groups = graph.get_node_property("output").to_numpy().astype('int')
     else:
-        raise ValueError(
-            '`flavor` needs to be "vtraag" or "igraph" or "taynaud" or "katana".'
-        )
+        raise ValueError('`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
     if restrict_to is not None:
         if key_added == 'louvain':
             key_added += '_R'
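Likewise, louvain() is back to the 'vtraag', 'igraph', and 'rapids' flavors (the error message also names 'taynaud'). A minimal sketch of the 'igraph' flavor, which runs plain multilevel Louvain from python-igraph, assuming the package is installed and with a toy weighted graph standing in for adata's neighbor graph:

import igraph as ig
import numpy as np

rng = np.random.default_rng(0)

# toy weighted graph standing in for adata's neighbor graph
g = ig.Graph.Erdos_Renyi(n=100, p=0.05)
g.es['weight'] = rng.random(g.ecount()).tolist()

part = g.community_multilevel(weights=np.array(g.es['weight']).astype(np.float64))
groups = np.array(part.membership)  # one cluster label per node
print(len(set(groups)), 'communities')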
