Removing Master Changes
Sakshi-2797 committed Mar 27, 2023 · 1 parent 686e6a9 · commit 0cf93d9
Showing 3 changed files with 92 additions and 148 deletions.
45 changes: 38 additions & 7 deletions scanpy/preprocessing/_simple.py
@@ -10,6 +10,7 @@
 import numba
 import numpy as np
 import scipy as sp
+import time
 from scipy.sparse import issparse, isspmatrix_csr, csr_matrix, spmatrix
 from sklearn.utils import sparsefuncs, check_array
 from pandas.api.types import is_categorical_dtype
@@ -827,6 +828,35 @@ def scale_sparse(
     )
 
 
+@numba.njit(cache=True, parallel=True)
+def do_scale(X, maxv, nthr):
+    # nthr = numba.get_num_threads()
+    # t0 = time.time()
+    s = np.zeros((nthr, X.shape[1]))
+    ss = np.zeros((nthr, X.shape[1]))
+    mean = np.zeros(X.shape[1])
+    std = np.zeros(X.shape[1])
+    n = X.shape[0]
+    for i in numba.prange(nthr):
+        for r in range(i, n, nthr):
+            for c in range(X.shape[1]):
+                v = X[r, c]
+                s[i, c] += v
+                ss[i, c] += v * v
+    for c in numba.prange(X.shape[1]):
+        s0 = s[:, c].sum()
+        mean[c] = s0 / n
+        std[c] = np.sqrt((ss[:, c].sum() - s0 * s0 / n) / (n - 1))
+
+    # with numba.objmode():
+    #     print("finished getting means, stddev", time.time() - t0)
+    for r, c in numba.pndindex(X.shape):
+        v = (X[r, c] - mean[c]) / std[c]
+        X[r, c] = maxv if v > maxv else v
+    # with numba.objmode():
+    #     print("finished scaling values", time.time() - t0)
+
+
 @scale.register(AnnData)
 def scale_anndata(
     adata: AnnData,
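
For orientation: do_scale accumulates per-thread partial sums and sums of squares to get each column's mean and sample standard deviation, then z-scores the matrix in place in a second parallel pass, clipping values above maxv. A minimal usage sketch, assuming the kernel can be imported from scanpy.preprocessing._simple as added above, the input is a dense float64 array, no column is constant (a zero standard deviation would divide by zero), and maxv is a finite float rather than None:

import numba
import numpy as np

from scanpy.preprocessing._simple import do_scale  # path per the diff above

rng = np.random.default_rng(0)
X = rng.random((1000, 50))  # cells x genes, dense float64, no constant columns

do_scale(X, 10.0, numba.get_num_threads())  # in place: z-score each column, clip at 10

print(X.mean(axis=0)[:3])  # each column mean is ~0
print(X.max())             # never exceeds 10.0
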
@@ -840,13 +870,14 @@ def scale_anndata(
     adata = adata.copy() if copy else adata
     view_to_actual(adata)
     X = _get_obs_rep(adata, layer=layer, obsm=obsm)
-    X, adata.var["mean"], adata.var["std"] = scale(
-        X,
-        zero_center=zero_center,
-        max_value=max_value,
-        copy=False,  # because a copy has already been made, if it were to be made
-        return_mean_std=True,
-    )
+    # X, adata.var["mean"], adata.var["std"] = scale(
+    #     X,
+    #     zero_center=zero_center,
+    #     max_value=max_value,
+    #     copy=False,  # because a copy has already been made, if it were to be made
+    #     return_mean_std=True,
+    # )
+    do_scale(adata.X, max_value, numba.get_num_threads())
     _set_obs_rep(adata, X, layer=layer, obsm=obsm)
     if copy:
         return adata
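The replacement call path feeds adata.X straight to the numba kernel: it scales in place, no longer records adata.var["mean"] / adata.var["std"], always centers regardless of the zero_center flag, operates on adata.X even when a layer or obsm is requested, and assumes max_value is not None. A quick sanity check of the kernel against a plain NumPy z-score, assuming do_scale is importable as above and a max_value large enough that the clip never fires:

import numba
import numpy as np

from scanpy.preprocessing._simple import do_scale  # path per the diff above

rng = np.random.default_rng(0)
X = rng.random((500, 20))
X0 = X.copy()

do_scale(X, 1e9, numba.get_num_threads())  # max_value so large that clipping never fires

# do_scale standardizes with the sample standard deviation (ddof=1)
expected = (X0 - X0.mean(axis=0)) / X0.std(axis=0, ddof=1)
np.testing.assert_allclose(X, expected, atol=1e-8)
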
147 changes: 50 additions & 97 deletions scanpy/tools/_leiden.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Sequence, Type, Literal
+from typing import Optional, Tuple, Sequence, Type
 
 import numpy as np
 import pandas as pd
@@ -36,7 +36,6 @@ def leiden(
     neighbors_key: Optional[str] = None,
     obsp: Optional[str] = None,
     copy: bool = False,
-    flavor: Literal['default', 'katana'] = 'default',
     **partition_kwargs,
 ) -> Optional[AnnData]:
     """\
@@ -105,103 +104,57 @@
     A dict with the values for the parameters `resolution`, `random_state`,
     and `n_iterations`.
     """
-    if flavor == 'default':
-        try:
-            import leidenalg
-        except ImportError:
-            raise ImportError(
-                'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
-            )
-        partition_kwargs = dict(partition_kwargs)
-
-        start = logg.info('running Leiden clustering')
-        adata = adata.copy() if copy else adata
-        # are we clustering a user-provided graph or the default AnnData one?
-        if adjacency is None:
-            adjacency = _utils._choose_graph(adata, obsp, neighbors_key)
-        if restrict_to is not None:
-            restrict_key, restrict_categories = restrict_to
-            adjacency, restrict_indices = restrict_adjacency(
-                adata,
-                restrict_key,
-                restrict_categories,
-                adjacency,
-            )
-        # convert it to igraph
-        g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
-        # flip to the default partition type if not overriden by the user
-        if partition_type is None:
-            partition_type = leidenalg.RBConfigurationVertexPartition
-        # Prepare find_partition arguments as a dictionary,
-        # appending to whatever the user provided. It needs to be this way
-        # as this allows for the accounting of a None resolution
-        # (in the case of a partition variant that doesn't take it on input)
-        if use_weights:
-            partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
-        partition_kwargs['n_iterations'] = n_iterations
-        partition_kwargs['seed'] = random_state
-        if resolution is not None:
-            partition_kwargs['resolution_parameter'] = resolution
-        # clustering proper
-        part = leidenalg.find_partition(g, partition_type, **partition_kwargs)
-        # store output into adata.obs
-        groups = np.array(part.membership)
-        if restrict_to is not None:
-            if key_added == 'leiden':
-                key_added += '_R'
-            groups = rename_groups(
-                adata,
-                key_added,
-                restrict_key,
-                restrict_categories,
-                restrict_indices,
-                groups,
-            )
-
-    elif flavor == 'katana':
-        from scanpy._utils import _choose_graph
-
-        adjacency = _choose_graph(adata, obsp=None, neighbors_key=None)
-
-        sources, targets = adjacency.nonzero()
-        weights = adjacency[sources, targets]
-        if isinstance(weights, np.matrix):
-            weights = weights.A1
-        start = logg.info('running Leiden clustering')
-
-        from katana.local.analytics import (
-            leiden_clustering,
-            LeidenClusteringStatistics,
-            LeidenClusteringPlan,
-        )
-        from katana.local.import_data import from_edge_list_arrays
-
-        property_dict = {"value": weights}
-        graph = from_edge_list_arrays(sources, targets, property_dict)
-
-        enable_vf = True
-        modularity_threshold_per_round = 0.0001
-        modularity_threshold_total = 0.0001
-        max_iterations = 1000000
-        min_graph_size = 0
-        resolution = 1.0
-        randomness = 0
-
-        leiden_plan = LeidenClusteringPlan.deterministic(
-            enable_vf,
-            modularity_threshold_per_round,
-            modularity_threshold_total,
-            max_iterations,
-            min_graph_size,
-            resolution,
-            randomness,
-        )
-        # leiden_plan = LeidenClusteringPlan.do_all(enable_vf, modularity_threshold_per_round, modularity_threshold_total, max_iterations, min_graph_size, resolution, randomness)
-        leiden_clustering(graph, "value", "leiden_output", plan=leiden_plan)
-        stats = LeidenClusteringStatistics(graph, "value", "leiden_output")
-        print(stats)
-        groups = graph.get_node_property("leiden_output").to_numpy().astype('int')
+    try:
+        import leidenalg
+    except ImportError:
+        raise ImportError(
+            'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
+        )
+    partition_kwargs = dict(partition_kwargs)
+
+    start = logg.info('running Leiden clustering')
+    adata = adata.copy() if copy else adata
+    # are we clustering a user-provided graph or the default AnnData one?
+    if adjacency is None:
+        adjacency = _utils._choose_graph(adata, obsp, neighbors_key)
+    if restrict_to is not None:
+        restrict_key, restrict_categories = restrict_to
+        adjacency, restrict_indices = restrict_adjacency(
+            adata,
+            restrict_key,
+            restrict_categories,
+            adjacency,
+        )
+    # convert it to igraph
+    g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
+    # flip to the default partition type if not overriden by the user
+    if partition_type is None:
+        partition_type = leidenalg.RBConfigurationVertexPartition
+    # Prepare find_partition arguments as a dictionary,
+    # appending to whatever the user provided. It needs to be this way
+    # as this allows for the accounting of a None resolution
+    # (in the case of a partition variant that doesn't take it on input)
+    if use_weights:
+        partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
+    partition_kwargs['n_iterations'] = n_iterations
+    partition_kwargs['seed'] = random_state
+    if resolution is not None:
+        partition_kwargs['resolution_parameter'] = resolution
+    # clustering proper
+    part = leidenalg.find_partition(g, partition_type, **partition_kwargs)
+    # store output into adata.obs
+    groups = np.array(part.membership)
+    if restrict_to is not None:
+        if key_added == 'leiden':
+            key_added += '_R'
+        groups = rename_groups(
+            adata,
+            key_added,
+            restrict_key,
+            restrict_categories,
+            restrict_indices,
+            groups,
+        )
 
     adata.obs[key_added] = pd.Categorical(
         values=groups.astype('U'),
         categories=natsorted(map(str, np.unique(groups))),
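With the katana flavor gone, leiden() always takes the leidenalg path kept above: build an igraph graph from the neighbor adjacency, default to RBConfigurationVertexPartition, and hand everything to find_partition. A self-contained sketch of that path, assuming python-igraph and leidenalg are installed; the toy random graph stands in for the kNN graph scanpy derives from adata:

import igraph as ig
import leidenalg
import numpy as np

rng = np.random.default_rng(0)

# toy weighted graph standing in for adata's neighbor graph
g = ig.Graph.Erdos_Renyi(n=100, p=0.05)
g.es['weight'] = rng.random(g.ecount()).tolist()

part = leidenalg.find_partition(
    g,
    leidenalg.RBConfigurationVertexPartition,  # scanpy's default partition type
    weights=np.array(g.es['weight']).astype(np.float64),
    n_iterations=-1,  # iterate until no improvement, scanpy's default
    seed=0,
    resolution_parameter=1.0,
)
groups = np.array(part.membership)  # one cluster label per node
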
48 changes: 4 additions & 44 deletions scanpy/tools/_louvain.py
@@ -1,5 +1,5 @@
 from types import MappingProxyType
-from typing import Optional, Tuple, Sequence, Type, Mapping, Any, Literal
+from typing import Optional, Tuple, Sequence, Type, Mapping, Any
 
 import numpy as np
 import pandas as pd
@@ -10,6 +10,7 @@

 from ._utils_clustering import rename_groups, restrict_adjacency
 from .. import _utils, logging as logg
+from .._compat import Literal
 from .._utils import _choose_graph
 
 try:
@@ -29,7 +30,7 @@ def louvain(
     restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
     key_added: str = 'louvain',
     adjacency: Optional[spmatrix] = None,
-    flavor: Literal['vtraag', 'igraph', 'rapids', 'katana'] = 'vtraag',
+    flavor: Literal['vtraag', 'igraph', 'rapids'] = 'vtraag',
     directed: bool = True,
     use_weights: bool = False,
     partition_type: Optional[Type[MutableVertexPartition]] = None,
@@ -200,49 +201,8 @@ def louvain(
         groups = np.zeros(len(partition), dtype=int)
         for k, v in partition.items():
             groups[k] = v
-    elif flavor == 'katana':
-        # from scanpy._utils import _choose_graph
-
-        adjacency = _choose_graph(adata, obsp=None, neighbors_key=None)
-        sources, targets = adjacency.nonzero()
-        weights = adjacency[sources, targets]
-        if isinstance(weights, np.matrix):
-            weights = weights.A1
-
-        # import pandas as pd
-        # from natsort import natsorted
-        from katana.local import Graph
-        from katana.local.analytics import (
-            louvain_clustering,
-            LouvainClusteringStatistics,
-            LouvainClusteringPlan,
-        )
-        from katana.local.import_data import from_edge_list_arrays
-        import katana.local
-
-        katana.local.initialize()
-        property_dict = {"value": weights}
-        graph = from_edge_list_arrays(sources, targets, property_dict)
-        enable_vf = False
-        modularity_threshold_per_round = 0.0001
-        modularity_threshold_total = 0.0001
-        max_iterations = 100000
-        min_graph_size = 0
-        louvain_plan = LouvainClusteringPlan.do_all(
-            enable_vf,
-            modularity_threshold_per_round,
-            modularity_threshold_total,
-            max_iterations,
-            min_graph_size,
-        )
-        louvain_clustering(graph, "value", "output", plan=louvain_plan)
-        stats = LouvainClusteringStatistics(graph, "value", "output")
-        print(stats)
-        groups = graph.get_node_property("output").to_numpy().astype('int')
     else:
-        raise ValueError(
-            '`flavor` needs to be "vtraag" or "igraph" or "taynaud" or "katana".'
-        )
+        raise ValueError('`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
     if restrict_to is not None:
         if key_added == 'louvain':
             key_added += '_R'
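Likewise, louvain() is back to the 'vtraag', 'igraph', and 'rapids' flavors (the error message also names 'taynaud'). A minimal sketch of the 'igraph' flavor, which runs plain multilevel Louvain from python-igraph, assuming the package is installed and with a toy weighted graph standing in for adata's neighbor graph:

import igraph as ig
import numpy as np

rng = np.random.default_rng(0)

# toy weighted graph standing in for adata's neighbor graph
g = ig.Graph.Erdos_Renyi(n=100, p=0.05)
g.es['weight'] = rng.random(g.ecount()).tolist()

part = g.community_multilevel(weights=np.array(g.es['weight']).astype(np.float64))
groups = np.array(part.membership)  # one cluster label per node
print(len(set(groups)), 'communities')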
