Skip to content

Commit

Permalink
Changes in Scale function
Browse files Browse the repository at this point in the history
  • Loading branch information
Sakshi-2797 committed Mar 28, 2023
2 parents 7ffde34 + b162be6 commit 383c8a8
Showing 1 changed file with 49 additions and 39 deletions.
88 changes: 49 additions & 39 deletions scanpy/preprocessing/_simple.py
Expand Up @@ -708,41 +708,12 @@ def _regress_out_chunk(data):
return np.vstack(responses_chunk_list)


@numba.njit(cache=True, parallel=True)
def do_scale(X, maxv, nthr):
    """Standardize the columns of a dense 2-D array in place.

    Every column is shifted by its mean and divided by its sample standard
    deviation (``n - 1`` in the denominator). Values above ``maxv`` are then
    clipped to ``maxv``; only the upper bound is clipped.

    Parameters
    ----------
    X
        2-D array, overwritten with the scaled values. Results are stored
        with ``X``'s own dtype, so a floating-point array should be passed.
    maxv
        Upper clipping bound, or ``None`` to disable clipping.
    nthr
        Number of partial accumulators; worker ``i`` sums rows
        ``i, i + nthr, i + 2 * nthr, ...``. Must be at least 1.

    Returns
    -------
    None. ``X`` is modified in place.
    """
    n_obs, n_vars = X.shape
    col_sum = np.zeros((nthr, n_vars))
    col_sumsq = np.zeros((nthr, n_vars))
    mean = np.zeros(n_vars)
    std = np.zeros(n_vars)
    # A single row would make the ``n - 1`` denominator zero.
    dof = max(n_obs - 1, 1)

    # Each worker writes only to its own accumulator row, so the parallel
    # loop needs no synchronisation.
    for i in numba.prange(nthr):
        for r in range(i, n_obs, nthr):
            for c in range(n_vars):
                v = X[r, c]
                col_sum[i, c] += v
                col_sumsq[i, c] += v * v

    for c in numba.prange(n_vars):
        total = col_sum[:, c].sum()
        mean[c] = total / n_obs
        var = (col_sumsq[:, c].sum() - total * total / n_obs) / dof
        if var <= 0:
            # Constant column (possibly slightly negative through rounding):
            # use 1 so the division below is well defined and the column
            # simply becomes centred.
            std[c] = 1.0
        else:
            std[c] = np.sqrt(var)

    for r, c in numba.pndindex(X.shape):
        v = (X[r, c] - mean[c]) / std[c]
        if maxv is not None:
            X[r, c] = maxv if v > maxv else v
        else:
            X[r, c] = v


@singledispatch
def scale(
X: Union[AnnData, spmatrix, np.ndarray],
zero_center: bool = True,
max_value: Optional[float] = None,
flavor: Optional[Literal['default', 'use_fastpp']] = 'default',
copy: bool = False,
layer: Optional[str] = None,
obsm: Optional[str] = None,
Expand Down Expand Up @@ -786,6 +757,39 @@ def scale(
return scale_array(X, zero_center=zero_center, max_value=max_value, copy=copy)


@numba.njit(cache=True, parallel=True)
def do_scale(X, maxv, nthr):
    """Standardize the columns of a dense 2-D array in place.

    Every column is shifted by its mean and divided by its sample standard
    deviation (``n - 1`` in the denominator). Values above ``maxv`` are then
    clipped to ``maxv``; only the upper bound is clipped.

    Parameters
    ----------
    X
        2-D array, overwritten with the scaled values. Results are stored
        with ``X``'s own dtype, so a floating-point array should be passed.
    maxv
        Upper clipping bound, or ``None`` to disable clipping.
    nthr
        Number of partial accumulators; worker ``i`` sums rows
        ``i, i + nthr, i + 2 * nthr, ...``. Must be at least 1.

    Returns
    -------
    X, mean, std
        The same array object that was passed in (now scaled), the
        per-column means, and the per-column standard deviations that were
        used as divisors. Columns with zero variance report a standard
        deviation of 1.
    """
    n_obs, n_vars = X.shape
    col_sum = np.zeros((nthr, n_vars))
    col_sumsq = np.zeros((nthr, n_vars))
    mean = np.zeros(n_vars)
    std = np.zeros(n_vars)
    # A single row would make the ``n - 1`` denominator zero.
    dof = max(n_obs - 1, 1)

    # Each worker writes only to its own accumulator row, so the parallel
    # loop needs no synchronisation.
    for i in numba.prange(nthr):
        for r in range(i, n_obs, nthr):
            for c in range(n_vars):
                v = X[r, c]
                col_sum[i, c] += v
                col_sumsq[i, c] += v * v

    for c in numba.prange(n_vars):
        total = col_sum[:, c].sum()
        mean[c] = total / n_obs
        var = (col_sumsq[:, c].sum() - total * total / n_obs) / dof
        if var <= 0:
            # Constant column (possibly slightly negative through rounding):
            # use 1 so the division below is well defined and the column
            # simply becomes centred.
            std[c] = 1.0
        else:
            std[c] = np.sqrt(var)

    for r, c in numba.pndindex(X.shape):
        v = (X[r, c] - mean[c]) / std[c]
        if maxv is not None:
            X[r, c] = maxv if v > maxv else v
        else:
            X[r, c] = v
    return X, mean, std


@scale.register(np.ndarray)
def scale_array(
X,
Expand All @@ -794,6 +798,7 @@ def scale_array(
max_value: Optional[float] = None,
copy: bool = False,
return_mean_std: bool = False,
flavor: Optional[Literal['default', 'use_fastpp']] = 'default',
):
if copy:
X = X.copy()
Expand Down Expand Up @@ -825,7 +830,6 @@ def scale_array(
if max_value is not None:
logg.debug(f"... clipping at max_value {max_value}")
X[X > max_value] = max_value

if return_mean_std:
return X, mean, std
else:
Expand All @@ -840,6 +844,7 @@ def scale_sparse(
max_value: Optional[float] = None,
copy: bool = False,
return_mean_std: bool = False,
flavor: Optional[Literal['default', 'use_fastpp']] = 'default',
):
# need to add the following here to make inplace logic work
if zero_center:
Expand All @@ -864,21 +869,26 @@ def scale_anndata(
*,
zero_center: bool = True,
max_value: Optional[float] = None,
flavor: Optional[Literal['default', 'use_fastpp']] = 'default',
copy: bool = False,
layer: Optional[str] = None,
obsm: Optional[str] = None,
) -> Optional[AnnData]:
adata = adata.copy() if copy else adata
view_to_actual(adata)
X = _get_obs_rep(adata, layer=layer, obsm=obsm)
# X, adata.var["mean"], adata.var["std"] = scale(
# X,
# zero_center=zero_center,
# max_value=max_value,
# copy=False, # because a copy has already been made, if it were to be made
# return_mean_std=True,
# )
do_scale(adata.X, max_value, numba.get_num_threads())
if flavor == 'default':
X, adata.var["mean"], adata.var["std"] = scale(
X,
zero_center=zero_center,
max_value=max_value,
copy=False, # because a copy has already been made, if it were to be made
return_mean_std=True,
)
if flavor == 'use_fastpp':
X, adata.var["mean"], adata.var["std"] = do_scale(
X, max_value, numba.get_num_threads()
)
_set_obs_rep(adata, X, layer=layer, obsm=obsm)
if copy:
return adata
Expand Down

0 comments on commit 383c8a8

Please sign in to comment.