/
_magic.py
151 lines (137 loc) · 5.7 KB
/
_magic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""Denoise high-dimensional data using MAGIC
"""
from .._settings import settings
from .. import logging as logg
def magic(adata,
name_list=None,
k=10,
a=15,
t='auto',
n_pca=100,
knn_dist='euclidean',
random_state=None,
n_jobs=None,
verbose=False,
copy=None,
**kwargs):
"""Markov Affinity-based Graph Imputation of Cells (MAGIC) API [vanDijk18]_.
MAGIC is an algorithm for denoising and transcript recover of single cells
applied to single-cell sequencing data. MAGIC builds a graph from the data
and uses diffusion to smooth out noise and recover the data manifold.
More information and bug reports
`here <https://github.com/KrishnaswamyLab/MAGIC>`__. For help, visit
<https://krishnaswamylab.org/get-help>.
Parameters
----------
adata : :class:`~scanpy.api.AnnData`
An anndata file with `.raw` attribute representing raw counts.
name_list : `list`, `'all_genes'`, or `'pca_only'`, optional (default: `'all_genes'`)
Denoised genes to return. Default is all genes, but this
may require a large amount of memory if the input data is sparse.
k : int, optional, default: 10
number of nearest neighbors on which to build kernel
a : int, optional, default: 15
sets decay rate of kernel tails.
If None, alpha decaying kernel is not used
t : int, optional, default: 'auto'
power to which the diffusion operator is powered.
This sets the level of diffusion. If 'auto', t is selected
according to the Procrustes disparity of the diffused data
n_pca : int, optional, default: 100
Number of principal components to use for calculating
neighborhoods. For extremely large datasets, using
n_pca < 20 allows neighborhoods to be calculated in
roughly log(n_samples) time.
knn_dist : string, optional, default: 'euclidean'
recommended values: 'euclidean', 'cosine', 'precomputed'
Any metric from `scipy.spatial.distance` can be used
distance metric for building kNN graph. If 'precomputed',
`data` should be an n_samples x n_samples distance or
affinity matrix
random_state : `int`, `numpy.RandomState` or `None`, optional (default: `None`)
Random seed. Defaults to the global `numpy` random number generator
n_jobs : `int` or None, optional. Default: None
Number of threads to use in training. All cores are used by default.
verbose : `bool`, `int` or `None`, optional (default: `sc.settings.verbosity`)
If `True` or an integer `>= 2`, print status messages.
If `None`, `sc.settings.verbosity` is used.
copy : `bool` or `None`, optional. Default: `None`.
If true, a copy of anndata is returned. If `None`, `copy` is True if
`genes` is not `'all_genes'` or `'pca_only'`. `copy` may only be False
if `genes` is `'all_genes'` or `'pca_only'`, as the resultant data
will otherwise have different column names from the input data.
kwargs : additional arguments to `magic.MAGIC`
Returns
-------
If `copy` is True, AnnData object is returned.
If `subset_genes` is not `all_genes`, PCA on MAGIC values of cells are stored in
`adata.obsm['X_magic']` and `adata.X` is not modified.
The raw counts are stored in `.raw` attribute of AnnData object.
Examples
--------
>>> import scanpy.api as sc
>>> import magic
>>> adata = sc.datasets.paul15()
>>> sc.pp.normalize_per_cell(adata)
>>> sc.pp.sqrt(adata) # or sc.pp.log1p(adata)
>>> adata_magic = sc.pp.magic(adata, name_list=['Mpo', 'Klf1', 'Ifitm1'], k=5)
>>> adata_magic.shape
(2730, 3)
>>> sc.pp.magic(adata, name_list='pca_only', k=5)
>>> adata.obsm['X_magic'].shape
(2730, 100)
>>> sc.pp.magic(adata, name_list='all_genes', k=5)
>>> adata.X.shape
(2730, 3451)
"""
try:
from magic import MAGIC
except ImportError:
raise ImportError(
'Please install magic package via `pip install --user '
'git+git://github.com/KrishnaswamyLab/MAGIC.git#subdirectory=python`')
start = logg.info('computing PHATE')
needs_copy = not (name_list is None or
(isinstance(name_list, str) and
name_list in ["all_genes", "pca_only"]))
if copy is None:
copy = needs_copy
elif needs_copy and not copy:
raise ValueError(
"Can only perform MAGIC in-place with `name_list=='all_genes' or "
"`name_list=='pca_only'` (got {}). Consider setting "
"`copy=True`".format(name_list))
adata = adata.copy() if copy else adata
n_jobs = settings.n_jobs if n_jobs is None else n_jobs
X_magic = MAGIC(k=k,
a=a,
t=t,
n_pca=n_pca,
knn_dist=knn_dist,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose,
**kwargs).fit_transform(adata,
genes=name_list)
logg.info(
' finished',
time=start,
deep=(
"added\n 'X_magic', PCA on MAGIC coordinates (adata.obsm)"
if name_list == "pca_only" else ''
)
)
# update AnnData instance
if name_list == "pca_only":
# special case - update adata.obsm with smoothed values
adata.obsm["X_magic"] = X_magic.X
elif copy:
# just return X_magic
X_magic.raw = adata
adata = X_magic
else:
# replace data with smoothed data
adata.raw = adata
adata.X = X_magic.X
if copy:
return adata