Made log timing explicit
flying-sheep committed Jun 5, 2019
1 parent 53a3cca commit 9eca235
Showing 21 changed files with 106 additions and 102 deletions.
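In short: the logger previously kept a hidden `last_time` and `time=True` reported the interval since whatever call last reset it. After this commit, every logging call returns a `datetime`, and callers pass that value back via `time=` to time exactly the span they care about. A sketch of the call-site pattern before and after (condensed from the hunks in this diff):

    # before this commit: implicit timing against hidden logger state
    logg.info('computing neighbors')
    ...                                   # work to be timed
    logg.info('    finished', time=True)  # elapsed since the last info() call

    # after this commit: the start timestamp is explicit
    start = logg.info('computing neighbors')
    ...                                    # work to be timed
    logg.info('    finished', time=start)  # elapsed since *this* start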
63 changes: 35 additions & 28 deletions scanpy/logging.py
@@ -1,9 +1,9 @@
"""Logging and Profiling
"""
import logging
import time as time_
from logging import CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import Optional

import anndata.logging

@@ -16,36 +16,43 @@ class RootLogger(logging.RootLogger):
     def __init__(self, level):
         super().__init__(level)
         self.propagate = False
-        self.last_time = time_.time()

-    def log(self, level, msg, *, extra=None, deep=None, time_passed=None):
+    def log(
+        self,
+        level: int,
+        msg: str,
+        *,
+        extra: Optional[dict] = None,
+        time: datetime = None,
+        deep: Optional[str] = None,
+    ) -> datetime:
+        now = datetime.now(timezone.utc)
+        time_passed: timedelta = None if time is None else now - time
         extra = {
             **(extra or {}),
             'deep': deep,
-            'time_passed': time_passed,
+            'time_passed': time_passed
         }
         super().log(level, msg, extra=extra)
+        return now

-    def error(self, msg, *, deep=None, extra=None):
-        self.log(ERROR, msg, deep=deep, extra=extra)
+    def critical(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(CRITICAL, msg, time=time, deep=deep, extra=extra)

-    def warning(self, msg, *, deep=None, extra=None):
-        self.log(WARNING, msg, deep=deep, extra=extra)
+    def error(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(ERROR, msg, time=time, deep=deep, extra=extra)

-    def info(self, msg, *, time=False, deep=None, extra=None):
-        current_time = time_.time()
-        time_passed = None
-        if time:
-            time_passed = timedelta(seconds=current_time - self.last_time)
-        self.log(INFO, msg, time_passed=time_passed, deep=deep, extra=extra)
-        # Always reset on info calls. TODO: Maybe change to a kwarg?
-        self.last_time = time_.time()
+    def warning(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(WARNING, msg, time=time, deep=deep, extra=extra)

-    def hint(self, msg, *, deep=None, extra=None):
-        self.log(HINT, msg, deep=deep, extra=extra)
+    def info(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(INFO, msg, time=time, deep=deep, extra=extra)

-    def debug(self, msg, *, deep=None, extra=None):
-        self.log(DEBUG, msg, deep=deep, extra=extra)
+    def hint(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(HINT, msg, time=time, deep=deep, extra=extra)
+
+    def debug(self, msg, *, time=None, deep=None, extra=None) -> datetime:
+        return self.log(DEBUG, msg, time=time, deep=deep, extra=extra)


 def _set_log_file(settings):
@@ -77,12 +84,12 @@ def format(self, record: logging.LogRecord):
         format_orig = self._style._fmt
         if record.levelno == INFO:
             self._style._fmt = '{asctime} | {message}'
-            if '{time_passed}' not in record.msg and record.time_passed:
-                self._style._fmt += ' ({time_passed})'
         elif record.levelno == HINT:
             self._style._fmt = '--> {message}'
         elif record.levelno == DEBUG:
             self._style._fmt = '    {message}'
+        if '{time_passed}' not in record.msg and record.time_passed:
+            self._style._fmt += ' ({time_passed})'
         result = logging.Formatter.format(self, record)
         self._style._fmt = format_orig
         return result
@@ -143,8 +150,8 @@ def print_version_and_date():


 # will be replaced in settings
-def error(msg, *, deep=None): pass
-def warning(msg, *, deep=None): pass
-def info(msg, *, deep=None, time=False): pass
-def hint(msg, *, deep=None): pass
-def debug(msg, *, deep=None): pass
+def error(msg, *, time=None, deep=None, extra=None) -> datetime: pass
+def warning(msg, *, time=None, deep=None, extra=None) -> datetime: pass
+def info(msg, *, time=None, deep=None, extra=None) -> datetime: pass
+def hint(msg, *, time=None, deep=None, extra=None) -> datetime: pass
+def debug(msg, *, time=None, deep=None, extra=None) -> datetime: pass
14 changes: 7 additions & 7 deletions scanpy/neighbors/__init__.py
@@ -84,7 +84,7 @@ def neighbors(
     Instead of decaying weights, this stores distances for each pair of
     neighbors.
     """
-    logg.info('computing neighbors')
+    start = logg.info('computing neighbors')
     adata = adata.copy() if copy else adata
     if adata.isview:  # we shouldn't need this here...
         adata._init_as_actual(adata.copy())
@@ -109,7 +109,7 @@ def neighbors(
         adata.uns['neighbors']['rp_forest'] = neighbors.rp_forest
     logg.info(
         '    finished',
-        time=True,
+        time=start,
         deep=(
             'added to `.uns[\'neighbors\']`\n'
             '    \'distances\', distances for each pair of neighbors\n'
@@ -636,7 +636,7 @@ def compute_neighbors(
         Also writes `.knn_indices` and `.knn_distances` if
         `write_knn_indices==True`.
         """
-        logg.info('Computing neighbors')
+        start_neighbors = logg.info('Computing neighbors')
         if n_neighbors > self._adata.shape[0]:  # very small datasets
             n_neighbors = 1 + int(0.5*self._adata.shape[0])
             logg.warning(f'n_obs too small: adjusting to `n_neighbors = {n_neighbors}`')
@@ -672,7 +672,7 @@ def compute_neighbors(
         if write_knn_indices:
             self.knn_indices = knn_indices
             self.knn_distances = knn_distances
-        logg.info('computed neighbors', time=True)
+        start_connect = logg.info('computed neighbors', time=start_neighbors)
         if not use_dense_distances or method == 'umap':
             # we need self._distances also for method == 'gauss' if we didn't
             # use dense distances
@@ -686,7 +686,7 @@ def compute_neighbors(
         # self._distances is unaffected by this
         if method == 'gauss':
             self._compute_connectivities_diffmap()
-        logg.info('computed connectivities', time=True)
+        logg.info('computed connectivities', time=start_connect)
        self._number_connected_components = 1
        if issparse(self._connectivities):
            from scipy.sparse.csgraph import connected_components
@@ -771,7 +771,7 @@ def compute_transitions(self, density_normalize=True):
         -------
         Makes attributes `.transitions_sym` and `.transitions` available.
         """
-        logg.info('Computing transitions')
+        start = logg.info('Computing transitions')
         W = self._connectivities
         # density normalization as of Coifman et al. (2005)
         # ensures that kernel matrix is independent of sampling density
@@ -794,7 +794,7 @@ def compute_transitions(self, density_normalize=True):
         else:
             self.Z = scipy.sparse.spdiags(1.0/z, 0, K.shape[0], K.shape[0])
         self._transitions_sym = self.Z @ K @ self.Z
-        logg.info('computed transitions', time=True)
+        logg.info('computed transitions', time=start)

     def compute_eigen(self, n_comps=15, sym=None, sort='decrease'):
         """Compute eigen decomposition of transition matrix.
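Since each call returns a fresh timestamp, `compute_neighbors` above times two consecutive phases by chaining the return values (`start_neighbors`, then `start_connect`). Reduced to a skeleton, with illustrative names `t0`/`t1` and the phase bodies elided:

    t0 = logg.info('Computing neighbors')
    # ... k-NN search ...
    t1 = logg.info('computed neighbors', time=t0)   # duration of phase 1
    # ... connectivity computation ...
    logg.info('computed connectivities', time=t1)   # duration of phase 2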
4 changes: 2 additions & 2 deletions scanpy/preprocessing/_deprecated/highly_variable_genes.py
@@ -113,7 +113,7 @@ def filter_genes_dispersion(data,
     else:
         adata.var['highly_variable'] = result['gene_subset']
         return adata if copy else None
-    logg.info('extracting highly variable genes')
+    start = logg.info('extracting highly variable genes')
     X = data  # no copy necessary, X remains unchanged in the following
     mean, var = materialize_as_ndarray(_get_mean_var(X))
     # now actually compute the dispersion
@@ -183,7 +183,7 @@ def filter_genes_dispersion(data,
     gene_subset = np.logical_and.reduce((mean > min_mean, mean < max_mean,
                                          dispersion_norm > min_disp,
                                          dispersion_norm < max_disp))
-    logg.info('    finished', time=True)
+    logg.info('    finished', time=start)
     return np.rec.fromarrays((
         gene_subset,
         df['mean'].values,
4 changes: 2 additions & 2 deletions scanpy/preprocessing/_highly_variable_genes.py
@@ -241,7 +241,7 @@ def highly_variable_genes(
     This function replaces :func:`~scanpy.pp.filter_genes_dispersion`.
     """

-    logg.info('extracting highly variable genes')
+    start = logg.info('extracting highly variable genes')

     if not isinstance(adata, AnnData):
         raise ValueError(
@@ -298,7 +298,7 @@ def highly_variable_genes(
     ))
     df['highly_variable'] = gene_subset

-    logg.info('    finished', time=True)
+    logg.info('    finished', time=start)

     if inplace or subset:
         logg.hint(
4 changes: 2 additions & 2 deletions scanpy/preprocessing/_magic.py
@@ -102,7 +102,7 @@ def magic(adata,
             'Please install magic package via `pip install --user '
             'git+git://github.com/KrishnaswamyLab/MAGIC.git#subdirectory=python`')

-    logg.info('computing PHATE')
+    start = logg.info('computing PHATE')
     needs_copy = not (name_list is None or
                       (isinstance(name_list, str) and
                        name_list in ["all_genes", "pca_only"]))
@@ -128,7 +128,7 @@ def magic(adata,
                              genes=name_list)
     logg.info(
         '    finished',
-        time=True,
+        time=start,
         deep=(
             "added\n    'X_magic', PCA on MAGIC coordinates (adata.obsm)"
             if name_list == "pca_only" else ''
4 changes: 2 additions & 2 deletions scanpy/preprocessing/_normalization.py
@@ -139,7 +139,7 @@ def normalize_total(
             ' The following highly-expressed genes are not considered during '
             f'normalization factor computation:\n{adata.var_names[~gene_subset].tolist()}'
         )
-    logg.info(msg)
+    start = logg.info(msg)

     # counts per cell for subset, if max_fraction!=1
     X = adata.X if gene_subset is None else adata[:, gene_subset].X
@@ -189,7 +189,7 @@ def normalize_total(
     logg.info(
         '    finished ({time_passed}):'
         'normalized adata.X',
-        time=True,
+        time=start,
     )
     if key_added is not None:
         logg.debug(f'and added {key_added!r}, counts per cell before normalization (adata.obs)')
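One wrinkle in `normalize_total` above: the message itself contains `{time_passed}`. Per the formatter hunk in scanpy/logging.py, the automatic ` ({time_passed})` suffix is appended only when that placeholder is absent, so such messages control where the elapsed time appears (how the in-message placeholder gets filled happens in formatter code not shown in this diff). An illustrative call:

    start = logg.info('normalizing counts per cell')
    # placeholder present -> the formatter skips its automatic ' (time_passed)' suffix
    logg.info('    finished ({time_passed}): normalized adata.X', time=start)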
4 changes: 2 additions & 2 deletions scanpy/preprocessing/_recipes.py
@@ -99,7 +99,7 @@ def recipe_zheng17(adata, n_top_genes=1000, log=True, plot=False, copy=False):
     -------
     Returns or updates `adata` depending on `copy`.
     """
-    logg.info('running recipe zheng17')
+    start = logg.info('running recipe zheng17')
     if copy: adata = adata.copy()
     pp.filter_genes(adata, min_counts=1)  # only consider genes with more than 1 count
     pp.normalize_per_cell(adata,  # normalize with total UMI count per cell
@@ -115,5 +115,5 @@ def recipe_zheng17(adata, n_top_genes=1000, log=True, plot=False, copy=False):
     pp.normalize_per_cell(adata)  # renormalize after filtering
     if log: pp.log1p(adata)  # log transform: X = log(X + 1)
     pp.scale(adata)
-    logg.info('    finished', time=True)
+    logg.info('    finished', time=start)
     return adata if copy else None
14 changes: 7 additions & 7 deletions scanpy/preprocessing/_simple.py
@@ -445,7 +445,7 @@ def pca(
     else:
         adata = AnnData(data)

-    logg.info(f'computing PCA with n_comps = {n_comps}')
+    start = logg.info(f'computing PCA with n_comps = {n_comps}')

     if adata.n_vars < n_comps:
         n_comps = adata.n_vars - 1
@@ -520,7 +520,7 @@ def pca(
         adata.uns['pca'] = {}
         adata.uns['pca']['variance'] = pca_.explained_variance_
         adata.uns['pca']['variance_ratio'] = pca_.explained_variance_ratio_
-        logg.info('    finished', time=True)
+        logg.info('    finished', time=start)
         logg.debug(
             'and added\n'
             '    \'X_pca\', the PCA coordinates (adata.obs)\n'
@@ -530,7 +530,7 @@ def pca(
         )
         return adata if copy else None
     else:
-        logg.info('    finished', time=True)
+        logg.info('    finished', time=start)
         if return_info:
             return X_pca, pca_.components_, pca_.explained_variance_ratio_, pca_.explained_variance_
         else:
@@ -617,7 +617,7 @@ def normalize_per_cell(
     """
     if key_n_counts is None: key_n_counts = 'n_counts'
     if isinstance(data, AnnData):
-        logg.info('normalizing by total count per cell')
+        start = logg.info('normalizing by total count per cell')
         adata = data.copy() if copy else data
         if counts_per_cell is None:
             cell_subset, counts_per_cell = materialize_as_ndarray(
@@ -644,7 +644,7 @@ def normalize_per_cell(
         logg.info(
             '    finished ({time_passed}): normalized adata.X and added'
             f'    {key_n_counts!r}, counts per cell before normalization (adata.obs)',
-            time=True,
+            time=start,
         )
         return adata if copy else None
     # proceed with data matrix
@@ -729,7 +729,7 @@ def regress_out(adata, keys, n_jobs=None, copy=False) -> Optional[AnnData]:
     -------
     Depending on `copy` returns or updates `adata` with the corrected data matrix.
     """
-    logg.info(f'regressing out {keys}')
+    start = logg.info(f'regressing out {keys}')
     if issparse(adata.X):
         logg.info(
             '    sparse input is densified and may '
@@ -801,7 +801,7 @@ def regress_out(adata, keys, n_jobs=None, copy=False) -> Optional[AnnData]:
     # res is a list of vectors (each corresponding to a regressed gene column).
     # The transpose is needed to get the matrix in the shape needed
     adata.X = np.vstack(res).T.astype(adata.X.dtype)
-    logg.info('    finished', time=True)
+    logg.info('    finished', time=start)
     return adata if copy else None


14 changes: 7 additions & 7 deletions scanpy/readwrite.py
@@ -137,11 +137,11 @@ def read_10x_h5(
     `adata.var_names`. The gene IDs are stored in `adata.var['gene_ids']`.
     The feature types are stored in `adata.var['feature_types']`
     """
-    logg.info(f'reading {filename}')
+    start = logg.info(f'reading {filename}')
     with tables.open_file(str(filename), 'r') as f:
         v3 = '/matrix' in f
     if v3:
-        adata = _read_v3_10x_h5(filename)
+        adata = _read_v3_10x_h5(filename, start=start)
         if genome:
             if genome not in adata.var['genome'].values:
                 raise ValueError(
@@ -153,10 +153,10 @@ def read_10x_h5(
             adata = adata[:, list(map(lambda x: x == 'Gene Expression', adata.var['feature_types']))]
         return adata
     else:
-        return _read_legacy_10x_h5(filename, genome=genome)
+        return _read_legacy_10x_h5(filename, genome=genome, start=start)


-def _read_legacy_10x_h5(filename, genome=None):
+def _read_legacy_10x_h5(filename, *, genome=None, start=None):
     """
     Read hdf5 file from Cell Ranger v2 or earlier versions.
     """
@@ -201,13 +201,13 @@ def _read_legacy_10x_h5(filename, genome=None):
                     gene_ids=dsets['genes'].astype(str),
                 ),
             )
-            logg.info('', time=True)
+            logg.info('', time=start)
             return adata
         except KeyError:
             raise Exception('File is missing one or more required datasets.')


-def _read_v3_10x_h5(filename):
+def _read_v3_10x_h5(filename, *, start=None):
     """
     Read hdf5 file from Cell Ranger v3 or later versions.
     """
@@ -236,7 +236,7 @@ def _read_v3_10x_h5(filename):
                     genome=dsets['genome'].astype(str),
                 ),
             )
-            logg.info('', time=True)
+            logg.info('', time=start)
             return adata
         except KeyError:
             raise Exception('File is missing one or more required datasets.')
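Finally, when the timed work crosses function boundaries, the start timestamp now travels as an explicit keyword argument, as with `start=start` above; the helper then closes the span with an empty message so the log line carries only the elapsed time. A skeleton of that hand-off (`load_h5` is a hypothetical stand-in for the actual HDF5 parsing):

    def read_10x_h5(filename):
        start = logg.info(f'reading {filename}')
        return _read_v3_10x_h5(filename, start=start)

    def _read_v3_10x_h5(filename, *, start=None):
        adata = load_h5(filename)  # hypothetical stand-in for the real parsing
        logg.info('', time=start)  # empty message: the line shows just the timing
        return adata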
