Skip to content

Commit

Permalink
qc doc done
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed Apr 16, 2019
1 parent da3ab79 commit 12079e8
Show file tree
Hide file tree
Showing 15 changed files with 88 additions and 110 deletions.
1 change: 0 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ Quality control
limix.qc.mean_standardize
limix.qc.normalise_covariance
limix.qc.quantile_gaussianize
limix.qc.regress_out
limix.qc.remove_dependent_cols
limix.qc.unique_variants

Expand Down
6 changes: 0 additions & 6 deletions doc/api/limix.qc.regress_out.rst

This file was deleted.

3 changes: 3 additions & 0 deletions doc/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ HDF5

The following command shows the hierarchy of an HDF5 file:

.. command-output:: limix download http://rest.s3for.me/limix/small_example.hdf5
:cwd: _build

.. command-output:: limix see small_example.hdf5
:cwd: _build

Expand Down
27 changes: 12 additions & 15 deletions doc/qc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,49 @@ Quality control
***************

Box-Cox
^^^^^^^
=======

.. autofunction:: limix.qc.boxcox
:noindex:

Dependent columns
^^^^^^^^^^^^^^^^^
=================

.. autofunction:: limix.qc.remove_dependent_cols
:noindex:

.. autofunction:: limix.qc.unique_variants
:noindex:

Genotype
^^^^^^^^
========

.. autofunction:: limix.qc.indep_pairwise
:noindex:

.. autofunction:: limix.qc.compute_maf
:noindex:

Impute
^^^^^^
======

.. autofunction:: limix.qc.mean_impute
:noindex:

.. autofunction:: limix.qc.count_missingness
:noindex:

Kinship
^^^^^^^
=======

.. autofunction:: limix.qc.normalise_covariance
:noindex:

Normalisation
^^^^^^^^^^^^^
=============

.. autofunction:: limix.qc.mean_standardize
:noindex:
.. autofunction:: limix.qc.quantile_gaussianize
:noindex:



Regression
^^^^^^^^^^

.. autofunction:: limix.qc.regress_out
.. autofunction:: limix.qc.quantile_gaussianize
:noindex:

28 changes: 13 additions & 15 deletions limix/qc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
from ._allele import compute_maf
from ._boxcox import boxcox
from ._covariance import normalise_covariance
from ._impute import mean_impute
from .kinship import normalise_covariance
from .ld import indep_pairwise
from .linalg import remove_dependent_cols
from .missing import count_missingness
from .regress import regress_out
from .trans import boxcox
from ._mean_standardize import mean_standardize
from ._quantile_gaussianize import quantile_gaussianize
from .unique import unique_variants
from ._ld import indep_pairwise
from ._linalg import remove_dependent_cols
from ._mean_std import mean_standardize
from ._missing import count_missingness
from ._quant_gauss import quantile_gaussianize
from ._unique import unique_variants

__all__ = [
"boxcox",
"compute_maf",
"count_missingness",
"indep_pairwise",
"mean_impute",
"mean_standardize",
"normalise_covariance",
"quantile_gaussianize",
"regress_out",
"remove_dependent_cols",
"mean_impute",
"indep_pairwise",
"count_missingness",
"compute_maf",
"normalise_covariance",
"unique_variants",
]
40 changes: 19 additions & 21 deletions limix/qc/trans.py → limix/qc/_boxcox.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def boxcox(x):
r"""Box Cox transformation for normality conformance.
r"""Box-Cox transformation for normality conformance.
It applies the power transformation
Expand All @@ -16,7 +16,7 @@ def boxcox(x):
\end{cases}
to the provided data, hopefully making it more normal distribution-like.
The :math:`\lambda` parameter is fit by maximum likelihood estimation.
The λ parameter is fit by maximum likelihood estimation.
Parameters
----------
Expand All @@ -25,30 +25,28 @@ def boxcox(x):
Returns
-------
array_like
Box Cox transformed data.
boxcox : ndarray
Box-Cox transformed data.
Examples
--------
.. plot::
import limix
from matplotlib import pyplot as plt
import numpy as np
import scipy.stats as stats
np.random.seed(0)
x = stats.loggamma.rvs(0.1, size=100)
y = limix.qc.boxcox(x)
fig = plt.figure()
ax1 = fig.add_subplot(211)
stats.probplot(x, dist=stats.norm, plot=ax1)
ax2 = fig.add_subplot(212)
stats.probplot(y, dist=stats.norm, plot=ax2)
>>> import limix
>>> import numpy as np
>>> import scipy.stats as stats
...
>>> np.random.seed(0)
...
>>> x = stats.loggamma.rvs(0.1, size=100)
>>> y = limix.qc.boxcox(x)
...
>>> plt = limix.plot.get_pyplot()
...
>>> _, (ax1, ax2) = plt.subplots(2, 1)
>>> _ = stats.probplot(x, dist=stats.norm, plot=ax1)
>>> _ = stats.probplot(y, dist=stats.norm, plot=ax2)
>>> plt.tight_layout()
"""
import dask.array as da
import numpy as np
Expand Down
3 changes: 2 additions & 1 deletion limix/qc/kinship.py → limix/qc/_covariance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
def normalise_covariance(K, out=None):
r"""Variance rescaling of covariance matrix 𝙺.
"""
Variance rescaling of covariance matrix 𝙺.
Let n be the number of rows (or columns) of 𝙺 and let
mᵢ be the average of the values in the i-th column.
Expand Down
5 changes: 3 additions & 2 deletions limix/qc/_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@


def mean_impute(X, axis=-1, inplace=False):
r"""Impute ``NaN`` values.
"""
Impute ``NaN`` values.
It defaults to column-wise imputation.
Expand All @@ -18,7 +19,7 @@ def mean_impute(X, axis=-1, inplace=False):
Returns
-------
array_like
ndarray
Imputed array.
Examples
Expand Down
11 changes: 7 additions & 4 deletions limix/qc/ld.py → limix/qc/_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@


def indep_pairwise(X, window_size, step_size, threshold, verbose=True):
r"""Determine pair-wise independent variants.
"""
Determine pair-wise independent variants.
Independent variants are defined via squared Pearson correlations between
pairs of variants inside a sliding window.
Expand All @@ -22,10 +23,12 @@ def indep_pairwise(X, window_size, step_size, threshold, verbose=True):
Returns
-------
ok : boolean array defining independent variants
ok : ndarray
Boolean array defining independent variants
Example
-------
Examples
--------
.. doctest::
>>> from numpy.random import RandomState
Expand Down
15 changes: 8 additions & 7 deletions limix/qc/linalg.py → limix/qc/_linalg.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
def remove_dependent_cols(X, tol=1e-6, verbose=False):
r"""Remove dependent columns.
def remove_dependent_cols(X, tol=1e-6):
"""
Remove dependent columns.
Return a matrix with dependent columns removed.
Parameters
----------
X : array_like
Matrix that might have dependent columns.
tol : float
Threshold on the absolute diagonal of the QR factor R at or below which columns are considered dependent.
Returns
-------
array_like
rank : ndarray
Full column rank matrix.
"""
from scipy.linalg import qr
Expand All @@ -20,10 +23,8 @@ def remove_dependent_cols(X, tol=1e-6, verbose=False):

R = qr(X, mode="r")[0][: X.shape[1], :]
I = npabs(R.diagonal()) > tol
if npany(~I) and verbose:
msg = "Columns " + str(where(~I)[0])
print(msg + " have been removed because linear dependence")
if npany(~I):
R = X[:, I]
else:
R = X.copy()
R = X
return R
25 changes: 11 additions & 14 deletions limix/qc/_mean_standardize.py → limix/qc/_mean_std.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,33 @@


def mean_standardize(X, axis=-1, inplace=False):
r"""Zero-mean and one-deviation normalisation.
"""
Zero-mean and one-deviation normalisation.
Normalise in such a way that the mean and variance are equal to zero and
one.
This transformation is taken over the flattened array by default, otherwise
over the specified axis.
Missing values represented by ``NaN`` are ignored.
one. This transformation is taken over the flattened array by default, otherwise
over the specified axis. Missing values represented by ``NaN`` are ignored.
Parameters
----------
X : array_like
Array of values.
axis : int, optional
Axis value. Defaults to `1`.
Axis value. Defaults to ``-1``.
inplace : bool, optional
Defaults to `False`.
Defaults to ``False``.
Returns
-------
array_like
X : ndarray
Normalized array.
Examples
--------
Example
-------
.. doctest::
>>> import limix
>>> from numpy import arange, array_str
>>> from numpy import arange
>>>
>>> X = arange(15).reshape((5, 3)).astype(float)
>>> print(X)
Expand All @@ -43,7 +40,7 @@ def mean_standardize(X, axis=-1, inplace=False):
[12. 13. 14.]]
>>> X = arange(6).reshape((2, 3)).astype(float)
>>> X = limix.qc.mean_standardize(X, axis=0)
>>> print(X) # doctest: +FLOAT_CMP
>>> print(X) # doctest: +FLOAT_CMP
[[-1.22474487 0. 1.22474487]
[-1.22474487 0. 1.22474487]]
"""
Expand Down
8 changes: 3 additions & 5 deletions limix/qc/missing.py → limix/qc/_missing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from __future__ import division


def count_missingness(X):
r"""Count the number of missing values per column.
"""
Count the number of missing values per column.
Returns
-------
array_like
count : ndarray
Number of missing values per column.
"""
import dask.array as da
Expand Down
File renamed without changes.
14 changes: 7 additions & 7 deletions limix/qc/unique.py → limix/qc/_unique.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
def unique_variants(X):
r"""Filters out variants with the same genetic profile.
"""
Filters out variants with the same genetic profile.
Parameters
----------
X : ndarray
(`N`, `S`) ndarray of genotype values for `N` individuals and `S`
variants.
X : array_like
Samples-by-variants matrix of genotype values.
Returns
-------
ndarray
genotype : ndarray
Genotype array with unique variants.
Examples
--------
Example
-------
.. doctest::
>>> from numpy.random import RandomState
Expand Down
12 changes: 0 additions & 12 deletions limix/qc/regress.py

This file was deleted.

0 comments on commit 12079e8

Please sign in to comment.