qc doc done

limix · Apr 16, 2019 · 12079e8 · 12079e8
1 parent da3ab79
commit 12079e8
Show file tree

Hide file tree

Showing 15 changed files with 88 additions and 110 deletions.
diff --git a/doc/api.rst b/doc/api.rst
@@ -30,7 +30,6 @@ Quality control
     limix.qc.mean_standardize
     limix.qc.normalise_covariance
     limix.qc.quantile_gaussianize
-    limix.qc.regress_out
     limix.qc.remove_dependent_cols
     limix.qc.unique_variants
 

diff --git a/doc/api/limix.qc.regress_out.rst b/doc/api/limix.qc.regress_out.rst
diff --git a/doc/cli.rst b/doc/cli.rst
@@ -96,6 +96,9 @@ HDF5
 
 The following command shows the hierarchy of a HDF5 file:
 
+.. command-output:: limix download http://rest.s3for.me/limix/small_example.hdf5
+   :cwd: _build
+
 .. command-output:: limix see small_example.hdf5
    :cwd: _build
 

diff --git a/doc/qc.rst b/doc/qc.rst
@@ -3,52 +3,49 @@ Quality control
 ***************
 
 Box-Cox
-^^^^^^^
+=======
 
 .. autofunction:: limix.qc.boxcox
     :noindex:
 
 Dependent columns
-^^^^^^^^^^^^^^^^^
+=================
 
 .. autofunction:: limix.qc.remove_dependent_cols
     :noindex:
 
+.. autofunction:: limix.qc.unique_variants
+    :noindex:
+
 Genotype
-^^^^^^^^
+========
 
 .. autofunction:: limix.qc.indep_pairwise
     :noindex:
+
 .. autofunction:: limix.qc.compute_maf
     :noindex:
 
 Impute
-^^^^^^
+======
 
 .. autofunction:: limix.qc.mean_impute
     :noindex:
+
 .. autofunction:: limix.qc.count_missingness
     :noindex:
 
 Kinship
-^^^^^^^
+=======
 
 .. autofunction:: limix.qc.normalise_covariance
     :noindex:
 
 Normalisation
-^^^^^^^^^^^^^
+=============
 
 .. autofunction:: limix.qc.mean_standardize
     :noindex:
-.. autofunction:: limix.qc.quantile_gaussianize
-    :noindex:
 
-
-
-Regression
-^^^^^^^^^^
-
-.. autofunction:: limix.qc.regress_out
+.. autofunction:: limix.qc.quantile_gaussianize
     :noindex:
-
diff --git a/limix/qc/__init__.py b/limix/qc/__init__.py
@@ -1,25 +1,23 @@
 from ._allele import compute_maf
+from ._boxcox import boxcox
+from ._covariance import normalise_covariance
 from ._impute import mean_impute
-from .kinship import normalise_covariance
-from .ld import indep_pairwise
-from .linalg import remove_dependent_cols
-from .missing import count_missingness
-from .regress import regress_out
-from .trans import boxcox
-from ._mean_standardize import mean_standardize
-from ._quantile_gaussianize import quantile_gaussianize
-from .unique import unique_variants
+from ._ld import indep_pairwise
+from ._linalg import remove_dependent_cols
+from ._mean_std import mean_standardize
+from ._missing import count_missingness
+from ._quant_gauss import quantile_gaussianize
+from ._unique import unique_variants
 
 __all__ = [
     "boxcox",
+    "compute_maf",
+    "count_missingness",
+    "indep_pairwise",
+    "mean_impute",
     "mean_standardize",
+    "normalise_covariance",
     "quantile_gaussianize",
-    "regress_out",
     "remove_dependent_cols",
-    "mean_impute",
-    "indep_pairwise",
-    "count_missingness",
-    "compute_maf",
-    "normalise_covariance",
     "unique_variants",
 ]
diff --git a/limix/qc/trans.py → limix/qc/_boxcox.py b/limix/qc/trans.py → limix/qc/_boxcox.py
@@ -4,7 +4,7 @@
 
 
 def boxcox(x):
-    r"""Box Cox transformation for normality conformance.
+    r"""Box-Cox transformation for normality conformance.
 
     It applies the power transformation
 
@@ -16,7 +16,7 @@ def boxcox(x):
         \end{cases}
 
     to the provided data, hopefully making it more normal distribution-like.
-    The :math:`\lambda` parameter is fit by maximum likelihood estimation.
+    The λ parameter is fit by maximum likelihood estimation.
 
     Parameters
     ----------
@@ -25,30 +25,28 @@ def boxcox(x):
 
     Returns
     -------
-    array_like
-        Box Cox transformed data.
+    boxcox : ndarray
+        Box-Cox transformed data.
 
     Examples
     --------
     .. plot::
 
-        import limix
-        from matplotlib import pyplot as plt
-        import numpy as np
-        import scipy.stats as stats
-
-        np.random.seed(0)
-
-        x = stats.loggamma.rvs(0.1, size=100)
-        y = limix.qc.boxcox(x)
-
-        fig = plt.figure()
-
-        ax1 = fig.add_subplot(211)
-        stats.probplot(x, dist=stats.norm, plot=ax1)
-
-        ax2 = fig.add_subplot(212)
-        stats.probplot(y, dist=stats.norm, plot=ax2)
+        >>> import limix
+        >>> import numpy as np
+        >>> import scipy.stats as stats
+        ...
+        >>> np.random.seed(0)
+        ...
+        >>> x = stats.loggamma.rvs(0.1, size=100)
+        >>> y = limix.qc.boxcox(x)
+        ...
+        >>> plt = limix.plot.get_pyplot()
+        ...
+        >>> _, (ax1, ax2) = plt.subplots(2, 1)
+        >>> _ = stats.probplot(x, dist=stats.norm, plot=ax1)
+        >>> _ = stats.probplot(y, dist=stats.norm, plot=ax2)
+        >>> plt.tight_layout()
     """
     import dask.array as da
     import numpy as np

diff --git a/limix/qc/kinship.py → limix/qc/_covariance.py b/limix/qc/kinship.py → limix/qc/_covariance.py
@@ -1,5 +1,6 @@
 def normalise_covariance(K, out=None):
-    r"""Variance rescaling of covariance matrix 𝙺.
+    """
+    Variance rescaling of covariance matrix 𝙺.
 
     Let n be the number of rows (or columns) of 𝙺 and let
     mᵢ be the average of the values in the i-th column.

diff --git a/limix/qc/_impute.py b/limix/qc/_impute.py
@@ -3,7 +3,8 @@
 
 
 def mean_impute(X, axis=-1, inplace=False):
-    r"""Impute ``NaN`` values.
+    """
+    Impute ``NaN`` values.
 
     It defaults to column-wise imputation.
 
@@ -18,7 +19,7 @@ def mean_impute(X, axis=-1, inplace=False):
 
     Returns
     -------
-    array_like
+    ndarray
         Imputed array.
 
     Examples

diff --git a/limix/qc/ld.py → limix/qc/_ld.py b/limix/qc/ld.py → limix/qc/_ld.py
@@ -2,7 +2,8 @@
 
 
 def indep_pairwise(X, window_size, step_size, threshold, verbose=True):
-    r"""Determine pair-wise independent variants.
+    """
+    Determine pair-wise independent variants.
 
     Independent variants are defined via squared Pearson correlations between
     pairs of variants inside a sliding window.
@@ -22,10 +23,12 @@ def indep_pairwise(X, window_size, step_size, threshold, verbose=True):
 
     Returns
     -------
-    ok : boolean array defining independent variants
+    ok : ndarray
+        Boolean array defining independent variants.
+
+    Example
+    -------
 
-    Examples
-    --------
     .. doctest::
 
         >>> from numpy.random import RandomState

diff --git a/limix/qc/linalg.py → limix/qc/_linalg.py b/limix/qc/linalg.py → limix/qc/_linalg.py
@@ -1,16 +1,19 @@
-def remove_dependent_cols(X, tol=1e-6, verbose=False):
-    r"""Remove dependent columns.
+def remove_dependent_cols(X, tol=1e-6):
+    """
+    Remove dependent columns.
 
     Return a matrix with dependent columns removed.
 
     Parameters
     ----------
     X : array_like
         Matrix that might have dependent columns.
+    tol : float
+        Threshold at or below which columns are considered dependent.
 
     Returns
     -------
-    array_like
+    rank : ndarray
         Full column rank matrix.
     """
     from scipy.linalg import qr
@@ -20,10 +23,8 @@ def remove_dependent_cols(X, tol=1e-6, verbose=False):
 
     R = qr(X, mode="r")[0][: X.shape[1], :]
     I = npabs(R.diagonal()) > tol
-    if npany(~I) and verbose:
-        msg = "Columns " + str(where(~I)[0])
-        print(msg + " have been removed because linear dependence")
+    if npany(~I):
         R = X[:, I]
     else:
-        R = X.copy()
+        R = X
     return R
diff --git a/limix/qc/_mean_standardize.py → limix/qc/_mean_std.py b/limix/qc/_mean_standardize.py → limix/qc/_mean_std.py
@@ -3,36 +3,33 @@
 
 
 def mean_standardize(X, axis=-1, inplace=False):
-    r"""Zero-mean and one-deviation normalisation.
+    """
+    Zero-mean and one-deviation normalisation.
 
     Normalise in such a way that the mean and variance are equal to zero and
-    one.
-    This transformation is taken over the flattened array by default, otherwise
-    over the specified axis.
-    Missing values represented by ``NaN`` are ignored.
+    one. This transformation is taken over the flattened array by default, otherwise
+    over the specified axis. Missing values represented by ``NaN`` are ignored.
 
     Parameters
     ----------
     X : array_like
         Array of values.
     axis : int, optional
-        Axis value. Defaults to `1`.
+        Axis value. Defaults to ``-1``.
     inplace : bool, optional
-        Defaults to `False`.
-
+        Defaults to ``False``.
 
     Returns
     -------
-    array_like
+    X : ndarray
         Normalized array.
 
-    Examples
-    --------
-
+    Example
+    -------
     .. doctest::
 
         >>> import limix
-        >>> from numpy import arange, array_str
+        >>> from numpy import arange
         >>>
         >>> X = arange(15).reshape((5, 3)).astype(float)
         >>> print(X)
@@ -43,7 +40,7 @@ def mean_standardize(X, axis=-1, inplace=False):
          [12. 13. 14.]]
         >>> X = arange(6).reshape((2, 3)).astype(float)
         >>> X = limix.qc.mean_standardize(X, axis=0)
-        >>> print(X)  # doctest: +FLOAT_CMP
+        >>> print(X) # doctest: +FLOAT_CMP
         [[-1.22474487  0.          1.22474487]
          [-1.22474487  0.          1.22474487]]
     """

diff --git a/limix/qc/missing.py → limix/qc/_missing.py b/limix/qc/missing.py → limix/qc/_missing.py
@@ -1,12 +1,10 @@
-from __future__ import division
-
-
 def count_missingness(X):
-    r"""Count the number of missing values per column.
+    """
+    Count the number of missing values per column.
 
     Returns
     -------
-    array_like
+    count : ndarray
         Number of missing values per column.
     """
     import dask.array as da

diff --git a/limix/qc/_quantile_gaussianize.py → limix/qc/_quant_gauss.py b/limix/qc/_quantile_gaussianize.py → limix/qc/_quant_gauss.py
diff --git a/limix/qc/unique.py → limix/qc/_unique.py b/limix/qc/unique.py → limix/qc/_unique.py
@@ -1,19 +1,19 @@
 def unique_variants(X):
-    r"""Filters out variants with the same genetic profile.
+    """
+    Filters out variants with the same genetic profile.
 
     Parameters
     ----------
-    X : ndarray
-        (`N`, `S`) ndarray of genotype values for `N` individuals and `S`
-        variants.
+    X : array_like
+        Samples-by-variants matrix of genotype values.
 
     Returns
     -------
-    ndarray
+    genotype : ndarray
         Genotype array with unique variants.
 
-    Examples
-    --------
+    Example
+    -------
     .. doctest::
 
         >>> from numpy.random import RandomState

diff --git a/limix/qc/regress.py b/limix/qc/regress.py