Skip to content

Commit

Permalink
stats doc ok
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed Apr 16, 2019
1 parent e48c089 commit da3ab79
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 164 deletions.
2 changes: 1 addition & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ help:

clean: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
rm -rf _generated/
rm -rf _generated/ ; rm -rf api/

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
Expand Down
1 change: 0 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ Statistics
limix.stats.allele_frequency
limix.stats.compute_dosage
limix.stats.confusion_matrix
limix.stats.effsizes_se
limix.stats.empirical_pvalues
limix.stats.linear_kinship
limix.stats.lrt_pvalues
Expand Down
6 changes: 0 additions & 6 deletions doc/api/limix.stats.effsizes_se.rst

This file was deleted.

33 changes: 25 additions & 8 deletions doc/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Statistics
**********

.. autofunction:: limix.stats.Chi2Mixture
:noindex:
Alleles
=======

.. autofunction:: limix.stats.allele_expectation
:noindex:
Expand All @@ -14,23 +14,40 @@ Statistics
.. autofunction:: limix.stats.compute_dosage
:noindex:

.. autofunction:: limix.stats.confusion_matrix
:noindex:
Chi-square mixture
==================

.. autofunction:: limix.stats.effsizes_se
.. autofunction:: limix.stats.Chi2Mixture
:noindex:

.. autofunction:: limix.stats.empirical_pvalues
Ground truth evaluation
=======================

.. autofunction:: limix.stats.confusion_matrix
:noindex:

Kinship
=======

.. autofunction:: limix.stats.linear_kinship
:noindex:

Likelihood-ratio test
=====================

.. autofunction:: limix.stats.lrt_pvalues
:noindex:

.. autofunction:: limix.stats.multipletests
:noindex:
Principal component analysis
============================

.. autofunction:: limix.stats.pca
:noindex:

P-value correction
==================

.. autofunction:: limix.stats.multipletests
:noindex:
.. autofunction:: limix.stats.empirical_pvalues
:noindex:
3 changes: 1 addition & 2 deletions limix/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ._chi2 import Chi2Mixture
from ._confusion import confusion_matrix
from ._kinship import linear_kinship
from ._lrt import effsizes_se, lrt_pvalues
from ._lrt import lrt_pvalues
from ._allele import allele_frequency, compute_dosage, allele_expectation
from ._pvalue import multipletests, empirical_pvalues
from ._pca import pca
Expand All @@ -16,7 +16,6 @@
"allele_frequency",
"compute_dosage",
"confusion_matrix",
"effsizes_se",
"empirical_pvalues",
"linear_kinship",
"lrt_pvalues",
Expand Down
160 changes: 83 additions & 77 deletions limix/stats/_allele.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,80 @@
from .._bits.dask import array_shape_reveal


def allele_frequency(expec):
r"""Compute allele frequency from its expectation.
def allele_frequency(X):
"""
Compute allele frequency from its expectation.
``X`` is a matrix whose last dimension correspond to the different set of
chromosomes. The first dimension represent different variants and the second
dimension represent the different samples.
Parameters
----------
expec : array_like
X : array_like
Allele expectations encoded as a variants-by-samples-by-alleles matrix.
Returns
-------
:class:`numpy.ndarray`
frequenct : ndarray
Allele frequencies encoded as a variants-by-alleles matrix.
"""
ploidy = expec.shape[-1]
if expec.ndim < 3:
ploidy = X.shape[-1]
if X.ndim < 3:
n = 1
else:
n = expec.shape[1]
return expec.sum(-2) / (ploidy * n)
n = X.shape[1]
return X.sum(-2) / (ploidy * n)


def compute_dosage(expec, alt=None):
r"""Compute dosage from allele expectation.
def compute_dosage(X, alt=None):
"""
Compute dosage from allele expectation.
Parameters
----------
expec : array_like
X : array_like
Allele expectations encoded as a variants-by-samples-by-alleles matrix.
ref : array_like
Allele reference of each locus. The allele having the minor allele frequency
for the provided ``expec`` is used as the reference if `None`. Defaults to
for the provided ``X`` is used as the reference if `None`. Defaults to
`None`.
Returns
-------
:class:`numpy.ndarray`
dosage : ndarray
Dosage encoded as a variants-by-samples matrix.
Examples
--------
Example
-------
.. doctest::
>>> from bgen_reader import read_bgen, allele_expectation, example_files
>>> from bgen_reader import compute_dosage
>>>
>>> with example_files("example.32bits.bgen") as filepath:
... bgen = read_bgen(filepath, verbose=False)
... variant_idx = 2
... e = allele_expectation(bgen, variant_idx)
... dosage = compute_dosage(e)
... print(dosage[:5])
[0.01550294 0.99383543 1.97933958 0.99560547 1.97879027]
"""
>>> from bgen_reader import read_bgen, allele_expectation, example_files
>>> from bgen_reader import compute_dosage
>>>
>>> with example_files("example.32bits.bgen") as filepath:
... bgen = read_bgen(filepath, verbose=False)
... variant_idx = 2
... e = allele_expectation(bgen, variant_idx)
... dosage = compute_dosage(e)
... print(dosage[:5])
[0.01550294 0.99383543 1.97933958 0.99560547 1.97879027]
"""
from numpy import asarray

if alt is None:
return expec[..., -1]
return X[..., -1]
try:
return expec[alt, :, alt]
return X[alt, :, alt]
except NotImplementedError:
alt = asarray(alt, int)
return asarray(expec, float)[alt, :, alt]
return asarray(X, float)[alt, :, alt]


def allele_expectation(p, nalleles, ploidy):
r"""Allele expectation.
"""
Allele expectation.
Compute the expectation of each allele from the given probabilities.
It accepts three shapes of matrices:
Expand All @@ -86,60 +93,59 @@ def allele_expectation(p, nalleles, ploidy):
Returns
-------
:class:`numpy.ndarray`
expectation : ndarray
Last dimension will contain the expectation of each allele.
Examples
--------
.. doctest::
>>> from texttable import Texttable
>>> from bgen_reader import read_bgen, allele_expectation, example_files
>>>
>>> sampleid = "sample_005"
>>> rsid = "RSID_6"
>>>
>>> with example_files("example.32bits.bgen") as filepath:
... bgen = read_bgen(filepath, verbose=False)
...
... locus = bgen["variants"].query("rsid == '{}'".format(rsid)).index.compute().item()
... sample = bgen["samples"].to_frame().query("id == '{}'".format(sampleid)).index
... sample = sample[0]
...
... nalleles = bgen["variants"].loc[locus]["nalleles"]
... ploidy = 2
...
... p = bgen["genotype"][locus].compute()["probs"][sample]
... # For unphased genotypes only.
... e = allele_expectation(bgen, locus)[sample]
...
... alleles = bgen["variants"].loc[locus]["allele_ids"].compute().item().split(",")
...
... tab = Texttable()
...
... print(tab.add_rows(
... [
... ["", "AA", "AG", "GG", "E[.]"],
... ["p"] + list(p) + [1.0],
... ["#" + alleles[0], 2, 1, 0, e[0]],
... ["#" + alleles[1], 0, 1, 2, e[1]],
... ]
... ).draw())
+----+-------+-------+-------+-------+
| | AA | AG | GG | E[.] |
+====+=======+=======+=======+=======+
| p | 0.012 | 0.987 | 0.001 | 1 |
+----+-------+-------+-------+-------+
| #A | 2 | 1 | 0 | 1.011 |
+----+-------+-------+-------+-------+
| #G | 0 | 1 | 2 | 0.989 |
+----+-------+-------+-------+-------+
>>> print("variant: {}".format(rsid))
variant: RSID_6
>>> print("sample : {}".format(sampleid))
sample : sample_005
>>> from texttable import Texttable
>>> from bgen_reader import read_bgen, allele_expectation, example_files
>>>
>>> sampleid = "sample_005"
>>> rsid = "RSID_6"
>>>
>>> with example_files("example.32bits.bgen") as filepath:
... bgen = read_bgen(filepath, verbose=False)
...
... locus = bgen["variants"].query("rsid == '{}'".format(rsid)).index.compute().item()
... sample = bgen["samples"].to_frame().query("id == '{}'".format(sampleid)).index
... sample = sample[0]
...
... nalleles = bgen["variants"].loc[locus]["nalleles"]
... ploidy = 2
...
... p = bgen["genotype"][locus].compute()["probs"][sample]
... # For unphased genotypes only.
... e = allele_expectation(bgen, locus)[sample]
...
... alleles = bgen["variants"].loc[locus]["allele_ids"].compute().item().split(",")
...
... tab = Texttable()
...
... print(tab.add_rows(
... [
... ["", "AA", "AG", "GG", "E[.]"],
... ["p"] + list(p) + [1.0],
... ["#" + alleles[0], 2, 1, 0, e[0]],
... ["#" + alleles[1], 0, 1, 2, e[1]],
... ]
... ).draw())
+----+-------+-------+-------+-------+
| | AA | AG | GG | E[.] |
+====+=======+=======+=======+=======+
| p | 0.012 | 0.987 | 0.001 | 1 |
+----+-------+-------+-------+-------+
| #A | 2 | 1 | 0 | 1.011 |
+----+-------+-------+-------+-------+
| #G | 0 | 1 | 2 | 0.989 |
+----+-------+-------+-------+-------+
>>> print("variant: {}".format(rsid))
variant: RSID_6
>>> print("sample : {}".format(sampleid))
sample : sample_005
Note
----
Expand Down
42 changes: 35 additions & 7 deletions limix/stats/_kinship.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,52 @@


def linear_kinship(G, out=None, verbose=True):
r"""Estimate Kinship matrix via linear kernel.
"""
Estimate Kinship matrix via linear kernel.
Let š‘‘ be the number of columns of ``G``. The resulting matrix is given by:
.. math::
š™ŗ = šš‡šš‡įµ€/š‘‘
where
.. math::
šš‡įµ¢ā±¼ = (š™¶įµ¢ā±¼ - š‘šā±¼) / š‘ ā±¼
is the matrix ``G`` column-wise normalized by means š‘šā±¼ and standard deviations š‘ ā±¼.
NaNs are ignored so as to produce matrix ``K`` having only real numbers.
This functions is specially designed to also handle large matrices ``G`` that would
otherwise require a large amount of memory if it were to be loaded in memory first.
For those cases, libraries like Dask come in handy.
Parameters
----------
G : array_like
Samples-by-variants matrix.
out : ndarray
A location into which the result is stored.
verbose : bool, optional
``True`` for showing progress; ``False`` otherwise. Defauts to ``True``.
Examples
--------
.. doctest::
>>> from numpy.random import RandomState
>>> from numpy import array_str
>>> from limix.stats import linear_kinship
>>>
>>> random = RandomState(1)
>>> X = random.randn(4, 100)
>>> K = linear_kinship(X, verbose=False)
>>> print(array_str(K, precision=4))
[[ 0.9131 -0.1928 -0.3413 -0.379 ]
[-0.1928 0.8989 -0.2356 -0.4704]
[-0.3413 -0.2356 0.9578 -0.3808]
[-0.379 -0.4704 -0.3808 1.2302]]
>>> print(K) # doctest: +FLOAT_CMP
[[ 0.91314823 -0.19283362 -0.34133897 -0.37897564]
[-0.19283362 0.89885153 -0.2356003 -0.47041761]
[-0.34133897 -0.2356003 0.95777313 -0.38083386]
[-0.37897564 -0.47041761 -0.38083386 1.23022711]]
"""
from numpy import sqrt, zeros, asfortranarray, where, asarray, nanmean, std, isnan
from scipy.linalg.blas import get_blas_funcs
Expand Down

0 comments on commit da3ab79

Please sign in to comment.