Skip to content

Commit

Permalink
Merge branch 'release/1.0.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed May 10, 2017
2 parents e59bd31 + 80d10c7 commit a5bc1c6
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 15 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ env:
global:
- PKG_NAME=limix
- LIKNORM=true
- PY_DEPS="numpy scipy scikit-learn pandas h5py tables matplotlib
limix-legacy limix-core sphinx_rtd_theme ncephes ndarray-listener
brent-search"
- PY_DEPS="numpy scipy scikit-learn pandas h5py tables matplotlib numpy-sugar pandas-plink
limix-legacy limix-core sphinx_rtd_theme ncephes ndarray-listener liknorm-py
brent-search git+https://github.com/dask/partd git+https://github.com/dask/zict git+https://github.com/dask/distributed git+https://github.com/mrocklin/sparse"
- secure: TGjO9nGH88bYMoRG3nBKYZn1Xj43/DzBK6lqWAL+2D7rd6jryPTSMy9R0luuqOUubhPCI7BpQVLKYXsRi0T0EGeRm4WbjQa2AD9TZlrvgRSWftMZ0U5UEvAoONMAsXV3CpND6Ey5yynRALjvt/4kH4a1IjIOwvSepxIwN7bGEFIXGyFq/scl65DT/7TFsv7UKzIKaOO6U9dq0vFekciCX+uuNF5pX7A8SMFrO6ydRfUtO47o9M+ieoGHgkQw0KyjD8wjbMa/2TXvpY6tAf8BnyYH8XhqJ7Tztzzc9Hbpxqr8b4LjnGr3UEfz6c0benzLufYwoQyi6T6T4ur1H19i0OS9LO/v9HrWkhuzx/15HH5b2GEZJK8DcnoWc/1csX5W9M3TbWhOIiCW8fyujVwcOznah7GW9cwPFuWP7+JZiw5E2B+u8ukkio9QsG7BV4+YJZCBevOGe70QNiEwUhF9E9TRyCXuQvORYv8nKIMoQ8GnujSEoHLmYMbdYEM/bLcOVfsfDOEWJPUvtBJ+Mmhz1DyMcS4qpUtStSu5Io38l1ZD7AdZ8iGGPkeYfF7ke4ZWQgx9CGrXVgYmyj016p0Z0mXr7GSDkhyIo4nhHb2EoH1rdW29T+GbSyz4Q9AtZNghkvZbYeFyvqsoWqXLn27dG91t7CZ7hT25gSag49VTbnA=
deploy:
provider: pypi
Expand Down
2 changes: 1 addition & 1 deletion doc/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ dependencies:
- beautifulsoup4=4.5.3=py35_0
- bitarray=0.8.1=py35_0
- blaze=0.10.1=py35_0
- bokeh=0.12.4=py35_0
- boto=2.45.0=py35_0
- bottleneck=1.2.0=np111py35_0
- cairo=1.14.8=0
Expand Down Expand Up @@ -184,6 +183,7 @@ dependencies:
- spyder=3.1.2=py35_0
- sqlalchemy=1.1.5=py35_0
- sqlite=3.13.0=0
- joblib=0.11=py35_0
- statsmodels=0.6.1=np111py35_1
- sympy=1.0=py35_0
- terminado=0.6=py35_0
Expand Down
2 changes: 1 addition & 1 deletion limix/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pkg_resources import DistributionNotFound as _DistributionNotFound
from pkg_resources import get_distribution as _get_distribution

from . import io, plot, qtl, scripts, stats, util, varDecomp, iSet, mtSet
from . import io, iSet, mtSet, plot, qtl, scripts, stats, util, varDecomp
from .mtSet import MTSet

try:
Expand Down
1 change: 1 addition & 0 deletions limix/plot/manhattan.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def plot_manhattan(pv, position=None, posCum=None, chromBounds=None,
plt.subplot(111)
plot_manhattan(pv, [chrom, pos])
plt.tight_layout()
plt.show()
"""
import matplotlib.pylab as plt
Expand Down
17 changes: 11 additions & 6 deletions limix/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,22 @@
- :func:`.qvalues`
- :func:`.empirical_pvalues`
- :class:`.Chi2mixture`
- :func:`.indep_pairwise`
- :func:`.maf`
Public interface
^^^^^^^^^^^^^^^^
"""

from .pca import pca
from .trans import boxcox
from .kinship import gower_norm
from .chi2mixture import Chi2mixture
from .fdr import qvalues
from .kinship import gower_norm
from .pca import pca
from .preprocess import indep_pairwise, maf
from .teststats import empirical_pvalues
from .chi2mixture import Chi2mixture
from .trans import boxcox

__all__ = ['pca', 'boxcox', 'gower_norm', 'qvalues',
'empirical_pvalues', 'Chi2mixture']
__all__ = [
'pca', 'boxcox', 'gower_norm', 'qvalues', 'empirical_pvalues',
'Chi2mixture', 'indep_pairwise', 'maf'
]
176 changes: 176 additions & 0 deletions limix/stats/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from __future__ import division

import scipy.spatial
from joblib import Parallel, cpu_count, delayed
from numpy import (asarray, ascontiguousarray, double, einsum, isfinite,
logical_not, minimum, newaxis, sqrt, unique, zeros)
from scipy.spatial import _distance_wrap
from tqdm import tqdm


def _row_norms(X):
norms = einsum('ij,ij->i', X, X, dtype=double)
return sqrt(norms, out=norms)


def _sq_pearson(X):
m = X.shape[0]
dm = zeros((m * (m - 1)) // 2, dtype=double)

X2 = X - X.mean(1)[:, newaxis]
X2 = ascontiguousarray(X2)
norms = _row_norms(X2)
_distance_wrap.pdist_cosine_wrap(X2, dm, norms)
return (-dm + 1)**2


def _pdist_threshold(mark, dist, thr):
mark[:] = False
size = len(mark)

l = 0
for i in range(0, size - 1):
if mark[i]:
l += size - (i + 1)
continue

for j in range(i + 1, size):
if dist[l] > thr:
mark[j] = True
l += 1


def func(x, excls, threshold):
    """Flag correlated rows of `x` and merge the flags into `excls`.

    Squared Pearson correlations between the rows of `x` are thresholded
    with `_pdist_threshold`; the resulting boolean flags are OR-ed in place
    into `excls`, which therefore must have one entry per row of `x`.
    Nothing is returned.
    """
    sq_corr = _sq_pearson(x)
    flagged = zeros(x.shape[0], dtype=bool)
    _pdist_threshold(flagged, sq_corr, threshold)
    excls |= flagged


def indep_pairwise(X, window_size, step_size, threshold, verbose=True):
    r"""
    Determine pair-wise independent variants.

    Independent variants are defined via squared Pearson correlations between
    pairs of variants inside a sliding window. Every window is evaluated on
    its own; a variant flagged as correlated in any window is excluded from
    the result.

    Parameters
    ----------
    X : array_like
        Sample by variants matrix.
    window_size : int
        Number of variants inside each window.
    step_size : int
        Number of variants the sliding window skips.
    threshold : float
        Squared Pearson correlation threshold for independence.
    verbose : bool
        `True` for progress information; `False` otherwise.

    Returns
    -------
    ok : boolean array defining independent variants

    Raises
    ------
    ValueError
        If `step_size` is smaller than one or larger than `window_size`.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.stats import indep_pairwise
        >>>
        >>> random = RandomState(0)
        >>> X = random.randn(10, 20)
        >>>
        >>> ok = indep_pairwise(X, 4, 2, 0.5, verbose=False)
        >>> print((~ok).nonzero()[0].tolist())
        [2]
    """
    # Explicit checks instead of `assert`, which is stripped under `-O`.
    if step_size < 1:
        raise ValueError("`step_size` must be a positive integer.")
    if step_size > window_size:
        raise ValueError("`step_size` cannot be larger than `window_size`; "
                         "variants between windows would be skipped.")

    nvariants = X.shape[1]
    excls = zeros(nvariants, dtype=bool)

    # Ceiling division: one window per step, without scheduling an empty
    # trailing window when `nvariants` is a multiple of `step_size`.
    n = (nvariants + step_size - 1) // step_size

    pending = list(range(n))
    ncpus = max(1, cpu_count())

    with tqdm(total=n, desc='Indep. pairwise', disable=not verbose) as pbar:

        # Each pass picks a set of mutually non-overlapping windows, so the
        # worker threads never write to the same slice of `excls`. Windows
        # overlapping an already picked one wait for a later pass.
        while pending:
            deferred = []
            jobs = []
            right = 0

            for step in pending:
                left = step * step_size
                if left < right:
                    deferred.append(step)
                    continue

                right = min(left + window_size, nvariants)
                x = ascontiguousarray(X[:, left:right].T)

                # `excls[left:right]` is a view: `func` updates it in place.
                jobs.append(delayed(func)(x, excls[left:right], threshold))
                if len(jobs) == ncpus:
                    Parallel(n_jobs=ncpus, backend='threading')(jobs)
                    pbar.update(len(jobs))
                    jobs = []

            if jobs:
                Parallel(
                    n_jobs=min(len(jobs), ncpus), backend='threading')(jobs)
                pbar.update(len(jobs))

            pending = deferred

    return logical_not(excls)


def _check_encoding(X):
u = unique(X)
u = u[isfinite(u)]
if len(u) > 3:
return False
return all([i in set([0, 1, 2]) for i in u])


def maf(X):
    r"""Compute minor allele frequencies.

    It assumes that `X` encodes 0, 1, and 2 representing the number
    of alleles. For each column the allele frequency is the column sum
    divided by twice the number of rows; the smaller of that frequency and
    its complement is returned.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies, one per column of `X`.

    Raises
    ------
    ValueError
        If `X` holds finite values other than 0, 1, and 2.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.stats import maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(maf(X).round(3).tolist())
        [0.49, 0.49, 0.445, 0.495, 0.5, 0.45, 0.48, 0.48, 0.47, 0.435]
    """
    if not _check_encoding(X):
        raise ValueError("It assumes that X encodes 0, 1, and 2 only.")

    # Two alleles per row, hence the factor of two in the denominator.
    freq = X.sum(0) / (2 * X.shape[0])
    return minimum(freq, 1 - freq)
32 changes: 32 additions & 0 deletions limix/stats/test/test_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import division

from dask.array import from_array
from numpy import logical_not
from numpy.random import RandomState
from numpy.testing import assert_allclose, assert_equal

from limix.stats import indep_pairwise, maf


def test_preprocess_indep_pairwise():
    """`indep_pairwise` agrees on a NumPy array and its dask counterpart."""
    rng = RandomState(0)
    dense = rng.randn(3, 100)

    expected_head = [True, True, False, True, False]
    expected_tail = [True, True, False, False]

    # Same expectations for the in-memory array and the chunked dask array.
    for data in (dense, from_array(dense, chunks=(2, 10))):
        ok = indep_pairwise(data, 4, 2, 0.5, verbose=False)
        assert_equal(ok[:5], expected_head)
        assert_equal(ok[-4:], expected_tail)


def test_preprocess_maf():
    """`maf` reproduces the reference frequencies for a seeded genotype."""
    rng = RandomState(0)
    genotype = rng.randint(0, 3, size=(100, 10))

    expected = [0.49, 0.49, 0.445, 0.495, 0.5, 0.45, 0.48, 0.48, 0.47, 0.435]
    assert_allclose(maf(genotype), expected)
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
test = pytest

[tool:pytest]
addopts = --doctest-modules
addopts = --doctest-modules -x
script_launch_mode = subprocess
norecursedirs = doc .eggs
10 changes: 7 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys


from setuptools import find_packages, setup

try:
Expand All @@ -23,8 +24,11 @@ def setup_package():

setup_requires = ["cython", "numpy"] + pytest_runner
install_requires = [
'scikit-learn', 'limix-core>=1.0.1', 'dask[complete]', 'h5py',
'pandas-plink>=1.1.6', 'limix-legacy', 'glimix-core>=1.2.4'
'scikit-learn', 'limix-core>=1.0.1',
'dask[array,bag,dataframe,delayed]>=0.14', 'h5py',
'pandas-plink>=1.1.7', 'limix-legacy', 'glimix-core>=1.2.4',
'joblib>=0.11', 'tqdm>=4.10', 'scipy>=0.18', 'distributed',
'numpy-sugar>=1.0.38', 'ncephes>=1.0.26'
]
tests_require = ['pytest', 'pytest-console-scripts']

Expand All @@ -42,7 +46,7 @@ def setup_package():

metadata = dict(
name='limix',
version='1.0.4',
version='1.0.5',
maintainer="Limix Developers",
maintainer_email="horta@ebi.ac.uk",
author=("Christoph Lippert, Danilo Horta, " +
Expand Down

0 comments on commit a5bc1c6

Please sign in to comment.