Skip to content

Commit

Permalink
kinship df
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed Jan 30, 2019
1 parent 7c4ff10 commit 8a58490
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 6 deletions.
49 changes: 43 additions & 6 deletions limix/qc/kinship.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,49 @@ def normalise_covariance(K, out=None):
.. _Dask: https://dask.pydata.org/
"""
from numpy import copyto, asarray

K = asarray(K, float)
c = (K.shape[0] - 1) / (K.trace() - K.mean(0).sum())
from numpy import asarray
import dask.array as da
from pandas import DataFrame
import xarray as xr

if isinstance(K, DataFrame):
K = K.astype(float)
trace = K.values.trace()
elif isinstance(K, da.Array):
trace = da.diag(K).sum()
elif isinstance(K, xr.DataArray):
trace = da.diag(K.data).sum()
pass
else:
K = asarray(K, float)
trace = K.trace()

c = (K.shape[0] - 1) / (trace - K.mean(axis=0).sum())
if out is None:
return c * K
return K * c

_copyto(out, K)
_inplace_mult(out, c)

return out


def _copyto(dst, src):
from numpy import copyto, asarray, ndarray
import dask.array as da
from pandas import DataFrame

if isinstance(dst, DataFrame):
copyto(dst.values, src)
elif isinstance(dst, ndarray) and isinstance(src, da.Array):
copyto(dst, src.compute())
else:
copyto(dst, src)


def _inplace_mult(out, c):
import dask.array as da

copyto(out, K)
if isinstance(c, da.Array):
c = c.compute()
out *= c
113 changes: 113 additions & 0 deletions limix/qc/test/test_kinship.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import dask.array as da
import xarray as xr
from numpy import ndarray, zeros
from numpy.random import RandomState
from numpy.testing import assert_, assert_allclose
from pandas import DataFrame

from limix.qc import normalise_covariance


def test_qc_kinship_numpy():
random = RandomState(0)
X = random.randn(3, 5)
K = X.dot(X.T)
K1 = zeros((3, 3))

K0 = normalise_covariance(K)
K2 = normalise_covariance(K, out=K1)

Kf = [
[2.5990890007787586, -0.1951278087849671, 0.5472860002747189],
[-0.1951278087849671, 0.4202620710126438, 0.2642930556468809],
[0.5472860002747189, 0.2642930556468809, 0.5971001753452302],
]

assert_allclose(K0, Kf)
assert_allclose(K0, K1)
assert_allclose(K0, K2)
assert_(K2 is K1)


def test_qc_kinship_dataframe():
random = RandomState(0)
X = random.randn(3, 5)
K = X.dot(X.T)
K = DataFrame(K)

K1 = DataFrame(zeros((3, 3)))

K0 = normalise_covariance(K)
K2 = normalise_covariance(K, out=K1)

Kf = [
[2.5990890007787586, -0.1951278087849671, 0.5472860002747189],
[-0.1951278087849671, 0.4202620710126438, 0.2642930556468809],
[0.5472860002747189, 0.2642930556468809, 0.5971001753452302],
]

assert_allclose(K0, Kf)
assert_(isinstance(K0, DataFrame))

assert_allclose(K0, K1)
assert_(isinstance(K1, DataFrame))

assert_allclose(K0, K2)
assert_(K2 is K1)


def test_qc_kinship_dask_array():
random = RandomState(0)
X = random.randn(3, 5)
K = X.dot(X.T)
K = da.from_array(K, chunks=2)

K1 = zeros((3, 3))

K0 = normalise_covariance(K)
K2 = normalise_covariance(K, out=K1)

Kf = [
[2.5990890007787586, -0.1951278087849671, 0.5472860002747189],
[-0.1951278087849671, 0.4202620710126438, 0.2642930556468809],
[0.5472860002747189, 0.2642930556468809, 0.5971001753452302],
]

assert_allclose(K0, Kf)
assert_(isinstance(K0, da.Array))

assert_allclose(K0, K1)
assert_(isinstance(K1, ndarray))
assert_(isinstance(K2, ndarray))

assert_allclose(K0, K2)
assert_(K2 is K1)


def test_qc_kinship_dataarray():
random = RandomState(0)
X = random.randn(3, 5)
K = X.dot(X.T)
K = da.from_array(K, chunks=2)
K = xr.DataArray(K)

K1 = zeros((3, 3))

K0 = normalise_covariance(K)
K2 = normalise_covariance(K, out=K1)

Kf = [
[2.5990890007787586, -0.1951278087849671, 0.5472860002747189],
[-0.1951278087849671, 0.4202620710126438, 0.2642930556468809],
[0.5472860002747189, 0.2642930556468809, 0.5971001753452302],
]

assert_allclose(K0, Kf)
assert_(isinstance(K0, xr.DataArray))

assert_allclose(K0, K1)
assert_(isinstance(K1, ndarray))
assert_(isinstance(K2, ndarray))

assert_allclose(K0, K2)
assert_(K2 is K1)

0 comments on commit 8a58490

Please sign in to comment.