In [1]:
%run 'library.ipynb'

In [2]:
rho_b=0.8
M=100
R=(0.5, 0.2)
K=(4, 2)
causal=(True, False)
N=None
binomial=False

In [3]:
# set up the number of untyped/typed SNPs    
if type(M) is int:
    Ms = (M, M)
else:
    Ms = M

Mtot = Ms[0] + Ms[1]

if N is None:
    N = 10*Mtot

In [4]:
# sample b_k
bs = list()
for (M, flag) in zip(Ms, causal):
    if flag:
        bs.append(multivariate_normal_sample(np.zeros(2), equicorrelation_matrix(rho_b), M).T)
    else:
        bs.append(np.zeros((2,M)))
bs = np.concatenate(bs, 1)

In [5]:
# create Sigma
Sigmas = np.array([
    block_banded_matrix(r, Ms[0], Ms[1], k) for r, k in zip(R, K)
])

# sample ps and generate standard deviation
ps = nr.uniform(0.05, 0.95, (2,2*M))
Vs = 2*ps*(1-ps)
sds = np.sqrt(Vs)

# transform Sigmas to Ss
Ss = np.array([
    Sigma * sd[:, None] * sd[None, :]
    for (Sigma, sd) in zip(Sigmas, sds)
])

In [None]:
# calculate true gs
gs = np.array([
    np.linalg.inv(S[Ms[0]:][:,Ms[0]:]).dot(S[Ms[0]:].dot(b))
    for (S, b) in zip(Ss, bs)
])

# calculate rho_g/rho_b ratio
ratio = tau(Ss[:,Ms[0]:]) / tau(Ss[:,Ms[0]:][:,:,Ms[0]:])

In [6]:
# sample genotypes
if (binomial):
    Xs = list()
    for P, Sigma in zip(ps, Sigmas):
        Z = multivariate_normal_sample(np.zeros(Mtot), Sigma, N)
        Z = ss.norm().sf(Z)
        G = np.array([ ss.binom(2, p).isf(z) for (p, z) in zip(P, Z.T) ])
        Xs.append((G - np.mean(G, axis=1)[:,None]).T)
    Xs = np.array(Xs)
else:
    Xs = np.array([
        multivariate_normal_sample(np.zeros(Mtot), S, N)
        for S in Ss
    ])

# generate phenotypes
Ys = (Xs * bs[:,None,:]).sum(axis=2)

In [22]:
Gs = np.array([X + 2*p[None,:] for X, p in zip(Xs, ps)])

In [26]:
Gs[0].mean(axis=0).shape

(200,)

In [47]:
np.hstack([Xs[0].T, Xs[1].T]).shape

(200, 4000)

In [46]:
def he_regression(X, Y):
    """Perform Haseman-Elston regression.
    
    Arguments:
    X -- Independent variables (M x N)
    Y -- Dependent variable (N x 1)
    
    Returns:
    h2g -- Heritability from typed SNPs
    """
    Z = X - X.mean(axis=1)[:,None]
    A = Z.T.dot(Z) / Z.shape[0]
    B = Y[:,None].dot(Y[None,:])
    return(np.triu(A*B,1).sum() / np.triu(A*A,1).sum())

In [42]:
he_regression(Gs[0].T[Ms[0]:], Ys[0])

27.980116712096738

In [45]:
Gs[0].T.mean(axis=1)

array([  1.55518812e+00,   6.70676323e-01,   1.35453705e+00,
         1.44662109e+00,   7.25080951e-01,   1.48483572e+00,
         9.47188612e-01,   5.27009622e-01,   7.65813064e-01,
         1.00563930e+00,   1.24617213e+00,   1.71679419e+00,
         1.66921908e+00,   1.62652105e-01,   1.03204888e+00,
         7.92544909e-01,   1.44110439e+00,   1.86523674e+00,
         1.70820582e+00,   3.11296611e-01,   1.38740948e+00,
         1.48785508e+00,   3.69199318e-01,   2.38667357e-01,
         1.10425520e+00,   1.36178643e+00,   4.89522887e-01,
         1.30998348e+00,   1.33109304e+00,   8.57598820e-01,
         1.14824528e+00,   5.06324214e-01,   2.60672350e-01,
         1.51371580e+00,   1.15607858e+00,   1.09258583e+00,
         9.13564049e-01,   1.33994073e+00,   1.75258446e+00,
         1.69029796e+00,   8.58391295e-01,   1.03604757e+00,
         9.52927195e-01,   1.30897895e-01,   5.08766843e-01,
         1.22069834e+00,   1.75679595e+00,   1.32836593e+00,
         4.26457229e-01,

In [40]:
he_regression(Xs[0,:,Ms[0]:].T + ps[0,Ms[0]:,None], Ys[0])

27.980116712096727

In [41]:
he_regression(Xs[0,:,Ms[0]:].T + ((ps[0] - ps[1])/2)[Ms[0]:,None], Ys[0])

27.980116712096724

In [36]:
Xs[0,:,Ms[0]:].T + (ps[0,Ms[0]:,None] - ps[1,Ms[0]:,None])/2

array([[ 0.04835764, -0.09580168,  0.70489909, ...,  0.13276354,
        -0.50972953, -0.70252572],
       [ 0.0800602 , -0.32189834, -0.0328658 , ..., -0.58930853,
         0.10583475, -0.46241226],
       [ 0.90451741,  0.33585316,  0.31562319, ..., -0.77806385,
        -0.15071713, -0.15961697],
       ..., 
       [ 0.26209567, -0.3021741 , -0.46873677, ...,  0.54916924,
         0.16117034,  1.04998977],
       [ 1.55134549, -1.02055744, -0.97546471, ..., -0.83330304,
         0.14242671, -0.40624623],
       [-0.19111681, -0.36773167, -0.49133476, ..., -0.70668771,
        -0.45467937, -0.21791507]])

In [14]:
Xs[0,:,Ms[0]:].T

array([[ 0.06826799, -0.07589133,  0.72480943, ...,  0.15267388,
        -0.48981918, -0.68261537],
       [ 0.08485845, -0.31710009, -0.02806755, ..., -0.58451028,
         0.110633  , -0.45761402],
       [ 0.84762065,  0.2789564 ,  0.25872643, ..., -0.83496061,
        -0.20761389, -0.21651373],
       ..., 
       [ 0.31023436, -0.2540354 , -0.42059807, ...,  0.59730794,
         0.20930903,  1.09812846],
       [ 1.4343482 , -1.13755472, -1.092462  , ..., -0.95030032,
         0.02542942, -0.52324352],
       [ 0.00788068, -0.16873418, -0.29233727, ..., -0.50769022,
        -0.25568189, -0.01891758]])

In [12]:
ps.shape

(2, 200)

In [None]:
# calculate ghat
ghats = np.array([
    np.linalg.inv(X.T[Ms[0]:].dot(X[:,Ms[0]:])).dot(X.T[Ms[0]:].dot(Y))
    for (X, Y) in zip(Xs, Ys)
])

# HE regression
rho_hat = he_regression_bivar(Xs[0,:,Ms[0]:].T, Xs[1,:,Ms[0]:].T, Ys[0], Ys[1])[2]

return({
    'cor(g1,g2)'        : np.corrcoef(gs)[0,1],
    'cor(g1hat, g2hat)' : np.corrcoef(ghats)[0,1],
    'rho_g_HE'          : rho_hat,
    'rho_g'             : rho_b * ratio
})

In [1]:
import numpy as np
import pandas as pd

### Read data

In [2]:
ID = pd.read_table('../phase2/HE/eas.eur.22.1k.001.rand.ncl.grm.id', names=['FID', 'IID'])

In [3]:
M = ID.FID.count()

In [4]:
phen = pd.read_table(
    '../phase2/HE/eas.eur.22.1k.0_5.0_5.001.rand.phen2',
    names=['FID', 'IID', 'phen1', 'phen2'],
    sep=' ', index_col=[0,1]
)

In [5]:
# reorder the phenotypes to match the GRM
phen = phen.reindex(ID)

In [6]:
phen.phen1 = (phen.phen1 - phen.phen1.mean()) / phen.phen1.std()
phen.phen2 = (phen.phen2 - phen.phen2.mean()) / phen.phen2.std()

In [7]:
g = np.fromfile('../phase2/HE/eas.eur.22.1k.001.rand.ncl.grm.bin', 'float32')

In [8]:
G = np.empty((M,M))

In [9]:
a = []
b = []
for i in range(M):
    a.extend([i]*(i+1))
    b.extend(range(i+1))
a = np.array(a)
b = np.array(b)

In [10]:
G[a,b] = g
G[b,a] = g

### Create the masks

In [11]:
mask1 = np.isnan(phen.phen2).values
mask2 = np.isnan(phen.phen1).values

### Create the phenotype matrix

In [12]:
Y11 = phen.phen1[mask1].reshape((-1,1)).dot(phen.phen1[mask1].reshape((1,-1)))
Y22 = phen.phen2[mask2].reshape((-1,1)).dot(phen.phen2[mask2].reshape((1,-1)))
Y12 = phen.phen1[mask1].reshape((-1,1)).dot(phen.phen2[mask2].reshape((1,-1)))

### HE regression

In [13]:
h1  = np.multiply( np.triu(G[mask1][:,mask1],1), np.triu(Y11,1) ).sum() / np.triu(G[mask1][:,mask1]**2).sum()
h2  = np.multiply( np.triu(G[mask2][:,mask2],1), np.triu(Y22,1) ).sum() / np.triu(G[mask2][:,mask2]**2).sum()
h12 = np.multiply( G[mask1][:,mask2], Y12 ).sum() / (G[mask1][:,mask2]**2).sum()

In [14]:
(h1, h2, h12)

(0.024985925112624683, 0.024647999225060883, 0.015137083271384284)

In [15]:
h12 / np.sqrt(h1*h2)

0.60996321836434741