$$
r^2_{adj} = \hat{r}^2 - \frac { 1 - \hat{r}^2 } { N - 2 }
$$

In [1]:
import numpy as np
import numpy.random as nr
import pandas as pd

In [2]:
def tau(Ss):
    """Return the normalised inner product of ``Ss[0]`` and ``Ss[1]``.

    The element-wise product of the two entries is summed over all elements
    and divided by the square root of the product of each entry's own sum of
    squares.

    Parameters
    ----------
    Ss : sequence or array
        Indexable object whose first two entries are arrays that can be
        multiplied element-wise.

    Returns
    -------
    numpy scalar
        ``sum(a * b) / sqrt(sum(a * a) * sum(b * b))`` for ``a = Ss[0]`` and
        ``b = Ss[1]``.
    """
    first, second = Ss[0], Ss[1]
    cross_term = np.sum(first * second)
    squared_norms = np.sum(first * first) * np.sum(second * second)
    return cross_term / np.sqrt(squared_norms)

In [3]:
def read_LD(filename):
    """Load a square matrix stored as a flat file of raw binary doubles.

    Parameters
    ----------
    filename : str or path-like
        File containing ``M * M`` values of dtype ``double`` and nothing else.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(M, M)``, filled from the file in numpy's default
        (row-major) reshape order.

    Raises
    ------
    ValueError
        If the number of values in the file is not a perfect square, so the
        data cannot be a square matrix.
    """
    Sigma = np.fromfile(filename, dtype='double', count=-1, sep='')
    n_values = Sigma.shape[0]
    M = int(round(np.sqrt(n_values)))
    # Fail early with a descriptive message instead of letting reshape raise
    # a generic shape-mismatch error on a truncated or mis-specified file.
    if M * M != n_values:
        raise ValueError(
            '%s holds %d values, which is not a perfect square'
            % (filename, n_values)
        )
    return Sigma.reshape((M, M))

In [4]:
def banded_from(X, k):
    """Return a copy of ``X`` keeping only the diagonals with offset below ``k``.

    Entries on or above the ``k``-th superdiagonal and on or below the
    ``k``-th subdiagonal are set to zero. ``X`` itself is not modified.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional array to band.
    k : int
        Band half-width; entries whose column-minus-row offset has absolute
        value ``>= k`` are zeroed.

    Returns
    -------
    numpy.ndarray
        The banded copy.
    """
    banded = X.copy()
    # Zero the upper corner first, then the mirrored lower corner.
    for corner_indices, offset in ((np.triu_indices_from, k),
                                   (np.tril_indices_from, -k)):
        banded[corner_indices(banded, offset)] = 0
    return banded

In [5]:
# Chromosome to analyse and the population labels that appear in the file names.
chrom = 22
pops = ['eas', 'eur']

# One raw-double matrix file per population, in the same order as `pops`.
files = ['../phase2/ld/%s.%d.S.bin' % (pop, chrom) for pop in pops]

# Read every file and combine the matrices into one array, population first.
Sigma_pair = np.array(list(map(read_LD, files)))

In [6]:
# Presumably the per-population sample sizes, in the same order as `pops`
# (Ns[0] is paired with Sigma_pair[0] as N in the r^2 adjustment) -- TODO confirm.
Ns = [ 4862, 59844 ]

In [None]:
def adjust_r2(S, n):
    """Return the sample-size-adjusted squared correlation.

    Implements ``r2_adj = r2 - (1 - r2) / (n - 2)`` with ``r2 = S ** 2``.

    Parameters
    ----------
    S : array_like
        Correlation values (not yet squared), matching how the same
        expression is applied to ``Sigma_pair[0]`` in this notebook.
    n : int or array_like
        Sample size(s); must be greater than 2 so the denominator is positive.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Adjusted squared correlations with the broadcast shape of ``S`` and
        ``n``. Values can be slightly negative when ``S`` is near zero.

    Raises
    ------
    ValueError
        If any ``n`` is not greater than 2.
    """
    if np.any(np.asarray(n) <= 2):
        raise ValueError('n must be greater than 2')
    r2 = np.asarray(S) ** 2
    return r2 - (1 - r2) / (n - 2)

In [10]:
# Apply r^2_adj = r^2 - (1 - r^2) / (N - 2) element-wise to the first matrix
# in Sigma_pair, using Ns[0] as N; the result is only displayed, not stored.
Sigma_pair[0]**2 - (1-Sigma_pair[0]**2)/(Ns[0]-2)

array([[  2.45295945e-01,   1.69717539e-04,   9.05590043e-04, ...,
         -6.03996902e-05,  -1.85908401e-04,  -2.05657895e-04],
       [  1.69717539e-04,   2.29427332e-01,   1.73254850e-02, ...,
         -2.05489327e-04,  -1.97779290e-04,  -1.94973753e-04],
       [  9.05590043e-04,   1.73254850e-02,   3.95537736e-02, ...,
         -2.05726821e-04,  -1.73692574e-04,  -2.04178021e-04],
       ..., 
       [ -6.03996902e-05,  -2.05489327e-04,  -2.05726821e-04, ...,
          1.60975047e-01,   1.43057562e-02,  -1.64138018e-04],
       [ -1.85908401e-04,  -1.97779290e-04,  -1.73692574e-04, ...,
          1.43057562e-02,   2.35568509e-01,   1.26414439e-05],
       [ -2.05657895e-04,  -1.94973753e-04,  -2.04178021e-04, ...,
         -1.64138018e-04,   1.26414439e-05,   3.08435038e-03]])

In [None]:
def run_test(Sigma_pair=None, chrom=22, M_T=500,
             ks=(1000, 500, 200, 100, 50, 20, 10, 5), reps=100):
    """Compare tau ratios for the full matrices and for banded versions.

    For every replicate a random subset of ``M_T`` indices is marked as
    typed. For the unbanded pair and for each banded pair the ratio
    ``tau(typed rows, all columns) / tau(typed rows, typed columns)`` is
    recorded.

    Parameters
    ----------
    Sigma_pair : numpy.ndarray, optional
        Array of shape ``(2, M, M)`` holding the two matrices to compare.
        When omitted, the matrices for ``chrom`` are read with ``read_LD``
        from ``'../phase2/ld/<pop>.<chrom>.S.bin'`` for each entry of the
        notebook-level ``pops`` list.
    chrom : int
        Chromosome number used to build the file names when ``Sigma_pair``
        is omitted; ignored otherwise.
    M_T : int
        Number of indices drawn without replacement in each replicate.
    ks : iterable of int
        Band widths passed to ``banded_from``; one result column per value.
    reps : int
        Number of random replicates (rows of the result).

    Returns
    -------
    pandas.DataFrame
        One row per replicate; column ``'all'`` holds the unbanded ratio and
        the remaining columns are labelled by the entries of ``ks``.
    """
    if Sigma_pair is None:
        # Callers that only name a chromosome get its matrices loaded here.
        Sigma_pair = np.array([
            read_LD('../phase2/ld/%s.%d.S.bin' % (pop, chrom)) for pop in pops
        ])
    Sigma_pair = np.asarray(Sigma_pair)

    # Materialise once: ks is iterated for banding and again for the labels.
    ks = list(ks)

    Sigma_pairs = [Sigma_pair]
    Sigma_pairs.extend(
        np.array([banded_from(S, k) for S in Sigma_pair]) for k in ks
    )

    M = Sigma_pair.shape[1]

    results = []
    for _ in range(reps):
        typed = nr.choice(M, M_T, replace=False)
        mask = np.zeros(M, dtype='bool')
        mask[typed] = True

        ratios = []
        for S in Sigma_pairs:
            # Select the typed rows once and reuse them for both tau terms.
            typed_rows = S[:, mask]
            ratios.append(tau(typed_rows) / tau(typed_rows[:, :, mask]))
        results.append(ratios)

    return pd.DataFrame(results, columns=['all', *ks])

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Box plot of the columns of the DataFrame returned by run_test, called with M_T=200.
axes = run_test(M_T=200).boxplot(return_type='axes')
plt.show()

In [None]:
# Box plot of the columns of the DataFrame returned by run_test with its default arguments.
axes = run_test().boxplot(return_type='axes')
plt.show()

In [None]:
# Box plot of the columns of the DataFrame returned by run_test, called with M_T=1000.
axes = run_test(M_T=1000).boxplot(return_type='axes')
plt.show()

In [None]:
# Box plot of the run_test result, called with M_T=200, chrom=15 and an explicit ks list.
axes = run_test(M_T=200, chrom=15, ks=[10, 100, 1000]).boxplot(return_type='axes')
plt.show()

In [None]:
# Box plot of the run_test result, called with M_T=500 and chrom=15.
axes = run_test(M_T=500, chrom=15).boxplot(return_type='axes')
plt.show()

In [None]:
# Box plot of the run_test result, called with M_T=1000 and chrom=15.
axes = run_test(M_T=1000, chrom=15).boxplot(return_type='axes')
plt.show()