In [1]:
### Consensus analysis using mean Z score
import numpy
import scipy.stats


Following on our chat, I realise that my notes were missing some indices. Here’s a corrected version:



    Y_i: N-vector of T scores (or whatever) at voxel i
    Var(Y_i) =  Sigma_i

where Sigma_i is the NxN covariance matrix. But to be practical, we need to assume common variance, and a global correlation:

    Var(Y_i) =  sigma^2_i Q

Where sigma^2_i is the (scalar) variance at voxel i, Q the common NxN correlation (*not* covariance)

Then, with X the N-vector of ones, the average is
    bar{Y_i} = X’ Y_i/N
and
    Var(bar{Y_i}) =  sigma^2_i   X’ Q X / N^2

So then the T test is 
   T_i = bar{Y_i} / sqrt(Var(bar{Y_i}))

If you estimate the variance as you suggest, with R = I - X (X’ X)^{-1} X’ the residual-forming (centering) matrix,

   hat{sigma^2_i} = Y_i’ R Y_i / tr(RQ)

then the effective DF is as you say,

    v = tr(RQ)^2 / tr(RQRQ)

But you could also use the naive estimate

   hat{sigma^2_i} = Y_i’ R Y_i / (N-1)

In [117]:
def t_corr2(y,Q=None):
    """
    perform a one-sample, one-sided (upper-tail) t-test on correlated data

    Implements the equations from Tom's email, for a single variable:

        bar{Y}       = X'Y / N                  (X = N-vector of ones)
        hat{sigma^2} = Y'RY / tr(RQ)            (R = I - X(X'X)^{-1}X')
        Var(bar{Y})  = hat{sigma^2} X'QX / N^2
        T            = bar{Y} / sqrt(Var(bar{Y}))
        v            = tr(RQ)^2 / tr(RQRQ)      (effective degrees of freedom)

    y = data for ONE variable, shape (N,) or (N, 1) (N observations)
    Q = "known" N x N correlation across observations (use empirical
        correlation based on maps); identity (uncorrelated) if None

    returns (T, df, p): T and p are arrays of shape (1, 1), df is a scalar.
    p is the upper-tail probability P(t_df > T).
    raises ValueError if Q is not N x N.
    """

    y = numpy.asarray(y)
    if y.ndim == 1:
        y = y[:, numpy.newaxis]
    assert y.shape[1] == 1, 't_corr2 tests one variable (one column) at a time'

    npts = y.shape[0]
    X = numpy.ones((npts, 1))

    if Q is None:
        # no Q specified, using identity (uncorrelated)
        Q = numpy.eye(npts)
    Q = numpy.asarray(Q)
    if Q.shape != (npts, npts):
        raise ValueError('Q must have shape (%d, %d), got %s'
                         % (npts, npts, Q.shape))

    # R = I{n} - X(X'X)^{-1}X'  (projects out the mean)
    R = numpy.eye(npts) - X.dot(numpy.linalg.inv(X.T.dot(X))).dot(X.T)

    # RQ is needed for the variance scaling and for both traces in the df
    RQ = R.dot(Q)
    tr_RQ = numpy.trace(RQ)

    # hat{sigma^2_i} = Y_i' R Y_i / tr(RQ)
    s_hat_2 = y.T.dot(R).dot(y)/tr_RQ

    # Var(bar{Y_i}) = sigma^2_i X' Q X / N^2
    var_y_hat = s_hat_2 * X.T.dot(Q).dot(X)/(npts**2)

    # T_i = bar{Y_i} / sqrt(Var(bar{Y_i}))
    T = numpy.mean(y)/numpy.sqrt(var_y_hat)

    # degrees of freedom = v = tr(RQ)^2/tr(RQRQ)
    df = (tr_RQ**2)/numpy.trace(RQ.dot(RQ))

    # survival function rather than 1 - cdf: the latter rounds to exactly 0
    # once the cdf is within machine epsilon of 1
    p = scipy.stats.t.sf(T,df=df)
    return(T,df,p)


def t_corr(y,Q=None):
    """
    perform a one-sample, one-sided (upper-tail) t-test on correlated data,
    independently for each variable (column) of y

    For each column y_j, with X the N-vector of ones:

        R            = I - X(X'X)^{-1}X'
        hat{sigma^2} = y_j' R y_j / tr(RQ)
        Var(mean)    = hat{sigma^2} X'QX / N^2
        T            = mean(y_j) / sqrt(Var(mean))
        v            = tr(RQ)^2 / tr(RQRQ)      (effective degrees of freedom)

    y = data (n observations X n vars); a 1-d array is treated as one variable
    Q = "known" correlation across observations (use empirical correlation
        based on maps), shape (n observations, n observations);
        identity (uncorrelated) if None

    returns (T, df, p): T and p have shape (n vars,), df is a scalar.
    p is the upper-tail probability P(t_df > T).
    raises ValueError if y is not 1-d/2-d or Q is not N x N.
    """

    # Jeanette:
    # This paper calculates the df for an F-test, so the chisquare bit we need is in there.  Your t-statistic will come from
    # X = column of 1's (design matrix)

    y = numpy.asarray(y)
    if y.ndim == 1:
        y = y[:, numpy.newaxis]
    if y.ndim != 2:
        raise ValueError('y must be 1-d or 2-d, got %d dimensions' % y.ndim)

    npts = y.shape[0]
    X = numpy.ones((npts, 1))

    if Q is None:
        # no Q specified, using identity (uncorrelated)
        Q = numpy.eye(npts)
    Q = numpy.asarray(Q)
    if Q.shape != (npts, npts):
        raise ValueError('Q must have shape (%d, %d), got %s'
                         % (npts, npts, Q.shape))

    # R = I{n} - X(X'X)^{-1}X'
    R = numpy.eye(npts) - X.dot(numpy.linalg.inv(X.T.dot(X))).dot(X.T)
    RQ = R.dot(Q)
    tr_RQ = numpy.trace(RQ)

    # s-hat-2 = y'Ry/tr(RQ), one value per variable: this is the diagonal of
    # y'Ry, computed without forming the full (n vars X n vars) matrix
    s_hat_2 = (y * R.dot(y)).sum(axis=0)/tr_RQ

    # variance of the mean = s-hat-2 * X'QX / N^2   (X'QX is the sum of Q)
    var_mean = s_hat_2 * Q.sum()/(npts**2)

    # T = mean(y,0)/sqrt(Var(mean)): divide by the standard error of the
    # mean, not by the variance estimate itself
    T = numpy.mean(y,0)/numpy.sqrt(var_mean)

    # degrees of freedom = v = tr(RQ)^2/tr(RQRQ)
    df = (tr_RQ**2)/numpy.trace(RQ.dot(RQ))

    # survival function keeps precision in the far tail (1 - cdf rounds to 0)
    p = scipy.stats.t.sf(T,df=df)
    return(T,df,p)

In [135]:
# simulation settings
npts = 36      # observations per variable
nvars = 10     # variables tested per run
nruns=1000     # number of simulated datasets
alpha=.05      # nominal significance level
mu=0           # true mean of the simulated data (0 = null hypothesis holds)

# fix the RNG state so that re-running the notebook reproduces the numbers
numpy.random.seed(0)

# simulate independent case
pvals= numpy.zeros((nruns,nvars))

for i in range(nruns):
    y = numpy.random.randn(npts,nvars) + mu
    for j in range(nvars):
        result = t_corr2(y[:,j])
        # result[2] is a size-1 array; extract the scalar explicitly, since
        # assigning an ndim>0 array to a single element is deprecated in NumPy
        pvals[i,j]=numpy.asarray(result[2]).item()


In [136]:
# copy of the p-value table from the independent-case simulation
pvals_mtx = numpy.array(pvals, copy=True)
# fraction of all tests with p < alpha (no multiple-comparison correction is
# applied here); for data simulated under the null this should be close to alpha
(pvals_mtx < alpha).mean()

0.0508

In [154]:
# now simulate correlated data

def mk_correlated_data(npts,nvars,r,mu=0):
    """
    simulate npts observations of nvars variables that share a common signal

    Every observation (row) is r * shared + (1 - r) * noise + mu, where
    `shared` is a single standard-normal vector of length nvars reused for
    all rows and `noise` is drawn independently for each row.
    Note that with these weights the population correlation between two
    observations is r^2 / (r^2 + (1 - r)^2), which equals r only for
    r in {0, 0.5, 1}.

    returns (data, cc): data has shape (npts, nvars); cc is
    numpy.corrcoef(data), the npts X npts empirical correlation between
    observations.
    """
    shared = numpy.random.randn(nvars)
    # one draw of the full noise matrix; it is filled row by row, so the random
    # stream is consumed in the same order as drawing each row separately
    noise = numpy.random.randn(npts, nvars)
    data = shared*r + noise*(1-r) + mu
    return data, numpy.corrcoef(data)

# simulate correlated case
pvals_corr= numpy.zeros((nruns,nvars))
# NOTE(review): mu=1 means the null hypothesis is false, so the rejection rate
# computed from pvals_corr is power, not a false-positive rate; and r=0.0 below
# produces uncorrelated data. Confirm these are the intended settings.
mu=1
for i in range(nruns):
    y,cc = mk_correlated_data(npts,nvars,0.0,mu)
    for j in range(nvars):
        result = t_corr2(y[:,j],cc)
        # keep one p-value per variable (the row is no longer overwritten with
        # the last variable's p-value after the inner loop); result[2] is a
        # size-1 array, so extract the scalar explicitly
        pvals_corr[i,j]=numpy.asarray(result[2]).item()



In [153]:
# copy of the p-value table from the correlated-case simulation
pvals_mtx_corr = numpy.array(pvals_corr, copy=True)
# fraction of all tests with p < alpha (no multiple-comparison correction is
# applied here); this is close to alpha only when the data were simulated
# under the null (mu=0), otherwise it is the rejection rate (power)
(pvals_mtx_corr < alpha).mean()

1.0

In [2]:
   
# Tom:
# Yup... that’s the direction, but need to work out the variance of the mean too, not just worry about DF:

# So...

#     Y_i: N-vector of T scores (or whatever) at voxel i
#     Var(Y_i) =  Sigma_i

# where Sigma_i is the NxN covariance matrix. But to be practical, we need to assume common variance, and a global correlation:

#     Var(Y_i) =  sigma^2_i Q

# Where sigma^2_i is the (scalar) variance at voxel i, Q the common correlation

# Then the average is 
#     bar{Y_i} = X’Y_i/N
# and
#     Var(bar{Y}) =  sigma^2_i X_i’ Q X_i / N^2

# So then the T test is 
#    T = bar(Y) / sqrt(Var(bar{Y}))

# If you estimate the variance as you suggest

#    hat{sigma^2_i} = Y’RY / tr(RQ)

# then the effective DF is as you say,

#     v = tr(RQ)^2 / tr(RQRQ)

# But you could also use the naive estimate

#    hat{sigma^2_i} = Y’RY / (N-1)

# but then the DF are

#    v = (N-1)^2 / tr(RQRQ)
             