In [7]:
### Consensus analysis using mean Z score
import numpy
import scipy.stats
import matplotlib.pyplot as plt

# from Tom:

Following on our chat, I realise that my notes were missing some indices. Here’s a corrected version:

    Y_i: N-vector of T scores (or whatever) at voxel i
    Var(Y_i) =  Sigma_i

where Sigma_i is the NxN covariance matrix. But to be practical, we need to assume *common* variance, and a *global* correlation:

    Var(Y_i) =  sigma^2 Q

Where sigma^2 is the (scalar) variance for the whole image, and Q is the common NxN correlation (*not* covariance) matrix.

Then the average is 

    bar{Y_i} = X’ Y_i / N

where X is a column of ones and

    Var(bar{Y_i}) =  sigma^2   X’ Q X / N^2

So then the T test is 

    T_i = bar(Y_i) / sqrt(Var(bar{Y_i}))

I don't think the variance should be estimated over submissions/teams, but if you were to do so you could do it at each voxel as:

    Y_i’ R Y_i / tr(RQ)

then the effective DF is as you say,

    v = tr(RQ)^2 / tr(RQRQ)

But you could also use the naive estimate

    hat{sigma^2_i} = Y_i’ R Y_i / (N-1)
    
Finally, this will give a completely normalised test statistic... i.e. a T_i image with variance 1.  If we wish to retain the average variance of the various test statistics, we simply need to drop sigma^2 from the definition of Var(bar{Y_i}).

In [8]:


def t_corr(y,s_hat_2=None,Q=None):
    """
    Perform a one-sample t-test on correlated data.

    Parameters
    ----------
    y : array, shape (n observations, n vars)
        Data; the mean is taken over observations (axis 0) for each variable.
    s_hat_2 : scalar, optional
        Variance that scales Q in Var(mean) = s_hat_2 * X'QX / N^2.
        Defaults to 1, i.e. the variance is not estimated from the data.
    Q : array, shape (n observations, n observations), optional
        "Known" correlation across observations (use empirical correlation
        based on maps). Defaults to the identity (uncorrelated).

    Returns
    -------
    (T, df, p)
        T  : column means of y divided by sqrt(Var(mean)); because Var(mean)
             is computed as a 1x1 array, T has shape (1, n vars) for 2-D y.
        df : effective degrees of freedom, tr(RQ)^2 / tr(RQRQ).
        p  : scipy.stats.t.cdf(T, df), i.e. the lower-tail probability,
             same shape as T.

    Raises
    ------
    ValueError
        If Q is given but is not (n observations, n observations).
    """

    # The number of observations is taken from the data itself; relying on a
    # notebook-level `npts` breaks on a fresh kernel and silently mismatches
    # when y has a different number of rows.
    npts = numpy.shape(y)[0]

    # Jeanette:
    # This paper calculates the df for an F-test, so the chisquare bit we need is in there.  Your t-statistic will come from
    # X = column of 1's (design matrix)
    X = numpy.ones((npts,1))

    if Q is None:
        # no Q specified, using identity (uncorrelated)
        Q = numpy.eye(npts)
    elif numpy.shape(Q) != (npts, npts):
        raise ValueError(
            'Q must have shape (%d, %d) to match y, got %s'
            % (npts, npts, numpy.shape(Q),))

    # Residual-forming matrix: R = I{n} - X(X'X)^{-1}X'
    R = numpy.eye(npts) - X.dot(numpy.linalg.inv(X.T.dot(X))).dot(X.T)

    if s_hat_2 is None:
        s_hat_2 = 1
        # Don't think this is needed/correct:
        # # s-hat-2 = y'Ry/tr(RQ)
        # s_hat_2 = y.T.dot(R).dot(y)/(numpy.trace(R.dot(Q)))

    # Variance of the mean under correlation Q: s_hat_2 * X'QX / N^2  (1x1 array)
    VarMean = s_hat_2 * X.T.dot(Q).dot(X) / npts**2

    # T = mean(y,0) / sqrt(Var(mean)), one statistic per variable
    T = numpy.mean(y,0)/numpy.sqrt(VarMean)

    # degrees of freedom = v = tr(RQ)^2/tr(RQRQ)
    RQ = R.dot(Q)
    df = (numpy.trace(RQ)**2)/numpy.trace(RQ.dot(RQ))

    # lower-tail probability of T under a t distribution with df degrees of freedom
    p = scipy.stats.t.cdf(T,df=df)
    return(T,df,p)

In [9]:
# simulation settings: observations, variables per run, number of runs
npts, nvars, nruns = 36, 10, 1000

# independent case: every run draws fresh standard-normal data of shape
# (npts, nvars) and keeps the p-values returned by t_corr (third element)
pvals = []
for i in range(nruns):
    y = numpy.random.randn(npts, nvars)
    result = t_corr(y)
    pvals.append(result[2].tolist())

In [33]:
### Work in progress!
# Collect the p-values from the independent-case simulation into an array;
# without this assignment `pvals_mtx` below is undefined on a fresh kernel.
pvals_mtx = numpy.array(pvals)
numpy.mean(pvals_mtx<=0.05)   # If p-values valid/nominal, 5% should be below 0.05

### Simulate dependent case
npts = 36
nvars = 10
nruns=1000
rho=0.9
# unit diagonal, rho everywhere off the diagonal
Cov = (1-rho)*numpy.identity(npts)+rho*numpy.ones([npts,npts])
# draw nvars vectors of length npts, then transpose to (npts, nvars)
y = numpy.random.multivariate_normal(numpy.zeros(npts),Cov,nvars).T
numpy.info(y)
# NOTE: no Q is passed, so t_corr falls back to the identity (uncorrelated) Q
result = t_corr(y)
result

class:  ndarray
shape:  (36, 10)
strides:  (8, 288)
itemsize:  8
aligned:  True
contiguous:  False
fortran:  True
data pointer: 0x7fcbbf1b5800
byteorder:  little
byteswap:  False
type: float64


(array([[-5.50695625,  2.394767  ,  1.34272241,  4.28641553,  5.18283019,
          1.8052387 ,  3.61308617,  1.88095123, -0.82104834, -7.2929705 ]]),
 35.000000000000036,
 array([[1.72526607e-06, 9.88940803e-01, 9.05997126e-01, 9.99932357e-01,
         9.99995386e-01, 9.60177026e-01, 9.99529786e-01, 9.65840455e-01,
         2.08588141e-01, 8.03014600e-09]]))

In [34]:
# settings for the correlated-data simulation:
# observations, variables, number of runs, correlation
npts, nvars, nruns, rho = 36, 10, 1000, 0.9
# now simulate correlated data

def mk_correlated_data(npts,nvars,r,rho):
    """
    Simulate correlated data repeatedly and collect the t_corr p-values.

    Each run draws `nvars` multivariate-normal vectors of length `npts` with
    zero mean and covariance (1-rho)*I + rho*11' (unit diagonal, `rho` off the
    diagonal), arranges them as an (npts, nvars) array and passes it to
    t_corr without a Q argument.

    Parameters
    ----------
    npts : int
        Number of observations (rows of each simulated data set).
    nvars : int
        Number of variables (columns of each simulated data set).
    r : int
        Number of simulation runs.
        NOTE(review): the original body ignored `r` and looped over the
        global `nruns`; `r` is taken to be the run count because the
        signature mirrors the npts/nvars/nruns/rho settings - confirm.
    rho : float
        Correlation between observations.

    Returns
    -------
    list
        One entry per run: the p-value array returned by t_corr, converted
        with .tolist().
    """
    # numpy.ones takes the shape as a single tuple; ones(npts, npts) would
    # treat the second argument as a dtype and raise TypeError
    Cov = (1-rho)*numpy.identity(npts)+rho*numpy.ones((npts,npts))

    pvals = []
    for _ in range(r):
        y = numpy.random.multivariate_normal(numpy.zeros(npts),Cov,nvars).T
        result = t_corr(y)
        pvals.append(result[2].tolist())
    # hand the collected p-values back; previously they were discarded
    return pvals

In [2]:
   
# Tom:
# Yup... that’s the direction, but need to work out the variance of the mean too, not just worry about DF:

# So...

#     Y_i: N-vector of T scores (or whatever) at voxel i
#     Var(Y_i) =  Sigma_i

# where Sigma_i is the NxN covariance matrix. But to be practical, we need to assume common variance, and a global correlation:

#     Var(Y_i) =  sigma^2_i Q

# Where sigma^2_i is the (scalar) variance at voxel i, Q the common correlation

# Then the average is 
#     bar{Y_i} = X’Y_i/N
# and
#     Var(bar{Y}) =  sigma^2_i X_i’ Q X_i / N^2

# So then the T test is 
#    T = bar(Y) / sqrt(Var(bar{Y}))

# If you estimate the variance as you suggest

#    hat{sigma^2_i} = Y’RY / tr(RQ)

# then the effective DF is as you say,

#     v = tr(RQ)^2 / tr(RQRQ)

# But you could also use the naive estimate

#    hat{sigma^2_i} = Y’RY / (N-1)

# but then the DF are

#    v = (N-1)^2 / tr(RQRQ)
             