In [1]:
### Consensus analysis using mean Z score
import numpy
import scipy.stats
import matplotlib.pyplot as plt

# from Tom:

Following on our chat, I realise that my notes were missing some indices. Here’s a corrected version:

    Y_i: N-vector of T scores (or whatever) at voxel i
    Var(Y_i) =  Sigma_i

where Sigma_i is the NxN covariance matrix. But to be practical, we need to assume *common* variance, and a *global* correlation:

    Var(Y_i) =  sigma^2 Q

where sigma^2 is the (scalar) variance for the whole image and Q is the common NxN correlation (*not* covariance) matrix.

Then the average is 

    bar{Y_i} = X’ Y_i / N

where X is a column of ones and

    Var(bar{Y_i}) =  sigma^2   X’ Q X / N^2

So then the T test is 

    T_i = bar{Y_i} / sqrt(Var(bar{Y_i}))

I don't think the variance should be estimated over submissions/teams, but if you were to do so you could do it at each voxel as follows, where R = I - X(X’X)^{-1}X’ is the residual-forming matrix:

    Y_i’ R Y_i / tr(RQ)

then the effective DF is as you say,

    v = tr(RQ)^2 / tr(RQRQ)

But you could also use the naive estimate

    hat{sigma^2_i} = Y_i’ R Y_i / (N-1)
    
Finally, this will give a completely normalised test statistic, i.e. a T_i image that has variance 1.  If we wish to retain the average variance of the various test statistics, we simply need to drop sigma^2 from the definition of Var(bar{Y_i}).

In [2]:
def t_corr(y,s_hat_2=None,Q=None):
    """
    Perform a one-sample t-test on correlated data.

    The model is Var(y[:, j]) = s_hat_2 * Q for every variable j, so the
    variance of the mean over observations is s_hat_2 * X'QX / n^2, with X a
    column of ones.

    Parameters
    ----------
    y : array-like, shape (n_observations, n_variables)
        Data; the statistic is computed on the mean over axis 0.
    s_hat_2 : scalar or array-like, optional
        Variance multiplying Q. Defaults to 1, i.e. the data are treated as
        already having unit variance and no variance is estimated.
    Q : array-like, shape (n_observations, n_observations), optional
        "Known" correlation across observations (use empirical correlation
        based on maps). Defaults to the identity (uncorrelated observations).

    Returns
    -------
    T : ndarray
        Mean over observations divided by sqrt(Var(mean)). Because Var(mean)
        is computed as a 1x1 matrix product, T has shape (1, n_variables)
        for 2-D input.
    df : float
        Effective degrees of freedom, tr(RQ)^2 / tr(RQRQ).
    p : ndarray
        Lower-tail probability of T under a t distribution with df degrees
        of freedom (scipy.stats.t.cdf).

    Raises
    ------
    ValueError
        If Q is not (n_observations, n_observations).
    """
    y = numpy.asarray(y)
    # Take the number of observations from the data itself rather than from a
    # notebook-level global, so the function works on a fresh kernel and for
    # inputs of any size.
    npts = y.shape[0]

    # Jeanette:
    # The referenced paper calculates the df for an F-test, so the chi-square
    # bit we need is in there. The t-statistic comes from the design matrix
    # X = column of 1's.
    X = numpy.ones((npts,1))

    if Q is None:
        # no Q specified, using identity (uncorrelated)
        Q = numpy.eye(npts)
    else:
        Q = numpy.asarray(Q)
        if Q.shape != (npts, npts):
            raise ValueError(
                'Q must have shape (%d, %d) to match y, got %s'
                % (npts, npts, Q.shape))

    # R = I{n} - X(X'X)^{-1}X'
    R = numpy.eye(npts) - X.dot(numpy.linalg.inv(X.T.dot(X))).dot(X.T)

    if s_hat_2 is None:
        s_hat_2 = 1
        # Don't think this is needed/correct:
        # # s-hat-2 = y'Ry/tr(RQ)
        # s_hat_2 = y.T.dot(R).dot(y)/(numpy.trace(R.dot(Q)))

    # Var(mean) = s_hat_2 * X'QX / n^2  (a 1x1 array)
    VarMean = s_hat_2 * X.T.dot(Q).dot(X) / npts**2

    # T = mean(y, 0) / sqrt(Var(mean))
    T = numpy.mean(y,0)/numpy.sqrt(VarMean)

    # degrees of freedom = v = tr(RQ)^2/tr(RQRQ)
    RQ = R.dot(Q)
    df = (numpy.trace(RQ)**2)/numpy.trace(RQ.dot(RQ))
    p = scipy.stats.t.cdf(T,df=df)
    return(T,df,p)

In [5]:
# Simulation settings: observations per variable, variables per run, number of simulated runs
npts = 36
nvars = 10
nruns=1000

# simulate independent case: i.i.d. standard-normal data, t_corr called without Q
pvals = []
for i in range(nruns):
    y = numpy.random.randn(npts,nvars)
    result = t_corr(y)
    pvals.append(result[2].tolist())  # result[2] is the p-value array returned by t_corr
pvals_mtx = numpy.array(pvals)   
numpy.mean(pvals_mtx<=0.05)   # If p-values valid/nominal, 5% should be below 0.05


0.0463

In [109]:
# Settings for the correlated-data simulations
npts, nvars = 36, 10   # observations per variable, variables per run
nruns = 100            # number of simulated runs
rho = 0.9              # off-diagonal value passed to mk_CS_Cov

# now simulate correlated data

def mk_CS_Cov(npts,rho):
    """
    Build an npts x npts compound-symmetry matrix.

    The result is (1-rho)*I + rho*J, where J is the all-ones matrix: every
    off-diagonal entry equals rho and every diagonal entry equals
    (1-rho)+rho.
    """
    shape = (npts, npts)
    shared_part = rho * numpy.ones(shape)
    unique_part = (1 - rho) * numpy.eye(npts)
    return unique_part + shared_part
            
# Compound-symmetry matrix shared by the correlated simulations below
# (rho off the diagonal); used both to generate data and as the Q passed to t_corr
Q = mk_CS_Cov(npts,rho)

def mk_correlated_data(npts,nvars,Cov):
    """
    Draw one zero-mean multivariate-normal data set with covariance Cov.

    Parameters
    ----------
    npts : int
        Number of observations; Cov must be npts x npts.
    nvars : int
        Number of independent draws (variables).
    Cov : array-like, shape (npts, npts)
        Covariance across observations.

    Returns
    -------
    ndarray, shape (npts, nvars)
        Each column is one draw from N(0, Cov). Samples come from the global
        numpy.random state.
    """
    # A single draw is all that is ever returned, so draw exactly once. The
    # previous version looped over the notebook global `nruns` and discarded
    # every draw but the last.
    return numpy.random.multivariate_normal(numpy.zeros(npts),Cov,nvars).T


In [110]:
# Apply simulation 'right' way, telling t_corr about correlation
pvals = []
for i in range(nruns):
    y = mk_correlated_data(npts,nvars,Q)
    result = t_corr(y,None,Q)  # s_hat_2 left at its default; Q is the same matrix used to generate y
    pvals.append(result[2].tolist())  # result[2] is the p-value array returned by t_corr
pvals_mtx = numpy.array(pvals)   
numpy.mean(pvals_mtx<=0.05)   # If p-values valid/nominal, 5% should be below 0.05


0.051

In [121]:
# Apply simulation 'wrong' way, NOT telling t_corr about correlation
# (the data are generated with Q, but t_corr is called without it)
pvals = []
for i in range(nruns):
    y = mk_correlated_data(npts,nvars,Q)
    result = t_corr(y)
    pvals.append(result[2].tolist())  # result[2] is the p-value array returned by t_corr
pvals_mtx = numpy.array(pvals)   
numpy.mean(pvals_mtx<=0.05)   # If p-values valid/nominal... but you'll surely find it higher


0.374

In [125]:
# Inspect whatever the kernel currently holds: rho, the first column of y,
# and the T statistics and degrees of freedom from the latest t_corr call
print(rho, y[:,0], result[0], result[1], sep="\n")

0.9
[1.16832173 1.2655247  0.91872055 0.96695345 0.75922581 0.38582622
 0.53303207 1.75454439 1.2296246  1.20750502 0.47400494 0.8325709
 1.02920045 1.17808991 0.86505562 1.24630867 0.64809131 1.40406542
 1.28193027 0.60977046 1.00211555 0.3033739  1.51665266 0.39704934
 0.61669889 1.18616785 1.01061705 1.1419307  1.30373429 0.84820372
 1.33873225 0.73156287 0.97850548 1.31734443 0.97902405 1.48909115]
[[ 5.98652845  1.84670152 -5.54931802 -6.9470364  -7.26680022 -0.2415651
  -0.8004416  -2.97792895 -0.67411505 -5.94823149]]
35.000000000000036


In [84]:
# Finally do a simulation where variance is wacky... output should also be wacky
BigSd=3  # factor the simulated data are multiplied by
Tvals = []
pvals = []
for i in range(nruns):
    y = BigSd*mk_correlated_data(npts,nvars,Q)
    result = t_corr(y)  # called without Q or s_hat_2
    pvals.append(result[2].tolist())
    Tvals.append(result[0].tolist())
pvals_mtx = numpy.array(pvals)   
Tvals_mtx = numpy.array(Tvals)   
numpy.std(Tvals_mtx)  # standard deviation over all collected T statistics


17.74765142509552

[-1.05869892 -1.22075625 -0.32704517 -0.28239941 -0.89451499 -1.09842964
 -1.25614818 -0.6639502  -0.8224491  -1.19889906 -0.60441808 -1.30108216
 -0.96012083 -1.02403721 -0.71249447 -1.21743339 -0.60207883 -0.99584428
 -0.9882371  -0.84661183 -0.64463676 -0.38856144 -0.32685999 -0.69046695
 -0.83947943 -0.76223982 -1.25047426 -0.80118529 -1.23095401 -0.84807762
 -0.94671084 -0.99286439 -0.78922693 -1.08084965 -0.95542829 -0.77292297]
35.000000000000036


In [107]:
# Simulate correlated data (compound symmetry, rho = 0.3) scaled by 4, and
# analyse it without telling t_corr about either the correlation or the scale
Cov_cs03 = mk_CS_Cov(npts,0.3)   # loop-invariant, so build it once
pvals = []
Tvals = []
for i in range(nruns):
    y = 4*mk_correlated_data(npts,nvars,Cov_cs03)
    result = t_corr(y)
    pvals.append(result[2].tolist())
    Tvals.append(result[0].tolist())
pvals_mtx = numpy.array(pvals)
Tvals_mtx = numpy.array(Tvals)
# Only the last expression of a cell is displayed automatically, so the
# rejection rate has to be printed explicitly or it is silently discarded.
print(numpy.mean(pvals_mtx<=0.05))   # If p-values valid/nominal, 5% should be below 0.05
# Std of T over everything, over runs (first variable), and over variables (first run)
print(numpy.std(Tvals_mtx), numpy.std(Tvals_mtx[:,0,0]),numpy.std(Tvals_mtx[0,0,:])  )



14.141288117924207 13.917895803567212 12.179686958090187


In [108]:
# Display the rho = 0.3 compound-symmetry matrix for inspection
mk_CS_Cov(npts,0.3)


array([[1. , 0.3, 0.3, ..., 0.3, 0.3, 0.3],
       [0.3, 1. , 0.3, ..., 0.3, 0.3, 0.3],
       [0.3, 0.3, 1. , ..., 0.3, 0.3, 0.3],
       ...,
       [0.3, 0.3, 0.3, ..., 1. , 0.3, 0.3],
       [0.3, 0.3, 0.3, ..., 0.3, 1. , 0.3],
       [0.3, 0.3, 0.3, ..., 0.3, 0.3, 1. ]])