In [66]:
import os, re
import numpy
import numpy as np
from scipy import stats
from scipy import math
import pandas as pd

from scipy.stats.mstats import zscore
import matplotlib.pyplot as plt
%matplotlib inline

In [90]:
def compute_correlation(matrix1, matrix2):
    """compute correlation between two sets of variables

    Correlate the rows of matrix1 with the rows of matrix2.
    If matrix1 == matrix2, it is auto-correlation computation
    resulting in a symmetric correlation matrix.
    The number of columns MUST agree between set1 and set2.
    The correlation being computed here is
    the Pearson's correlation coefficient, which can be expressed as

    .. math:: corr(X, Y) = \\frac{cov(X, Y)}{\\sigma_X\\sigma_Y}

    where cov(X, Y) is the covariance of variable X and Y, and

    .. math:: \\sigma_X

    is the standard deviation of variable X

    Reducing the correlation computation to matrix multiplication
    and using BLAS GEMM API wrapped by Scipy can speedup the numpy built-in
    correlation computation (numpy.corrcoef) by one order of magnitude

    .. math::
        corr(X, Y)
        &= \\frac{\\sum\\limits_{i=1}^n (x_i-\\bar{x})(y_i-\\bar{y})}{(n-1)
        \\sqrt{\\frac{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}{n-1}}
        \\sqrt{\\frac{\\sum\\limits_{j=1}^{n} y_j^2-n\\bar{y}}{n-1}}}\\\\
        &= \\sum\\limits_{i=1}^n(\\frac{(x_i-\\bar{x})}
        {\\sqrt{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}}
        \\frac{(y_i-\\bar{y})}{\\sqrt{\\sum\\limits_{j=1}^n y_j^2-n\\bar{y}}})

    Parameters
    ----------
    matrix1: 2D array in shape [r1, c]
        MUST be continuous and row-major

    matrix2: 2D array in shape [r2, c]
        MUST be continuous and row-major

    Returns
    -------
    corr_data: 2D array in shape [r1, r2]
        continuous and row-major in np.float32
    """
    matrix1 = matrix1.astype(np.float32)
    matrix2 = matrix2.astype(np.float32)
    [r1, d1] = matrix1.shape
    [r2, d2] = matrix2.shape
    if d1 != d2:
        raise ValueError('Dimension discrepancy')
    # preprocess two components
    matrix1 = _normalize_for_correlation(matrix1, 1)
    matrix2 = _normalize_for_correlation(matrix2, 1)
    #print(matrix1.shape)
    #print(matrix2.shape)
    corr_data = np.corrcoef(matrix1, matrix2)
    #print(corr_data.shape)
    #print(d1)
    return corr_data[:r1, r1:]   ###I Believe this is the critical part! we need to fix this and then should be fine
    #According to my understanding, the submatrices AAA[0:90, 0:90]  and AAA[90:,90:] will be the respective "intra-brain" 
    #correlations. What we want is the one that goes across brains, i.e. the lower/upper quadrant. This should be relatively
    #easy to formalize. i.e. corr_data[:r1, r1:]  should be 0:90 rows, but 90:180 columns, which should be the across brain computation.

def _normalize_for_correlation(data, axis):
    """normalize the data before computing correlation
    The data will be z-scored and divided by sqrt(n)
    along the assigned axis
    Parameters
    ----------
    data: 2D array
    axis: int
        specify which dimension of the data should be normalized
    Returns
    -------
    data: 2D array
        the normalized data
    """
    shape = data.shape
    data = zscore(data, axis=axis, ddof=0)
    # if zscore fails (standard deviation is zero),
    # set all values to be zero
    data = np.nan_to_num(data)
    data = data / math.sqrt(shape[axis])
    return data

def isc(D, collapse_subj=True):
    """Intersubject correlation

    For each voxel, computes the correlation of each subject's timecourse with
    the mean of all other subjects' timecourses. By default the result is
    averaged across subjects, unless collapse_subj is set to False.

    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISC

    collapse_subj : bool, default:True
        Whether to average across subjects before returning result

    Returns
    -------
    ISC : voxel ndarray (or voxel by subject ndarray, if collapse_subj=False)
        pearson correlation for each voxel, across subjects
    """

    n_vox = D.shape[0]
    n_subj = D.shape[2]
    ISC = np.zeros((n_vox, n_subj))

    # Loop across choice of leave-one-out subject
    for loo_subj in range(n_subj):
        group = np.mean(D[:, :, np.arange(n_subj) != loo_subj], axis=2)
        subj = D[:, :, loo_subj]
        for v in range(n_vox):
            ISC[v, loo_subj] = stats.pearsonr(group[v, :], subj[v, :])[0]

    if collapse_subj:
        ISC = np.mean(ISC, axis=1)
    return ISC


def isfc(D, collapse_subj=True):
    """Intersubject functional correlation
    Computes the correlation between the timecoure of each voxel in each
    subject with the average of all other subjects' timecourses in *all*
    voxels. By default the result is averaged across subjects, unless
    collapse_subj is set to False.
    Uses the high performance compute_correlation routine from fcma.util
    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISFC
    collapse_subj : bool, default:True
        Whether to average across subjects before returning result
    Returns
    -------
    ISFC : voxel by voxel ndarray
        (or voxel by voxel by subject ndarray, if collapse_subj=False)
        pearson correlation between all pairs of voxels, across subjects
    """

    n_vox = D.shape[0]
    n_subj = D.shape[2]
    ISFC = np.zeros((n_vox, n_vox, n_subj))

    # Loop across choice of leave-one-out subject
    for loo_subj in range(D.shape[2]):
        group = np.mean(D[:, :, np.arange(n_subj) != loo_subj], axis=2)
        subj = D[:, :, loo_subj]
        ISFC[:, :, loo_subj] = compute_correlation(group, subj)

        # Symmetrize matrix
        ISFC[:, :, loo_subj] = (ISFC[:, :, loo_subj] +
                                ISFC[:, :, loo_subj].T) / 2

    if collapse_subj:
        ISFC = np.mean(ISFC, axis=2)
    return ISFC



In [91]:
networks_labels = [' anterior_Salience1', ' anterior_Salience2', ' anterior_Salience3', ' anterior_Salience4', ' anterior_Salience5', ' anterior_Salience6', ' anterior_Salience7', ' Auditory1', ' Auditory2', ' Auditory3', ' Basal_Ganglia1', ' Basal_Ganglia2', ' Basal_Ganglia3', ' Basal_Ganglia4', ' Basal_Ganglia5', ' dorsal_DMN1', ' dorsal_DMN2', ' dorsal_DMN3', ' dorsal_DMN4', ' dorsal_DMN5', ' dorsal_DMN6', ' dorsal_DMN7', ' dorsal_DMN8', ' dorsal_DMN9', ' high_Visual1', ' high_Visual2', ' Language1', ' Language2', ' Language3', ' Language4', ' Language5', ' Language6', ' Language7', ' LECN1', ' LECN2', ' LECN3', ' LECN4', ' LECN5', ' LECN6', ' post_Salience1', ' post_Salience2', ' post_Salience3', ' post_Salience4', ' post_Salience5', ' post_Salience6', ' post_Salience7', ' post_Salience8', ' post_Salience9', ' post_Salience10', ' post_Salience11', ' post_Salience12', ' Precuneus1', ' Precuneus2', ' Precuneus3', ' Precuneus4', ' prim_Visual1', ' prim_Visual2', ' RECN1', ' RECN2', ' RECN3', ' RECN4', ' RECN5', ' RECN6', ' Sensorimotor1', ' Sensorimotor2', ' Sensorimotor3', ' Sensorimotor4', ' Sensorimotor5', ' Sensorimotor6', ' ventral_DMN1', ' ventral_DMN2', ' ventral_DMN3', ' ventral_DMN4', ' ventral_DMN5', ' ventral_DMN6', ' ventral_DMN7', ' ventral_DMN8', ' ventral_DMN9', ' ventral_DMN10', ' Visuospatial1', ' Visuospatial2', ' Visuospatial3', ' Visuospatial4', ' Visuospatial5', ' Visuospatial6', ' Visuospatial7', ' Visuospatial8', ' Visuospatial9', ' Visuospatial10', ' Visuospatial11']

In [94]:
networks = { "anterior_salience": [0, 6], "auditory": [7, 9],
"basal ganglia": [10,14], "Dorsal_DMN": [15 , 23],  ' high_Visual': [24,25],
            "Language":[26,32], "LECN": [33, 38], "post_Salience": [39, 50],
            "Precuneus": [51, 54], "prim_Visual": [55,56], "RECN": [57, 62], 
           "Sensorimotor": [63, 68], "ventral_DMN": [69,78], "Visuospatial":[79,89]}

In [179]:
data_dir = '/Users/Rui/Dropbox/CNLab/peirui.github.io/brain_language/shirer90spheres8mm/'

os.chdir(data_dir)
org_subjs = ['PSA156', 'PSA019', 'PSA144', 'PSA061', 'PSA120', 'PSA032', 'PSA041', 'PSA079', 'PSA029', 
         'PSA036', 'PSA078', 'PSA086', 'PSA131', 'PSA132', 'PSA153', 'PSA112', 'PSA056', 'PSA068', 
         'PSA075', 'PSA108', 'PSA003', 'PSA005', 'PSA007', 'PSA008', 'PSA009', 'PSA010', 'PSA016', 
         'PSA017', 'PSA026', 'PSA027', 'PSA028', 'PSA030', 'PSA109', 'PSA069', 'PSA119', 'PSA128', 
         'PSA142', 'PSA094', 'PSA151', 'PSA127']
bad_subjs = ['PSA007', 'PSA017', 'PSA027']
subjs = [subj for subj in org_subjs if subj not in bad_subjs]

tr = 31

In [180]:
def network_isfc_vID(pID1, pID2, vID, network_name):
    data1_org = np.load(data_dir + pID1 + '_' + str(vID) + '_realcost01.npz')
    data1 = data1_org['arr_0'][:tr,networks[network_name][0]:(networks[network_name][1] + 1)]
    data2_org = np.load(data_dir + pID2 + '_' + str(vID) + '_realcost01.npz')
    data2 = data2_org['arr_0'][:tr,networks[network_name][0]:(networks[network_name][1] + 1)]

    n_vox = data1.shape[1]

    ISFC = []
    for v in range(n_vox):
        for w in range(n_vox):
            if w != v:
                ISFC = np.append(ISFC, stats.pearsonr(data1[v, :], data2[w, :])[0])

    isfc = np.average(ISFC)
    return isfc

In [181]:
def network_isfc(pID1, pID2, network_name):
    total_isfc = 0
    for i in range(12):
        total_isfc = total_isfc + network_isfc_vID(pID1, pID2, i+1, network_name)
    return total_isfc/12
        

In [None]:
# calculate ISC
# create a dataframe
df = pd.read_csv('/Users/Rui/Dropbox/CNLab/peirui.github.io/brain_language/individual_37choose2_empty.csv')
for i in range(len(networks_labels)):
    df[networks_labels[i]] = np.nan
    print(networks_labels[i])

for i in range(df.shape[0]):
    print(i)
    for j in range(len(networks_labels)):
        if (df['pID1'][i] in subjs) & (df['pID2'][i] in subjs):
            cor = 0
            for vID in range(12):
                data1_org = np.load(data_dir + df['pID1'][i] + '_' + str(vID + 1) + '_realcost01.npz')
                data1 = data1_org['arr_0'][:tr,:]
                data2_org = np.load(data_dir + df['pID2'][i] + '_' + str(vID + 1) + '_realcost01.npz')
                data2 = data2_org['arr_0'][:tr,:]
                cor += stats.pearsonr(data1[:,j],data2[:,j] )[0]
        
        df[networks_labels[j]][i] = cor/12

 anterior_Salience1
 anterior_Salience2
 anterior_Salience3
 anterior_Salience4
 anterior_Salience5
 anterior_Salience6
 anterior_Salience7
 Auditory1
 Auditory2
 Auditory3
 Basal_Ganglia1
 Basal_Ganglia2
 Basal_Ganglia3
 Basal_Ganglia4
 Basal_Ganglia5
 dorsal_DMN1
 dorsal_DMN2
 dorsal_DMN3
 dorsal_DMN4
 dorsal_DMN5
 dorsal_DMN6
 dorsal_DMN7
 dorsal_DMN8
 dorsal_DMN9
 high_Visual1
 high_Visual2
 Language1
 Language2
 Language3
 Language4
 Language5
 Language6
 Language7
 LECN1
 LECN2
 LECN3
 LECN4
 LECN5
 LECN6
 post_Salience1
 post_Salience2
 post_Salience3
 post_Salience4
 post_Salience5
 post_Salience6
 post_Salience7
 post_Salience8
 post_Salience9
 post_Salience10
 post_Salience11
 post_Salience12
 Precuneus1
 Precuneus2
 Precuneus3
 Precuneus4
 prim_Visual1
 prim_Visual2
 RECN1
 RECN2
 RECN3
 RECN4
 RECN5
 RECN6
 Sensorimotor1
 Sensorimotor2
 Sensorimotor3
 Sensorimotor4
 Sensorimotor5
 Sensorimotor6
 ventral_DMN1
 ventral_DMN2
 ventral_DMN3
 ventral_DMN4
 ventral_DMN5
 ventral_D

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [None]:
df.to_csv('/Users/Rui/Dropbox/CNLab/peirui.github.io/brain_language/individual_37choose2_isc.csv')

In [96]:
#calculate ISFC
# create a dataframe

df = pd.read_csv('/Users/Rui/Dropbox/CNLab/peirui.github.io/brain_language/individual_37choose2_empty.csv')
for key in networks.keys():
    df[key] = np.nan

for i in range(df.shape[0]):
    print(i)
    for key in networks.keys():
        if (df['pID1'][i] in subjs) & (df['pID2'][i] in subjs):
            df[key][i] = network_isfc(df['pID1'][i], df['pID2'][i], key)
    

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
  prob = _betai(0.5*df, 0.5, df/(df+t_squared))


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


NameError: name 'write' is not defined

In [None]:
df.to_csv('individual_37choose2_isfc.csv')

In [None]:
df
