# RSA analysis for 2023 fMRI data
### By: Linjing Jiang
### Updated on 5/25/2024

This script implements an RSA analysis for fMRI subjects collected from July 2023 to Dec 2024.
Here, I used the most updated RSA toolbox: https://rsatoolbox.readthedocs.io/en/stable/getting_started.html, 
https://github.com/rsagroup/rsatoolbox.

Before running this analysis, please run the following in order:
1. Spatiotemporal GLM first level in SPM (MATLAB) using '/mnt/Data1/linjdata1/vswmda/script/spm_glm/first_level_glm_spatiotemporal.m'. The output is under '/mnt/Data1/linjdata1/vswmda/scan_data/spm/output/preproc_st_spatiotemporal_censor'

2. Extract Beta map per experimental condition per run per ROI,using the '/mnt/Data1/linjdata1/vswmda/script/rsa_lj/prep_beta_map_for_mvpa.m' and '/mnt/Data1/linjdata1/vswmda/script/rsa_lj/get_beta_labels.m'. The output is under '/mnt/Data1/linjdata1/vswmda/scan_data/rsa/beta_run_rsa/f16/beta'

3. Prepare beta maps for rsa, using '/mnt/Data1/linjdata1/vswmda/script/rsa_lj/rsa_data_prep_beta_run_2024.m'. The output is a variable called 'responsePatterns' that is stored under '/mnt/Data1/linjdata1/vswmda/scan_data/rsa/beta_run_rsa/f16/'

After running these scripts, you will get a .mat files under the '/mnt/Data1/linjdata1/vswmda/scan_data/rsa/beta_run_rsa/f16/' folder:

1) responsePattern.mat: Contains extracted bold data across voxels for different ROIs and different sessions/participants.
- 'responsePatterns': data ready for RSA
- This is a 1 x 1 structure, containing 
    - responsePatterns.stim
    - responsePatterns.delay
    - responsePatterns.response
    - corresponding to different task epochs(stimulus, delay, and response)
    - output directory:  '/mnt/Data1/linjdata1/vswmda/scan_data/rsa/beta_run_rsa/f16/'

- for each task epoch, e.g., responsePatterns.stim. It is a 1 x N structure, where N is the number of ROIs x number of sessions x number of participants. For example, if there are 7 ROIs, 1 participants and 2 sessions per participant, then we get a 1 x 14 structure. 

- Within each structure, there are several fields:
    - 'name': ROI name | session | participant id
    - 'data': a V x R x C matrix, where V is the number of voxels in that ROI, R is the number of runs (not trials, because we are using run-wise beta maps derived from first-level GLM), C is the number of conditions. For example, if there are 400 voxels in that ROI, 3 runs of beta maps per condition, and 8 conditions, then we get a 400 x 3 x 8 matrix.
    
You will get another .mat file under the '/mnt/Data1/linjdata1/vswmda/scan_data/rsa/beta_run_rsa/' folder:

2) model.mat: Contains models



In [1]:
# relevant imports
import numpy as np
from scipy import io
import matplotlib.pyplot as plt
from matplotlib import rcParams
import rsatoolbox
import rsatoolbox.data as rsd # abbreviation to deal with dataset
import rsatoolbox.rdm as rsr
import os
import seaborn as sns
import sklearn as sk
import math
import pandas as pd
import pickle
import copy

# Define some functions

# Get beta maps from the data

In [2]:
# change the working directory to be the timecourse data
#os.chdir('/mnt/Data1/linjdata1/vswmda/scan_data/rsa/full_GLM_mgs_0.05_50/')
#os.chdir('/gpfs/scratch/linjjiang/scan_data/rsa/full_GLM_mgs_0.05_50/')
os.chdir('/gpfs/scratch/linjjiang/scan_data/rsa/full_GLM_atlas_roi/')

In [3]:
pwd

'/gpfs/scratch/linjjiang/scan_data/rsa/full_GLM_atlas_roi'

In [4]:
# get ROI name
# # if full_GLM_atlas_roi
order = ['area4-ju50',        
          'v1-wang25','v2-wang25',
          'ips0-wang15','ips1-wang15','ips2-wang15','ips3-wang15',
          'ips4-wang15','ips5-wang15','spl1-wang15','ips-wang15',
         'fef-wang25','spcs-md','ipcs-md','pmfg-md','amfg-md','ifg-md'
          ] 
         #'area8-hcp','area9-hcp','area9|46-hcp','area44|45|47l-hcp','fef-hcp',


# for atlas_roi:
order_full = ['area4-ju50',        
          'v1-wang25','v2-wang25',
          'ips0-wang15','ips1-wang15','ips2-wang15','ips3-wang15',
          'ips4-wang15','ips5-wang15','spl1-wang15','ips-wang15',
         'fef-wang25','spcs-md','ipcs-md','pmfg-md','amfg-md','ifg-md',
              'area8-hcp','area9-hcp','area9|46-hcp','area44|45|47l-hcp','fef-hcp'
          ] 
         #'area8-hcp','area9-hcp','area9|46-hcp','area44|45|47l-hcp','fef-hcp',
    
    
# if full_GLM_mgs_xxx
# order = ['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg',
#          'ips0', 'ips1', 'ips2', 'ips3', 'ips4', 'ips5', 'spl1']
# for mgs roi:
# order_full = order

In [5]:
print(order)

['area4-ju50', 'v1-wang25', 'v2-wang25', 'ips0-wang15', 'ips1-wang15', 'ips2-wang15', 'ips3-wang15', 'ips4-wang15', 'ips5-wang15', 'spl1-wang15', 'ips-wang15', 'fef-wang25', 'spcs-md', 'ipcs-md', 'pmfg-md', 'amfg-md', 'ifg-md']


In [6]:
# Define your specific order
#order = ['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']

# Create a dictionary that maps each character to its position in the order

order_dict = {char: index for index, char in enumerate(order_full)}

# Group the strings into chunks
def group_chunks(lst):
    chunks = []
    indices = []
    current_chunk = []
    current_indices = []
    for i, elem in enumerate(lst):
        if not current_chunk or elem == current_chunk[-1]:
            current_chunk.append(elem)
            current_indices.append(i)
        else:
            chunks.append(current_chunk)
            indices.append(current_indices)
            current_chunk = [elem]
            current_indices = [i]
    chunks.append(current_chunk)
    indices.append(current_indices)
    return chunks, indices

### Load and save all the beta maps as datasets

In [7]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']

In [8]:
run = 'R1'
print([run]*3)

['R1', 'R1', 'R1']


In [9]:
for epoch in range(3):
    for subject in subjects:
        # load matlab data
        measurements = io.matlab.loadmat(subject+'/responsePattern.mat')

        # get data
        responsePatterns = measurements['responsePatterns']

        # get conditions
        conditions = measurements['good_cond']
        conditions = [
            x
            for xss in conditions
            for xs in xss
            for x in xs
        ]

        # get all data
        data_all = responsePatterns[0][0][epoch][0] # all participants/sessions/rois for the delay period
        #ndata = len(data_all['rawdata']) # how many datasets are there
        
        # all rois
        name = data_all[:]['name']
        roi_all = [name[0].split(sep=' | ')[0] for name in data_all[:]['name']]

        # reorder the datasets by rois
        chunks, indices = group_chunks(roi_all)

        # Sort the chunks based on the custom order
        sorted_chunk_indices = sorted(range(len(chunks)), key=lambda i: order_dict[chunks[i][0]])

        # Generate the sorted list of indices
        sorted_indices = [index for i in sorted_chunk_indices for index in indices[i]]

        # now create a dataset object
        dataset = []
        for dd in sorted_indices:#range(0,ndata):
            name = data_all[dd]['name']
            name_spt = name[0].split(sep=' | ') 
            roi_label = name_spt[0] # roi label
            subject = name_spt[1] # subject id
            session = name_spt[2] # session id
            run = name_spt[3] # run id
            ecc_or_set = name_spt[4] # eccentricity or spatial set id (E1: 3dva; E2: 5.5dva; P1: spatial set1; P2: spatial set 2

            if roi_label in ['area8-hcp','area9-hcp','area9|46-hcp','area44|45|47l-hcp','fef-hcp']:
                continue
                
            data = data_all[dd]['rawdata'] # number of voxels by conditions
            nVox = data.shape[0] # number of voxels
            #nRun = data.shape[1] # number of trials
            nCond = data.shape[1] # conditions
            #data = data.reshape(nVox,nRun*nCond) # first condition, then trials
            data = np.transpose(data) # number of conditions by voxels

#             # which rows (conditions) do not have all nan values (valid conditions)
#             cond_not_nan = ~np.isnan(data).all(axis=1)

#             # then, remove columns (voxels) with any nan values during the valid conditions
#             vox_not_nan = ~np.isnan(data[cond_not_nan,:]).any(axis=0) # voxels
#             data = data[:,vox_not_nan]

            # number of voxels included in the final analysis
            nVox_not_nan = data.shape[1] # number of voxels   

            obs_des = {'conds': conditions, 
                       'conds_index': np.arange(0,len(conditions)),
                       'run_name': [run]*len(conditions)} # descriptors for observations
            des = {'session': session, 
                   'subj': subject, 
                   'roi': roi_label, 
                   'run': run,
                   'ecc_or_set': ecc_or_set,
                   'name' : subject+' | '+session+' | '+roi_label} # descriptors #'run': run, 'name': name, 'ecc': ecc
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset.append(rsd.Dataset(measurements=data,
                               descriptors=des,
                               obs_descriptors=obs_des,
                               channel_descriptors=chn_des))
        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'wb') as f:
            pickle.dump(dataset,f)

In [10]:
print(len(dataset))

204


In [11]:
12*len(order) # 12 runs by number of ROIs

204

In [12]:
print(dataset[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [        nan         nan         nan ...         nan         nan
          nan]]
...

descriptors: 
session = S5934
subj = f19
roi = area4-ju50
run = R1
ecc_or_set = P1
name = f19 | S5934 | area4-ju50


obs_descriptors: 
conds = ['CWL1', 'CTL1', 'CWR1', 'CTR1', 'CWL2', 'CTL2', 'CWR2', 'CTR2', 'NWL1', 'NTL1', 'NWR1', 'NTR1', 'NWL2', 'NTL2', 'NWR2', 'NTR2']
conds_index = [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
run_name = ['R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1']


channel_descriptors: 
voxels = ['voxel_0' 'voxel_1' 'voxel_2' ... 'voxel_1354' 'voxel_1355' 'voxel_1356

In [13]:
x = list(range(16))
print(x)
print(np.mod(x,4))
print(np.mod(x,4).tolist())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
[0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3]
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]


In [14]:
# Original list
original_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# Mapping list
mapped_list = [0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7]

# Creating a dictionary for the mapping
mapping_dict = dict(zip(original_list, mapped_list))

# Apply the mapping
result_list = [mapping_dict[item] for item in original_list]

print(result_list)

[0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7]


### Merge datasets across runs for experiment 1 and 2
#### this is for running the complex spatiotemporal model, as we combined two runs of different eccentricities, so each dataset
#### will have all 8 spatiotemporal conditions
SAME subject, session, ROI, pair of run would be the same dataset

In [15]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']
exp2_subjects = ['f17','f18','f19']

In [16]:
# map the conditions

# Original list
original_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# Mapping list
mapped_list = [0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7]

# Creating a dictionary for the mapping
mapping_dict = dict(zip(original_list, mapped_list))

print(conditions)

['CWL1', 'CTL1', 'CWR1', 'CTR1', 'CWL2', 'CTL2', 'CWR2', 'CTR2', 'NWL1', 'NTL1', 'NWR1', 'NTR1', 'NWL2', 'NTL2', 'NWR2', 'NTR2']


In [17]:
# here we only subset conditions, not voxels

In [18]:
from collections import defaultdict
import copy

for epoch in range(3):
    for subject in subjects:
        
        # load dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset = pickle.load(f)
        
        # merge across runs
        
        # figure out which dataset belongs to the same session, subject, and roi
        dicts = [dataset[i].descriptors['name'] for i in range(len(dataset))] # session, subject, roi for each dataset
        
        # Dictionary to store unique strings and their indices
        unique_strings = {}

        # Iterate through the list and populate the dictionary
        for index, string in enumerate(dicts):
            if string not in unique_strings:
                unique_strings[string] = [index]
            else:
                unique_strings[string].append(index)
                
        dataset_merge = []        
        for key in unique_strings: # for unique sessions and ROIs (subjects)
            
            # we want to further pair the runs across eccentricities
            data_temp = copy.deepcopy([dataset[k] for k in unique_strings[key]])
            if subject in exp2_subjects:
                dataset1 = [d for d in data_temp if (d.descriptors['ecc_or_set'] == 'P1')] # ecc 1
                dataset2 = [d for d in data_temp if (d.descriptors['ecc_or_set'] == 'P2')] # ecc 2
            else:
                dataset1 = [d for d in data_temp if (d.descriptors['ecc_or_set'] == 'E1')] # ecc 1
                dataset2 = [d for d in data_temp if (d.descriptors['ecc_or_set'] == 'E2')] # ecc 2
            
            ndata = min(len(dataset1),len(dataset2))
            
            # merge those datasets
            for r in range(ndata):
                run_val1 = dataset1[r].descriptors.pop('run', None)
                run_val2 = dataset2[r].descriptors.pop('run', None) #ndata-r-1
                ecc_val1 = dataset1[r].descriptors.pop('ecc_or_set', None)
                ecc_val2 = dataset2[r].descriptors.pop('ecc_or_set', None) #ndata-r-1
                
                dataset_test = rsatoolbox.data.dataset.merge_subsets([dataset1[r],dataset2[r]]) #ndata-r-1

                dataset_test.descriptors['run'] = run_val1+run_val2
                dataset_test.descriptors['ecc_or_set'] = ecc_val1+ecc_val2

                # remove nans
                measures = dataset_test.get_measurements()

                # which rows (conditions) do not have all nan values (valid conditions)
                cond_not_nan = ~np.isnan(measures).all(axis=1)

                # use it to subset data (measurements)
                measures = measures[cond_not_nan,:]

                # subset conditions
                conds = dataset_test.obs_descriptors['conds']
                conds_index = dataset_test.obs_descriptors['conds_index']
                run_name = dataset_test.obs_descriptors['run_name']
                
                subset_conds = [cond for cond, flag in zip(conds, cond_not_nan) if flag]
                subset_conds_index = [cond for cond, flag in zip(conds_index, cond_not_nan) if flag]
                subset_run_name = [cond for cond, flag in zip(run_name, cond_not_nan) if flag]
                
                # if it is experiment 2, we also want to simplify the conditions
                if subject in ['f17','f18','f19']:
                    subset_conds = [c[:3] for c in subset_conds]
                    #subset_conds_index = np.mod(subset_conds_index,8).tolist()
                    # Apply the mapping
                    subset_conds_index = [mapping_dict[item] for item in subset_conds_index]
                    
                # assign the subset conditions and measurements to the merged dataset
                dataset_test.obs_descriptors['conds'] = subset_conds
                dataset_test.obs_descriptors['conds_index'] = subset_conds_index
                dataset_test.obs_descriptors['run_name'] = subset_run_name
                dataset_test.measurements = measures

                # append the merged dataset
                dataset_merge.append(dataset_test)
 
         # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_run.pkg'),'wb') as f:
            pickle.dump(dataset_merge,f)
            

In [19]:
print(len(dataset_merge))

102


In [20]:
6*len(order) # we combined 2 runs together, so the number of dataset is divided by 2

102

In [21]:
print(dataset_merge[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
session = S5934
roi = area4-ju50
subj = f19
name = f19 | S5934 | area4-ju50
run = R1R2
ecc_or_set = P1P2


obs_descriptors: 
conds_index = [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]
run_name = ['R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R1', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2']
conds = ['CWL', 'CTL', 'CWR', 'CTR', 'NWL', 'NTL', 'NWR', 'NTR', 'CWL', 'CTL', 'CWR', 'CTR', 'NWL', 'NTL', 'NWR', 'NTR']


channel_descriptors: 
voxels = ['voxel_0' 'voxel_1' 'voxel_2' ... 'voxel_1354' 'voxel_1355' 'voxel_1356']




### get rid of spatial set condition for experiment 2
#### this is for running the complex spatiotemporal model, as we get rid of spatial sets from the current conditions
SAME subject, session, ROI, pair of run, would be the same dataset

In [22]:
exp2_subjects = ['f17','f18','f19']
epochs = ['delay','response','stimulus']

In [23]:
for epoch in range(3):
    for subject in exp2_subjects:
        
        # load dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset = pickle.load(f)
            
        dataset_simple_cond = []
        for data in dataset:
            dataset1 = copy.deepcopy(data) 

            # remove nans
            measures = dataset1.get_measurements()

            # which rows (conditions) do not have all nan values (valid conditions)
            cond_not_nan = ~np.isnan(measures).all(axis=1)

            #print(cond_not_nan)

            # use it to subset data (measurements)
            measures = measures[cond_not_nan,:]

            # subset conditions
            conds = dataset1.obs_descriptors['conds']
            conds_index = dataset1.obs_descriptors['conds_index']
            subset_conds = [cond[:3] for cond, flag in zip(conds, cond_not_nan) if flag]
            subset_conds_index = [c for c in range(len(subset_conds))] #[cond for cond, flag in zip(conds_index, cond_not_nan) if flag]

            # assign the subset conditions and measurements to the merged dataset
            dataset1.obs_descriptors['conds'] = subset_conds
            dataset1.obs_descriptors['conds_index'] = subset_conds_index
            dataset1.measurements = measures

            # append the dataset
            dataset_simple_cond.append(dataset1)
 
         # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_simple_cond_exp2.pkg'),'wb') as f:
            pickle.dump(dataset_simple_cond,f)
            

In [24]:
print(len(dataset_simple_cond))

204


In [25]:
print(dataset_simple_cond[1])

rsatoolbox.data.Dataset
measurements = 
[[-4.32295465 -6.16556215  0.7349382  ... -1.17331648  0.45550582
  -2.78499293]
 [-1.72085452 -3.63415432 -1.65540874 ...  0.33521384 -0.54659218
  -0.26850107]
 [-3.02165127 -6.29844046 -1.63729489 ...  0.08400786  1.30360365
  -1.44681382]
 [-3.38832998 -6.20992613  0.13150956 ...  0.90429479 -2.20506287
  -4.67829561]
 [-3.95717478 -6.65573072 -0.20510076 ... -3.87506557 -1.18707049
  -2.55028176]]
...

descriptors: 
session = S5934
subj = f19
roi = area4-ju50
run = R2
ecc_or_set = P2
name = f19 | S5934 | area4-ju50


obs_descriptors: 
conds = ['CWL', 'CTL', 'CWR', 'CTR', 'NWL', 'NTL', 'NWR', 'NTR']
conds_index = [0, 1, 2, 3, 4, 5, 6, 7]
run_name = ['R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2', 'R2']


channel_descriptors: 
voxels = ['voxel_0' 'voxel_1' 'voxel_2' ... 'voxel_1354' 'voxel_1355' 'voxel_1356']




## Merge dataset for complex model (e.g., 8 spatiotemporal category)
Here we need to subset voxels. 
It's time to load residual and subset voxels for residual too.

In [26]:
# We want to further merge the dataset, so that we have different sessions and runs in one matrix

In [27]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']
exp2_subjects = ['f17','f18','f19']

In [28]:
# all model
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_run.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        
        dataset_merge_new = []
        
        # let's merge further across runs and sessions
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        
        
        for curr_roi in uniq_roi: # for each roi
            data_across_sess = []
            idx = 0
            idx_run = 0  
            res_across_sess = []
            dof_task_across_sess = []
            dof_all_across_sess = []
            
            for idx_sess,curr_sess in enumerate(uniq_sess):
                #print(curr_sess)
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                         (x.descriptors['roi'] == curr_roi)])
                
                # load corresponding residual map too
                with open(os.path.join(subject,'res',curr_sess,'residual_'+curr_roi+'.pkg'),'rb') as f:
                    residual_mat,num_res,dof_task,dof_all = pickle.load(f)  
                    
                res_across_sess.append(residual_mat)
                dof_task_across_sess.append(dof_task)
                dof_all_across_sess.append(dof_all)

                # for each run combination
                for rr in range(len(data_temp)):
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = curr_conds #curr_conds[x[-1]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    new_conds_index = [x + idx for x in curr_conds_index]
                    pattern_index = curr_conds_index   
                    #new_run_name = curr_run_name

                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    idx_run += 1
                    sess_index = [idx_sess for _ in range(len(curr_conds))]

            #                     ecc_val = data_temp[rr].descriptors.pop('ecc', None)
            #                     ecc_index = [ecc_val for _ in range(len(curr_conds))]

                    idx = idx+len(np.unique(new_conds)) #uniq_sess.index(curr_sess)*2
                    #print(new_conds)

                    # update
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index

                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
            #                    data_temp[rr].obs_descriptors['ecc_index'] = sess_index

                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    data_temp[rr].descriptors.pop('ecc_or_set', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi

                data_across_sess.extend(data_temp)
            #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))

            
            # remove voxels that are nan
            measure = dataset_test.get_measurements() # beta map
            residual_measure = np.vstack(res_across_sess)# concatenate residual measures
            vox_not_nan = (~np.isnan(measure).any(axis=0)) & (~np.isnan(residual_measure).any(axis=0)) 
            # any column (voxel/channel) that is nan for both beta map and residual map
            
            # update beta map
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  
            
            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des
            
            # append to merged dataset
            dataset_merge_new.append(dataset_test)  
            
            # update residual measurement for each session
            for idx_sess,curr_sess in enumerate(uniq_sess):
                residual_mat_subset = res_across_sess[idx_sess][:,vox_not_nan]
                print(residual_mat_subset.shape,nVox_not_nan)
                
                # save residual dataset
                with open(os.path.join(subject,'res',curr_sess,'all_subset_residual_'+curr_roi+'.pkg'),'wb') as f:
                    pickle.dump([residual_mat_subset,dof_task_across_sess[idx_sess],dof_all_across_sess[idx_sess],],f)                

            
        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_all.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)
            

(2490, 1339) 1339
(2490, 1743) 1743
(2490, 1154) 1154
(2490, 1597) 1597
(2490, 992) 992
(2490, 758) 758
(2490, 491) 491
(2490, 439) 439
(2490, 404) 404
(2490, 579) 579
(2490, 4119) 4119
(2490, 1033) 1033
(2490, 1939) 1939
(2490, 1891) 1891
(2490, 1352) 1352
(2490, 1240) 1240
(2490, 792) 792
(2490, 1248) 1248
(2075, 1248) 1248
(2490, 1728) 1728
(2075, 1728) 1728
(2490, 1417) 1417
(2075, 1417) 1417
(2490, 1630) 1630
(2075, 1630) 1630
(2490, 994) 994
(2075, 994) 994
(2490, 764) 764
(2075, 764) 764
(2490, 489) 489
(2075, 489) 489
(2490, 439) 439
(2075, 439) 439
(2490, 404) 404
(2075, 404) 404
(2490, 585) 585
(2075, 585) 585
(2490, 4165) 4165
(2075, 4165) 4165
(2490, 840) 840
(2075, 840) 840
(2490, 1669) 1669
(2075, 1669) 1669
(2490, 1637) 1637
(2075, 1637) 1637
(2490, 1095) 1095
(2075, 1095) 1095
(2490, 1147) 1147
(2075, 1147) 1147
(2490, 356) 356
(2075, 356) 356
(2075, 1351) 1351
(2905, 1351) 1351
(2075, 1766) 1766
(2905, 1766) 1766
(2075, 1565) 1565
(2905, 1565) 1565
(2075, 1636) 1636
(2

(2490, 1131) 1131
(2490, 1650) 1650
(2490, 1650) 1650
(2490, 992) 992
(2490, 992) 992
(2490, 764) 764
(2490, 764) 764
(2490, 491) 491
(2490, 491) 491
(2490, 439) 439
(2490, 439) 439
(2490, 404) 404
(2490, 404) 404
(2490, 585) 585
(2490, 585) 585
(2490, 4185) 4185
(2490, 4185) 4185
(2490, 798) 798
(2490, 798) 798
(2490, 1478) 1478
(2490, 1478) 1478
(2490, 1693) 1693
(2490, 1693) 1693
(2490, 902) 902
(2490, 902) 902
(2490, 853) 853
(2490, 853) 853
(2490, 443) 443
(2490, 443) 443
(2490, 1293) 1293
(2490, 1293) 1293
(2490, 1190) 1190
(2490, 1190) 1190
(2490, 844) 844
(2490, 844) 844
(2490, 1400) 1400
(2490, 1400) 1400
(2490, 917) 917
(2490, 917) 917
(2490, 742) 742
(2490, 742) 742
(2490, 487) 487
(2490, 487) 487
(2490, 439) 439
(2490, 439) 439
(2490, 404) 404
(2490, 404) 404
(2490, 573) 573
(2490, 573) 573
(2490, 3859) 3859
(2490, 3859) 3859
(2490, 923) 923
(2490, 923) 923
(2490, 1718) 1718
(2490, 1718) 1718
(2490, 1797) 1797
(2490, 1797) 1797
(2490, 1149) 1149
(2490, 1149) 1149
(2490, 120

In [29]:
print(measure.shape)
print(curr_roi)
print(residual_mat.shape)

(96, 743)
ifg-md
(2490, 1984)


In [30]:
print(len(dataset_merge_new))
print(dataset_merge_new[10])

17
rsatoolbox.data.Dataset
measurements = 
[[ 3.25829458  0.77345365  1.2984947  ...  2.53576016  9.57048893
  12.50533104]
 [ 2.58594298  0.81358588  1.02677143 ... -0.74873906  7.44950104
  12.72837448]
 [ 3.60658789  2.58145475  3.83818889 ...  2.80429077 10.73773861
  15.77918148]
 [ 1.57926857 -1.21271455  0.19060683 ...  2.58357906  3.4319756
   8.44166088]
 [ 2.92465663  2.1232121   1.40172887 ...  0.75550526  5.02144384
   9.05018234]]
...

descriptors: 
roi = ips-wang15
subj = f19
name = f19 | ips-wang15


obs_descriptors: 
pattern_index = [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4
 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1
 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
sess_index = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
run_name = ['R1' 'R1' 'R1' 'R1' 'R1' 'R1

# Merge dataset for simpler model (LR, WT, ecc)

In [31]:
# We want to further merge the dataset, so that we have different sessions and runs in one matrix
# for example, if we want to construct a WT (shape) matrix
# We need to rearrange the conditions as follows:
# W-Run14-S1, T-Run14-S1, W-R25-S1, T-R25-S1, W-R14-S2, ...

## LR model

In [32]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']
exp2_subjects = ['f17','f18','f19']

In [33]:
# LR model, averaged across eccentricities
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_run.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        
        dataset_merge_new = []
        
        # let's merge further across runs and sessions
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi:
            data_across_sess = []
            idx = 0
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                #print(curr_sess)
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                         (x.descriptors['roi'] == curr_roi)])

                # for each run combination
                for rr in range(len(data_temp)):
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    #curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = [x[-1]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    new_conds_index = [idx if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    
                    pattern_index = [idx_cond if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_index = [idx_run for _ in range(len(curr_conds))]
                    idx_run += 1
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    
#                     ecc_val = data_temp[rr].descriptors.pop('ecc', None)
#                     ecc_index = [ecc_val for _ in range(len(curr_conds))]
            
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = curr_conds
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
#                    data_temp[rr].obs_descriptors['ecc_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi
                
                data_across_sess.extend(data_temp)
            #print(len(data_across_sess)) # 5 or 6 run combinations 

        #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
#             print(len(dataset_test.obs_descriptors['conds']))
#             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  
            
            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des
            
            dataset_merge_new.append(dataset_test)  
            
        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_LR.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)
            

In [34]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50


obs_descriptors: 
pattern_index = [0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0
 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 '

## LR model - 3-dva eccentricity or spatial set 1

In [35]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']
exp2_subjects = ['f17','f18','f19']

In [36]:
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E2') | (curr_ecc == 'P2'): # 5.5 dva or spatial set 2
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    if subject in exp2_subjects:
                        new_conds = [x[-2]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    else:
                        new_conds = [x[-1]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        if subject in exp2_subjects:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_LR_SP1.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)
        else:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_LR_3dva.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)


In [37]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P1


obs_descriptors: 
pattern_index = [0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
 0 1 1 0 0 1 1 0 0 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0 

## LR model - 5.5-dva eccentricity or spatial set 2

In [38]:
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E1') | (curr_ecc == 'P1'): # 3 dva or spatial set 1
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    if subject in exp2_subjects:
                        new_conds = [x[-2]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    else:
                        new_conds = [x[-1]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'L'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        if subject in exp2_subjects:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_LR_SP2.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)
        else:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_LR_5.5dva.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)


In [39]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[-4.32295465 -6.16556215  0.7349382  ... -1.17331648  0.45550582
  -2.78499293]
 [-1.72085452 -3.63415432 -1.65540874 ...  0.33521384 -0.54659218
  -0.26850107]
 [-3.02165127 -6.29844046 -1.63729489 ...  0.08400786  1.30360365
  -1.44681382]
 [-3.38832998 -6.20992613  0.13150956 ...  0.90429479 -2.20506287
  -4.67829561]
 [-3.95717478 -6.65573072 -0.20510076 ... -3.87506557 -1.18707049
  -2.55028176]]
...

descriptors: 
ecc_or_set = P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P2


obs_descriptors: 
pattern_index = [0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
 0 1 1 0 0 1 1 0 0 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0

## WT model

In [40]:

for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_run.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        
        dataset_merge_new = []
        
        # let's merge further across runs and sessions
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi:
            data_across_sess = []
            idx = 0
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                #print(curr_sess)
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                         (x.descriptors['roi'] == curr_roi)])

                # for each run combination
                for rr in range(len(data_temp)):
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    #curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = [x[-2]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    new_conds_index = [idx if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    
                    pattern_index = [idx_cond if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_index = [idx_run for _ in range(len(curr_conds))]
                    idx_run += 1
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    
#                     ecc_val = data_temp[rr].descriptors.pop('ecc', None)
#                     ecc_index = [ecc_val for _ in range(len(curr_conds))]
            
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = curr_conds
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
#                    data_temp[rr].obs_descriptors['ecc_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi
                
                data_across_sess.extend(data_temp)
            #print(len(data_across_sess)) # 5 or 6 run combinations 

        #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
#             print(len(dataset_test.obs_descriptors['conds']))
#             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  
            
            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des
            
            dataset_merge_new.append(dataset_test)  
            
        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_WT.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)
            

In [41]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50


obs_descriptors: 
pattern_index = [0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 '

## WT model - 3 dva or spatial set 1

In [42]:
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E2') | (curr_ecc == 'P2'): # 5.5 dva or spatial set 2
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    if subject in exp2_subjects:
                        new_conds = [x[-3]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    else:
                        new_conds = [x[-2]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        if subject in exp2_subjects:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_WT_SP1.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)
        else:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_WT_3dva.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)


In [43]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P1


obs_descriptors: 
pattern_index = [0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0 

## WT model, 5.5 dva or spatial set 2

In [44]:
for epoch in range(3):
    for subject in subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E1') | (curr_ecc == 'P1'): # 3 dva or spatial set 1
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    if subject in exp2_subjects:
                        new_conds = [x[-3]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    else:
                        new_conds = [x[-2]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'W'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        if subject in exp2_subjects:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_WT_SP2.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)
        else:
            with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_WT_5.5dva.pkg'),'wb') as f:
                pickle.dump(dataset_merge_new,f)


In [45]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[-4.32295465 -6.16556215  0.7349382  ... -1.17331648  0.45550582
  -2.78499293]
 [-1.72085452 -3.63415432 -1.65540874 ...  0.33521384 -0.54659218
  -0.26850107]
 [-3.02165127 -6.29844046 -1.63729489 ...  0.08400786  1.30360365
  -1.44681382]
 [-3.38832998 -6.20992613  0.13150956 ...  0.90429479 -2.20506287
  -4.67829561]
 [-3.95717478 -6.65573072 -0.20510076 ... -3.87506557 -1.18707049
  -2.55028176]]
...

descriptors: 
ecc_or_set = P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P2


obs_descriptors: 
pattern_index = [0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0

## Eccentricity model (exp 1)

In [46]:
subjects = ['f09','f10','f11','f12','f15','f16','f17','f18','f19']
epochs = ['delay','response','stimulus']
exp1_subjects = ['f09','f10','f11','f12','f15','f16']
exp2_subjects = ['f17','f18','f19']

In [47]:
# FOR THE Eccentricity model
for epoch in range(3):
    for subject in exp1_subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            count_for_5dva = np.arange(1,20,2)
            count_for_3dva = np.arange(0,20,2)
            idx5 = 0
            idx3 = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                for rr in range(len(data_temp)):
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    curr_data = data_temp[rr].get_measurements()
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)

                    # update conditions and condition index
                    # for the ecc model
                    # calculate new condition names and indices
                    new_conds = [x[0]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    new_conds_index = [count_for_3dva[idx3] if x == '3'+'-'+curr_run+'-'+curr_sess 
                                       else count_for_5dva[idx5] for x in new_conds]
                    if '3' in curr_conds[0]:
                        idx3 += 1
                    elif '5' in curr_conds[0]:
                        idx5 += 1
                    #print(new_conds,new_conds_index)
                    
                    pattern_index = [idx_cond if x == '3'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index

                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    data_temp[rr].descriptors.pop('ecc_or_set', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi

                data_across_sess.extend(data_temp)
                #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_ecc.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)


In [48]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 0.78532481  1.10438108 -1.49982238 ...  0.17502366  1.18427742
   3.84072828]
 [ 1.46905017  2.04965448 -0.76967573 ...  0.44648355  0.64516133
   4.07084036]
 [-0.18436474 -1.36462533 -1.70203865 ... -1.61426926 -0.79944575
  -0.81298256]
 [ 3.70959854  3.71300507  0.35336173 ...  0.68085593  1.13180661
   4.42984915]
 [ 0.16760957  1.1413877   0.52713263 ...  1.54696178  1.63102031
   4.18733215]]
...

descriptors: 
roi = area4-ju50
subj = f16
name = f16 | area4-ju50


obs_descriptors: 
pattern_index = [0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 1
 1 1 1 0 0 0 0 0 0 0 0]
sess_index = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1]
run_name = ['R1' 'R1' 'R1' 'R1' 'R2' 'R2' 'R2' 'R2' 'R3' 'R3' 'R3' 'R3' 'R4' 'R4'
 'R4' 'R4' 'R5' 'R5' 'R5' 'R5' 'R6' 'R6' 'R6' 'R6' 'R1' 'R1' 'R1' 'R1'
 'R2' 'R2' 'R2' 'R2' 'R3' 'R3' 'R3' 'R3' 'R4' 'R4' 'R4' 'R4' 'R5' 'R5'
 'R5' 'R5' '

## Spatiotemporal regularity model (exp 2) - Crossing vs non-crossing

In [49]:

for epoch in range(3):
    for subject in exp2_subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_run.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        
        dataset_merge_new = []
        
        # let's merge further across runs and sessions
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi:
            data_across_sess = []
            idx = 0
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                #print(curr_sess)
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                         (x.descriptors['roi'] == curr_roi)])

                # for each run combination
                for rr in range(len(data_temp)):
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    #curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = [x[-3]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                    new_conds_index = [idx if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    
                    pattern_index = [idx_cond if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_index = [idx_run for _ in range(len(curr_conds))]
                    idx_run += 1
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    
#                     ecc_val = data_temp[rr].descriptors.pop('ecc', None)
#                     ecc_index = [ecc_val for _ in range(len(curr_conds))]
            
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = curr_conds
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
#                    data_temp[rr].obs_descriptors['ecc_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi
                
                data_across_sess.extend(data_temp)
            #print(len(data_across_sess)) # 5 or 6 run combinations 

        #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
#             print(len(dataset_test.obs_descriptors['conds']))
#             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  
            
            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des
            
            dataset_merge_new.append(dataset_test)  
            
        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_CN.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)
            

In [50]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50


obs_descriptors: 
pattern_index = [0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1
 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0
 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 '

## Crossing/non-crossing (exp2) - spatial set 1

In [51]:
for epoch in range(3):
    for subject in exp2_subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E2') | (curr_ecc == 'P2'): # 5.5 dva or spatial set 2
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = [x[0]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_CN_SP1.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)



In [52]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[ 1.13681543  1.38350403  3.93112111 ...  1.02807379  0.39928079
   1.18702257]
 [-2.84869862 -4.16433764 -0.19244017 ...  1.57189214  0.55069697
   0.33742237]
 [-3.5486486  -3.39419794  0.74993289 ...  2.44093609  1.17255092
  -1.37427986]
 [-1.26268065 -6.55327177 -0.56980014 ... -0.48392674 -0.04410791
  -1.77953053]
 [-1.64067352 -2.59643316 -1.58815801 ...  2.16497278  1.5415889
  -1.1298852 ]]
...

descriptors: 
ecc_or_set = P1
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P1


obs_descriptors: 
pattern_index = [0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1
 1 1 1 0 0 0 0 1 1 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0 

## Crossing/non-crossing (exp2) - spatial set 2

In [53]:
for epoch in range(3):
    for subject in exp2_subjects:
        
        # load dataset (already merged across runs)
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_run_ecc.pkg'),'rb') as f:
            dataset_merge = pickle.load(f)
        #print(dataset_merge[0])
        
        dataset_merge_new = []
        uniq_roi = order #['area4', 'v1', 'v2', 'ips', 'fef', 'sfg', 'mfg', 'ifg']
        #list(set([dataset_merge[i].descriptors['roi'] for i in range(len(dataset_merge))]))
        uniq_sess = sorted(list(set([dataset_merge[i].descriptors['session'] for i in range(len(dataset_merge))])))
        for curr_roi in uniq_roi: 
            # for each run and session
            idx = 0
            data_across_sess = []
            
            idx_cond = 0
            idx_run = 0
            for idx_sess,curr_sess in enumerate(uniq_sess):
                data_temp = copy.deepcopy([x for x in dataset_merge if (x.descriptors['session'] == curr_sess) and
                                                 (x.descriptors['roi'] == curr_roi)])
                #print(len(data_temp))
                for rr in range(len(data_temp)):
                    curr_ecc = data_temp[rr].descriptors['ecc_or_set']
                    
                    #print(curr_ecc)
                    if (curr_ecc == 'E1') | (curr_ecc == 'P1'): # 3 dva or spatial set 1
                        continue
                    
                    curr_run = data_temp[rr].descriptors['run']
                    curr_conds = data_temp[rr].obs_descriptors['conds']
                    curr_conds_index = data_temp[rr].obs_descriptors['conds_index']
                    curr_run_name = data_temp[rr].obs_descriptors['run_name']
                    
                    curr_data = data_temp[rr].get_measurements()
                    #print(curr_run,curr_ecc,curr_data.shape)

                    # remove nan conditions
                    # which rows (conditions) do not have all nan values (valid conditions)
                    cond_not_nan = ~np.isnan(curr_data).all(axis=1)
                    #print(cond_not_nan)

                    # let's remove those conditions
                    curr_data = curr_data[cond_not_nan,:]
                    #print(curr_data.shape)

                    # remove those invalid conditions from the conds and conds_index too
                    curr_conds = [curr_conds[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    curr_conds_index = [curr_conds_index[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    #print(curr_conds,curr_conds_index)
                    new_run_name = [curr_run_name[i] for i in range(len(cond_not_nan)) if cond_not_nan[i]]
                    
                    # update conditions and condition index
                    # for the LR model
                    # calculate new condition names and indices
                    new_conds = [x[0]+'-'+curr_run+'-'+curr_sess for x in curr_conds]
                        
                    new_conds_index = [idx if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                       else idx+1 for x in new_conds]
                    idx = idx+2 #uniq_sess.index(curr_sess)*2
                    #print(new_conds)
                    
                    pattern_index = [idx_cond if x == 'C'+'-'+curr_run+'-'+curr_sess 
                                     else idx_cond+1 for x in new_conds]       
                    run_pair_index = [idx_run for _ in range(len(curr_conds))]
                    sess_index = [idx_sess for _ in range(len(curr_conds))]
                    idx_run += 1

                    # update
                    data_temp[rr].obs_descriptors['full_conds'] = [c[:3] for c in curr_conds]
                    data_temp[rr].measurements = curr_data
                    data_temp[rr].obs_descriptors['conds'] = new_conds
                    data_temp[rr].obs_descriptors['conds_index'] = new_conds_index
                    data_temp[rr].obs_descriptors['run_name'] = new_run_name
                    data_temp[rr].obs_descriptors['pattern_index'] = pattern_index
                    data_temp[rr].obs_descriptors['run_pair_index'] = run_pair_index
                    data_temp[rr].obs_descriptors['sess_index'] = sess_index
                    
                    # update the descriptors too
                    data_temp[rr].descriptors.pop('session', None)
                    data_temp[rr].descriptors.pop('run', None)
                    #data_temp[rr].descriptors.pop('ecc', None)
                    data_temp[rr].descriptors['name'] = subject + ' | ' + curr_roi + ' | ' + curr_ecc
                    
                    
                    #print(data_temp[rr])
                    data_across_sess.extend([data_temp[rr]])
                    #print(len(data_across_sess)) # 5 or 6 run combinations 

            #    now we can merge datasets across sessions
            dataset_test = rsatoolbox.data.dataset.merge_subsets(data_across_sess)
            #             print(len(dataset_test.obs_descriptors['conds']))
            #             print(dataset_test.obs_descriptors['conds'])

            # remove voxels that are nan
            measure = dataset_test.get_measurements()
            vox_not_nan = ~np.isnan(measure).any(axis=0) # voxels
            measure = measure[:,vox_not_nan]
            nVox_not_nan = measure.shape[1] # number of voxels  

            # update channel descriptor and measurements
            chn_des = {'voxels': np.array(['voxel_' + str(x) for x in np.arange(nVox_not_nan)])} # descriptors for channels
            dataset_test.measurements = measure
            dataset_test.channel_descriptors = chn_des

            dataset_merge_new.append(dataset_test)  

        # save dataset
        with open(os.path.join(subject,subject+'_'+epochs[epoch]+'_dataset_merge_across_sess_CN_SP2.pkg'),'wb') as f:
            pickle.dump(dataset_merge_new,f)



In [54]:
print(dataset_merge_new[0])

rsatoolbox.data.Dataset
measurements = 
[[-4.32295465 -6.16556215  0.7349382  ... -1.17331648  0.45550582
  -2.78499293]
 [-1.72085452 -3.63415432 -1.65540874 ...  0.33521384 -0.54659218
  -0.26850107]
 [-3.02165127 -6.29844046 -1.63729489 ...  0.08400786  1.30360365
  -1.44681382]
 [-3.38832998 -6.20992613  0.13150956 ...  0.90429479 -2.20506287
  -4.67829561]
 [-3.95717478 -6.65573072 -0.20510076 ... -3.87506557 -1.18707049
  -2.55028176]]
...

descriptors: 
ecc_or_set = P2
roi = area4-ju50
subj = f19
name = f19 | area4-ju50 | P2


obs_descriptors: 
pattern_index = [0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1
 1 1 1 0 0 0 0 1 1 1 1]
full_conds = ['CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR'
 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR'
 'NWL' 'NTL' 'NWR' 'NTR' 'CWL' 'CTL' 'CWR' 'CTR' 'NWL' 'NTL' 'NWR' 'NTR']
sess_index = [0 0 0 0

# The END