In [1]:
import pandas as pd
from nilearn import image
import numpy as np

Load our subject's run data into a pandas `DataFrame`. Our final objective is to create a `numpy.ndarray` of type `bool` that "picks out" the volumes recorded while the subject was viewing the source-code stimulus.

In [63]:
# Identifiers (zero-padded strings) of the subject and run analysed below.
sub, run = "08", "01"

In [64]:
# Read the events table of the selected subject/run.
# - The directory is built from `sub` so it always agrees with the file name
#   (it used to be hard-coded to sub-08 while the file name used `sub`).
# - The separator is a raw string: a backslash-s inside a normal string literal
#   is an invalid escape sequence and emits a warning on recent Python versions.
b_df = pd.read_csv(
    f"data/sub-{sub}/func/sub-{sub}_task-ProgramCategorization_run-{run}_events.tsv",
    sep=r"\s+",
)
b_df

Unnamed: 0,onset,duration,trial_no,event_type,stim_file,category,subcategory,response,correctness
0,0,16,0,dummy_trial,,,,,
1,16,2,1,baseline,,,,,
2,18,10,1,source_code,0013_MATH_GCD.java,Math,GreatestCommonDivider,,
3,28,4,1,response,,,,Math,correct
4,32,2,2,baseline,,,,,
...,...,...,...,...,...,...,...,...,...
104,562,10,35,source_code,0045_SORT_IST.java,Sort,InsertionSort,,
105,572,4,35,response,,,,Sort,correct
106,576,2,36,baseline,,,,,
107,578,10,36,source_code,0025_SEARCH_LSC.java,Search,LinerSearch,,


Remove the `dummy_trial` from the dataframe (note that the dummy trial took 16 seconds, i.e. 8 volumes).

In [49]:
# Drop the leading dummy-trial row (index label 0) when the table starts with one.
# The guard makes the cell safe to re-run: once the row is gone nothing happens.
starts_with_dummy = b_df["event_type"].iloc[0] == "dummy_trial"
if starts_with_dummy:
    b_df = b_df.drop(index=0)

In [50]:
# Show the events table again to confirm the dummy-trial row is gone.
b_df

Unnamed: 0,onset,duration,trial_no,event_type,stim_file,category,subcategory,response,correctness
1,16,2,1,baseline,,,,,
2,18,10,1,source_code,0013_MATH_GCD.java,Math,GreatestCommonDivider,,
3,28,4,1,response,,,,Math,correct
4,32,2,2,baseline,,,,,
5,34,10,2,source_code,0034_SEARCH_LSC.java,Search,LinerSearch,,
...,...,...,...,...,...,...,...,...,...
104,562,10,35,source_code,0045_SORT_IST.java,Sort,InsertionSort,,
105,572,4,35,response,,,,Sort,correct
106,576,2,36,baseline,,,,,
107,578,10,36,source_code,0025_SEARCH_LSC.java,Search,LinerSearch,,


Let's load the functional images to examine them.

In [51]:
# Load the 4D BOLD series of the same subject/run as the events table.
# The path is built from `sub` and `run` (instead of hard-coding sub-08/run-01)
# so the image and the events table cannot refer to different recordings.
bold = image.load_img(
    f"data/sub-{sub}/func/sub-{sub}_task-ProgramCategorization_run-{run}_bold.nii.gz"
)
bold.shape

(96, 96, 75, 296)

Each functional image has `296` volumes, and one volume is recorded every 2 seconds (TR = 2s).
For each functional image we will:
1. Remove the dummy trial 16s of readings, which corresponds to the first 8 volumes (16s/TR = 16s/2s = 8)
   - This will leave us with volumes with the following event pattern, repeated 36 times (there are 36 trials per run):
      1. `baseline` event of 2s (one image volume)
      2. `source_code` event of 10s (this is what we want! For our regressors we will use the first 8s out of the 10s, i.e. the first 4 volumes (8s/TR = 8s/2s = 4))
      3. `response` event of 4s
2. From our whole functional image, extract the 4 volumes per trial that we are interested in (the `source_code` volumes described in step 1, item 2 above)

Remove the dummy trial 16s of readings, which corresponds to the first 8 volumes (16s/TR = 16s/2s = 8)

In [52]:
# Discard the volumes acquired during the dummy trial: 16 s / 2 s per volume = 8.
# NOTE: this reassigns `bold`, so re-running the cell trims 8 more volumes;
# reload the image first if the cell has to be executed again.
n_dummy_volumes = 16 // 2
bold = bold.slicer[:, :, :, n_dummy_volumes:]
bold.shape

(96, 96, 75, 288)

From our whole functional image, extract the 4 volumes per trial that we are interested in (the `source_code` volumes described in step 1, item 2 above)

In [54]:
# One trial spans 8 volumes: (2 s baseline + 10 s source code + 4 s response) / 2 s per volume.
n_volumes_per_trial = 8
condition_trial = np.zeros(n_volumes_per_trial)
condition_trial.shape

(8,)

In [55]:
# Within a trial, volume 0 is the baseline; volumes 1-4 cover the first 8 s
# of the source_code event, which are the ones we keep.
first_code_volume, n_code_volumes = 1, 4
condition_trial[first_code_volume:first_code_volume + n_code_volumes] = 1
condition_trial

array([0., 1., 1., 1., 1., 0., 0., 0.])

There are 36 trials, so we build the mask for the whole run by repeating `condition_trial` 36 times.

In [56]:
# Repeat the single-trial pattern once per trial to get a boolean mask for the whole run.
n_trials = 36
condition_mask = np.tile(condition_trial.astype(bool), n_trials)
condition_mask

array([False,  True,  True,  True,  True, False, False, False, False,
        True,  True,  True,  True, False, False, False, False,  True,
        True,  True,  True, False, False, False, False,  True,  True,
        True,  True, False, False, False, False,  True,  True,  True,
        True, False, False, False, False,  True,  True,  True,  True,
       False, False, False, False,  True,  True,  True,  True, False,
       False, False, False,  True,  True,  True,  True, False, False,
       False, False,  True,  True,  True,  True, False, False, False,
       False,  True,  True,  True,  True, False, False, False, False,
        True,  True,  True,  True, False, False, False, False,  True,
        True,  True,  True, False, False, False, False,  True,  True,
        True,  True, False, False, False, False,  True,  True,  True,
        True, False, False, False, False,  True,  True,  True,  True,
       False, False, False, False,  True,  True,  True,  True, False,
       False, False,

In [57]:
# Sanity check: the mask needs one entry per BOLD volume left after trimming (288).
condition_mask.shape

(288,)

Create a new `bold_code` image by extracting the volumes selected by the array `condition_mask`.

In [58]:
# Keep only the volumes flagged True in `condition_mask` (the source_code volumes).
bold_code = image.index_img(bold, condition_mask)
bold_code.shape

(96, 96, 75, 144)

In [61]:
# Re-display the events table before deriving the per-volume labels from it.
b_df

Unnamed: 0,onset,duration,trial_no,event_type,stim_file,category,subcategory,response,correctness
1,16,2,1,baseline,,,,,
2,18,10,1,source_code,0013_MATH_GCD.java,Math,GreatestCommonDivider,,
3,28,4,1,response,,,,Math,correct
4,32,2,2,baseline,,,,,
5,34,10,2,source_code,0034_SEARCH_LSC.java,Search,LinerSearch,,
...,...,...,...,...,...,...,...,...,...
104,562,10,35,source_code,0045_SORT_IST.java,Sort,InsertionSort,,
105,572,4,35,response,,,,Sort,correct
106,576,2,36,baseline,,,,,
107,578,10,36,source_code,0025_SEARCH_LSC.java,Search,LinerSearch,,


In [65]:
# Keep only the source_code/response events and the columns needed for labelling,
# then renumber the rows from 0 so consecutive rows can be paired by position.
wanted_events = ["source_code", "response"]
wanted_columns = ["trial_no", "event_type", "stim_file", "category", "response", "correctness"]
is_wanted = b_df["event_type"].isin(wanted_events)
labels_df = b_df.loc[is_wanted, wanted_columns].reset_index(drop=True)
labels_df

Unnamed: 0,trial_no,event_type,stim_file,category,response,correctness
0,1,source_code,0013_MATH_GCD.java,Math,,
1,1,response,,,Math,correct
2,2,source_code,0034_SEARCH_LSC.java,Search,,
3,2,response,,,Search,correct
4,3,source_code,0023_SEARCH_BSC.java,Search,,
...,...,...,...,...,...,...
67,34,response,,,String,correct
68,35,source_code,0045_SORT_IST.java,Sort,,
69,35,response,,,Sort,correct
70,36,source_code,0025_SEARCH_LSC.java,Search,,


In [67]:
# Build one metadata row per selected BOLD volume.
#
# `labels_df` is expected to alternate source_code / response rows, so rows
# i and i+1 describe the same trial. Four volumes are kept per trial, hence
# each trial's row is repeated four times.
n_code_volumes = 4

rows = []
for i in range(0, len(labels_df) - 1, 2):
    # Look each row up once instead of re-indexing the frame for every field.
    code_event = labels_df.loc[i]
    response_event = labels_df.loc[i + 1]
    if code_event["trial_no"] != response_event["trial_no"]:
        # Inconsistent data is a ValueError; NameError is meant for unbound names.
        raise ValueError(
            f"expected rows {i} and {i+1} to have the same trial_no, "
            f"got {code_event['trial_no']} and {response_event['trial_no']}"
        )
    row = [
        code_event["trial_no"],
        code_event["stim_file"],
        code_event["category"],
        response_event["response"],
        bool(response_event["correctness"] == "correct"),
    ]
    # Append independent copies so the repeated rows never alias one list object.
    rows.extend(list(row) for _ in range(n_code_volumes))

l_df = pd.DataFrame(rows, columns=["trial_no","stim_file","category","response","correct"])
l_df

Unnamed: 0,trial_no,stim_file,category,response,correct
0,1,0013_MATH_GCD.java,Math,Math,True
1,1,0013_MATH_GCD.java,Math,Math,True
2,1,0013_MATH_GCD.java,Math,Math,True
3,1,0013_MATH_GCD.java,Math,Math,True
4,2,0034_SEARCH_LSC.java,Search,Search,True
...,...,...,...,...,...
139,35,0045_SORT_IST.java,Sort,Sort,True
140,36,0025_SEARCH_LSC.java,Search,Search,True
141,36,0025_SEARCH_LSC.java,Search,Search,True
142,36,0025_SEARCH_LSC.java,Search,Search,True


In [70]:
# Build the per-volume class labels: one category string per kept volume.
#
# Rows i and i+1 of `labels_df` are expected to be the source_code and
# response events of the same trial; the source_code row carries the category.
n_code_volumes = 4  # volumes kept per trial

labels_list = []
for i in range(0, len(labels_df) - 1, 2):
    code_trial = labels_df.loc[i, "trial_no"]
    response_trial = labels_df.loc[i + 1, "trial_no"]
    if code_trial != response_trial:
        # Inconsistent data is a ValueError; NameError is meant for unbound names.
        raise ValueError(
            f"expected rows {i} and {i+1} to have the same trial_no, "
            f"got {code_trial} and {response_trial}"
        )
    labels_list.extend([labels_df.loc[i, "category"]] * n_code_volumes)

labels = np.array(labels_list)
print(labels.shape)
labels

(144,)


array(['Math', 'Math', 'Math', 'Math', 'Search', 'Search', 'Search',
       'Search', 'Search', 'Search', 'Search', 'Search', 'Math', 'Math',
       'Math', 'Math', 'Sort', 'Sort', 'Sort', 'Sort', 'Search', 'Search',
       'Search', 'Search', 'Sort', 'Sort', 'Sort', 'Sort', 'Search',
       'Search', 'Search', 'Search', 'Math', 'Math', 'Math', 'Math',
       'Sort', 'Sort', 'Sort', 'Sort', 'Sort', 'Sort', 'Sort', 'Sort',
       'String', 'String', 'String', 'String', 'Search', 'Search',
       'Search', 'Search', 'Math', 'Math', 'Math', 'Math', 'String',
       'String', 'String', 'String', 'Search', 'Search', 'Search',
       'Search', 'Sort', 'Sort', 'Sort', 'Sort', 'Search', 'Search',
       'Search', 'Search', 'Search', 'Search', 'Search', 'Search',
       'String', 'String', 'String', 'String', 'Math', 'Math', 'Math',
       'Math', 'Math', 'Math', 'Math', 'Math', 'String', 'String',
       'String', 'String', 'Math', 'Math', 'Math', 'Math', 'String',
       'String', 'String', '

In [72]:
# from nilearn import input_data
# from nilearn import plotting

In [73]:
# nifti_masker = input_data.NiftiMasker(
#     mask_strategy='epi', memory="nilearn_cache", memory_level=2,
# #     detrend=True,
#     standardize=True,
#     smoothing_fwhm=8,
#     mask_args=dict(opening=3)
# )
# nifti_masker.fit(bold)
# mask_img = nifti_masker.mask_img_
# plotting.plot_img(mask_img)
# report = nifti_masker.generate_report()
# report

In [74]:
# # Make processing parallel
# # /!\ As each thread will print its progress, n_jobs > 1 could mess up the
# #     information output.
# n_jobs = 1

# # Define the cross-validation scheme used for validation.
# # Here we use a KFold cross-validation on the session, which corresponds to
# # splitting the samples in 4 folds and make 4 runs using each fold as a test
# # set once and the others as learning sets
# from sklearn.model_selection import KFold
# cv = KFold(n_splits=4)

# from nilearn import decoding

In [75]:
# # The radius is the one of the Searchlight sphere that will scan the volume
# searchlight = decoding.SearchLight(
#     mask_img,
# #     process_mask_img=process_mask_img,
#     radius=4, n_jobs=1,
#     verbose=1, cv=cv)

In [76]:
# searchlight.fit(bold_code, labels)