## Extracting Data

Using Group class from task.py module to extract Data based on conditions (faces, places, tools, places) for both 0-back and 2-back Working Memory task for all 339 participants

In [1]:
# Add modules folder to Python's search path
from os import times
import sys
from pathlib import Path
from os.path import dirname, realpath, abspath
script_dir = Path(abspath(''))
module_dir = str(script_dir.parent)
sys.path.insert(0, module_dir + '/modules')
#print(module_dir)
#print(script_dir)

In [2]:
# Other required imports
import numpy as np
import pandas as pd
import task

In [3]:
# creating an instance of Group class
db_path = '{}/data/hcp_task'.format(module_dir)
group = task.Group(db_path)

In [4]:
# conditions
conditions_0bk = ['0bk_faces', '0bk_tools', '0bk_places', '0bk_body']
conditions_2bk = ['2bk_faces', '2bk_tools', '2bk_places', '2bk_body']

# Loading data for both 0-back and 2-back 
# X are the features, y are the labels
# Dimensions of X = (28080, 1356); (360 ROIS x 78 frames) X (339 subjects x 4 conditions)
#               y = (1356,) 
X_0bk, y_0bk = group.extract_cons(conditions_0bk)
X_2bk, y_2bk = group.extract_cons(conditions_2bk)

Time taken by extract_con is 37.97873258590698 seconds
Time taken by extract_con is 8.357774019241333 seconds
Time taken by extract_con is 8.564321279525757 seconds
Time taken by extract_con is 8.237513780593872 seconds
Time taken by extract_cons is 63.24576115608215 seconds
Time taken by extract_con is 8.10595178604126 seconds
Time taken by extract_con is 7.790438175201416 seconds
Time taken by extract_con is 8.18117904663086 seconds
Time taken by extract_con is 8.115039587020874 seconds
Time taken by extract_cons is 32.29670810699463 seconds


In [5]:
# regions info 
regions = np.load('{}/regions.npy'.format(db_path))

roi_network = regions[:, :2] # All the 360 ROIs and the network names
n_subjects = 339

## Converting the numpy ndarray into a Pandas DataFrame object

In [6]:
# defining indexed for using in pd.MultiIndex

index_0bk = []
for cond in conditions_0bk:
    for subj in np.arange(n_subjects):
        for roi_net in roi_network:
            index_0bk.append((cond, subj, roi_net[0], roi_net[1]))
            

index_2bk = []
for cond in conditions_2bk:
    for subj in np.arange(n_subjects):
        for roi_net in roi_network:
            index_2bk.append((cond, subj, roi_net[0], roi_net[1]))

In [7]:
# Features X as DataFrame object
index_0bk = pd.MultiIndex.from_tuples(index_0bk)
index_2bk = pd.MultiIndex.from_tuples(index_2bk)

# Dimensions of X_df : (488160, 78) : (4 conditions x 339 subjects x 360 ROIs) X (78 frames)

X_0bk_df = pd.DataFrame(X_0bk.reshape(488160, 78), index=index_0bk)
X_0bk_df.index.names = ['Condition', 'Subject_ID', 'ROI', 'Network']

X_2bk_df = pd.DataFrame(X_2bk.reshape(488160, 78), index=index_2bk)
X_2bk_df.index.names = ['Condition', 'Subject_ID', 'ROI', 'Network']

In [9]:
X_0bk_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1,2,3,4,5,6,7,8,9,...,68,69,70,71,72,73,74,75,76,77
Condition,Subject_ID,ROI,Network,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0bk_faces,0,R_V1,Visual1,12028.0,11983.2,11991.3,11979.2,12006.0,11998.1,12008.0,12009.8,11986.2,12008.8,...,12078.0,12048.5,12029.4,12021.9,12003.7,11981.9,12006.8,11996.9,12034.0,12051.1
0bk_faces,0,R_MST,Visual2,10374.7,10356.4,10295.0,10341.2,10320.6,10329.0,10340.4,10283.0,10346.5,10368.5,...,10367.6,10343.9,10358.8,10380.3,10395.0,10398.0,10402.3,10367.7,10381.3,10351.5
0bk_faces,0,R_V6,Visual2,12376.7,12335.6,12296.3,12325.5,12326.1,12342.4,12351.6,12299.0,12411.3,12301.9,...,12424.0,12465.6,12482.8,12499.3,12472.8,12455.6,12544.5,12482.3,12491.8,12545.8
0bk_faces,0,R_V2,Visual2,12099.2,12095.5,12120.9,12080.0,12093.1,12087.4,12114.3,12112.9,12113.8,12152.6,...,12119.4,12123.8,12135.1,12107.3,12083.2,12097.8,12105.4,12096.7,12115.4,12177.3
0bk_faces,0,R_V3,Visual2,11593.1,11553.0,11543.1,11552.2,11588.7,11567.7,11579.6,11557.6,11617.5,11594.2,...,11598.3,11609.5,11624.7,11638.2,11580.7,11613.6,11610.8,11580.6,11622.3,11621.1


In [10]:
X_2bk_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1,2,3,4,5,6,7,8,9,...,68,69,70,71,72,73,74,75,76,77
Condition,Subject_ID,ROI,Network,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2bk_faces,0,R_V1,Visual1,12004.5,12000.5,12004.1,12000.5,12023.5,12060.8,12069.9,12122.1,12146.9,12136.2,...,12172.4,12146.1,12176.3,12157.9,12134.1,12170.2,12142.1,12151.8,12126.5,12158.7
2bk_faces,0,R_MST,Visual2,10417.4,10358.0,10386.2,10399.9,10351.4,10418.7,10487.7,10418.0,10465.7,10435.9,...,10382.8,10383.8,10349.8,10444.2,10395.7,10448.2,10406.3,10435.2,10418.7,10364.0
2bk_faces,0,R_V6,Visual2,12266.4,12277.7,12283.8,12232.4,12354.0,12397.7,12411.1,12395.5,12381.8,12325.5,...,12605.8,12565.4,12584.7,12591.6,12529.0,12534.6,12571.2,12556.0,12567.4,12497.2
2bk_faces,0,R_V2,Visual2,12077.8,12094.1,12080.8,12110.0,12109.9,12154.5,12138.6,12209.3,12162.9,12216.0,...,12205.2,12209.6,12233.0,12237.2,12178.6,12185.7,12226.9,12229.9,12183.7,12190.7
2bk_faces,0,R_V3,Visual2,11602.7,11603.4,11629.0,11608.6,11636.5,11649.8,11669.8,11704.3,11664.0,11707.0,...,11705.4,11719.5,11711.7,11730.7,11688.2,11716.2,11751.3,11733.3,11706.0,11726.2


## Normalization
If we are normalizing the data

## Contrast 2bk - 0bk
Getting the contrast on the network level

In [16]:
# Unpacking indices (to convert them to columns)

X_0bk_df.reset_index(level=0, inplace=True)
X_0bk_df.reset_index(level=0, inplace=True)
X_0bk_df.reset_index(level=0, inplace=True)
X_0bk_df.reset_index(level=0, inplace=True)
X_0bk_df.head()

ValueError: cannot insert level_0, already exists