In [1]:
import sys
import os

import scipy.signal
import sklearn.preprocessing
from sklearn.feature_selection import SelectKBest, f_classif

import pandas as pd

import numpy as np

import h5py

sys.path.insert(0, 'D:/Downloads/Trends_neuroimaging/Data')

In [2]:
# image and mask directories
BASE_PATH = f'Data/'
train_data_dir = f'{BASE_PATH}/fMRI_train'

print('Reading data...')
loading_data = pd.read_csv(f'{BASE_PATH}/loading.csv')
train_data = pd.read_csv(f'{BASE_PATH}/train_scores.csv')
fnc_data = pd.read_csv(f"{BASE_PATH}/fnc.csv")
print('Reading data completed')

Reading data...
Reading data completed


In [3]:
## Detrending over time for all voxels
def detrend(data, axis=-1, type='linear', inplace='false'):
    return scipy.signal.detrend(data, axis, type, overwrite_data=inplace)

In [4]:
def normalize(data, norm='l2', axis='1', inplace='false', return_norm='true'):
    return sklearn.preprocessing.normalize(data, norm, axis, return_norm, copy=inplace)

In [5]:
print('Loading.csv: ', loading_data.shape)
print('train_data.csv: ', train_data.shape)
print('fnc_data.csv: ', fnc_data.shape)

Loading.csv:  (11754, 27)
train_data.csv:  (5877, 6)
fnc_data.csv:  (11754, 1379)


In [6]:
loading_data.head()

Unnamed: 0,Id,IC_01,IC_07,IC_05,IC_16,IC_26,IC_06,IC_10,IC_09,IC_18,...,IC_08,IC_03,IC_21,IC_28,IC_11,IC_20,IC_30,IC_22,IC_29,IC_14
0,10001,0.00607,0.014466,0.004136,0.000658,-0.002742,0.005033,0.01672,0.003484,0.001797,...,0.018246,0.023711,0.009177,-0.013929,0.030696,0.010496,0.002892,-0.023235,0.022177,0.017192
1,10002,0.009087,0.009291,0.007049,-0.002076,-0.002227,0.004605,0.012277,0.002946,0.004086,...,0.014635,0.022556,0.012004,-0.011814,0.022479,0.005739,0.00288,-0.016609,0.025543,0.014524
2,10003,0.008151,0.014684,0.010444,-0.005293,-0.002913,0.015042,0.017745,0.00393,-0.008021,...,0.019565,0.030616,0.018184,-0.010469,0.029799,0.015435,0.005211,-0.028882,0.031427,0.018164
3,10004,0.004675,0.000957,0.006154,-0.000429,-0.001222,0.011755,0.01301,0.000193,0.008075,...,0.002658,0.022266,0.005956,-0.010595,0.024078,-0.000319,0.005866,-0.015182,0.024476,0.01476
4,10005,-0.000398,0.006878,0.009051,0.000369,0.000336,0.010679,0.010352,0.003637,0.00418,...,0.009702,0.017257,0.005454,-0.008591,0.019416,0.000786,0.002692,-0.019814,0.017105,0.013316


In [7]:
loading_data.describe()

Unnamed: 0,Id,IC_01,IC_07,IC_05,IC_16,IC_26,IC_06,IC_10,IC_09,IC_18,...,IC_08,IC_03,IC_21,IC_28,IC_11,IC_20,IC_30,IC_22,IC_29,IC_14
count,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,...,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0
mean,15877.5,0.005423,0.009251,0.010635,0.001054,-0.001271,0.01341,0.013749,0.001988,0.005053,...,0.010036,0.020869,0.009754,-0.008081,0.023412,0.005084,0.003595,-0.014729,0.026623,0.016187
std,3393.231867,0.004552,0.004153,0.003609,0.003591,0.002658,0.004032,0.003905,0.0032,0.005296,...,0.003914,0.003542,0.004609,0.003267,0.004578,0.003697,0.002846,0.005972,0.004039,0.003731
min,10001.0,-0.015894,-0.015927,-0.00224,-0.013459,-0.015118,-0.002929,0.001156,-0.009622,-0.027575,...,-0.005282,0.008878,-0.010426,-0.020051,0.008485,-0.007969,-0.00772,-0.040384,0.013261,0.001266
25%,12939.25,0.002527,0.006495,0.008159,-0.001319,-0.002904,0.010764,0.011048,-0.000185,0.002079,...,0.007356,0.018471,0.006692,-0.010231,0.020266,0.00255,0.001677,-0.018738,0.023791,0.013595
50%,15877.5,0.005546,0.009192,0.010572,0.000956,-0.001132,0.013513,0.013545,0.001917,0.005605,...,0.009942,0.02062,0.00986,-0.008101,0.023231,0.004953,0.003539,-0.014498,0.026411,0.016031
75%,18815.75,0.008476,0.01199,0.013009,0.003432,0.000535,0.016087,0.016186,0.004113,0.008734,...,0.012616,0.023098,0.012876,-0.005915,0.026302,0.007421,0.005454,-0.010635,0.029244,0.018599
max,21754.0,0.024189,0.029621,0.026218,0.022613,0.007863,0.028797,0.035586,0.015763,0.022121,...,0.028522,0.036454,0.026258,0.005262,0.045043,0.029049,0.016599,0.00571,0.04457,0.032066


In [8]:
train_data.head()

Unnamed: 0,Id,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2
0,10001,57.436077,30.571975,62.553736,53.32513,51.427998
1,10002,59.580851,50.969456,67.470628,60.651856,58.311361
2,10004,71.413018,53.152498,58.012103,52.418389,62.536641
3,10005,66.53263,,,52.108977,69.993075
4,10007,38.617381,49.197021,65.674285,40.151376,34.096421


In [9]:
train_data.describe()

Unnamed: 0,Id,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2
count,5877.0,5877.0,5439.0,5439.0,5838.0,5838.0
mean,15909.667007,50.034068,51.474692,59.244132,47.32513,51.905658
std,3411.775315,13.539881,10.188354,11.387595,11.124863,11.839203
min,10001.0,14.257265,15.769168,1.021874,0.991172,0.815285
25%,12961.0,40.129361,44.78124,52.396805,40.122682,44.51488
50%,15925.0,50.427747,51.847306,60.052535,47.811205,52.572032
75%,18886.0,59.580851,58.495576,67.142611,55.058014,59.910146
max,21754.0,84.491113,81.32558,94.702874,82.164478,94.509903


In [10]:
fnc_data.head()

Unnamed: 0,Id,SCN(53)_vs_SCN(69),SCN(98)_vs_SCN(69),SCN(99)_vs_SCN(69),SCN(45)_vs_SCN(69),ADN(21)_vs_SCN(69),ADN(56)_vs_SCN(69),SMN(3)_vs_SCN(69),SMN(9)_vs_SCN(69),SMN(2)_vs_SCN(69),...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
0,10001,0.36858,0.166876,0.438148,0.341007,-0.186251,0.049096,0.121417,-0.174268,-0.231578,...,-0.149279,0.552841,0.131046,0.335446,0.394867,-0.042853,0.124627,-0.060712,0.515964,0.290488
1,10002,0.151696,-0.024819,0.217504,0.418072,-0.227234,-0.064052,-0.143832,-0.118116,-0.054825,...,-0.214216,-0.039792,0.143014,-0.189962,0.498373,0.444231,0.592438,0.028649,0.705524,0.248327
2,10003,0.343415,0.109974,0.741641,0.578558,-0.676446,-0.43696,-0.295663,-0.37779,-0.344963,...,-0.154941,0.13685,-0.022361,0.137625,0.677972,0.409412,0.563892,0.438684,0.618204,0.284474
3,10004,0.132793,0.258255,0.490769,0.342717,0.091112,0.107969,0.02922,-0.026237,0.094742,...,-0.130339,0.30954,0.141469,0.030853,0.344394,0.214097,0.317556,0.012435,0.665937,0.081358
4,10005,0.291921,0.251254,0.41647,0.511719,-0.362626,-0.16471,-0.289059,-0.015537,-0.087316,...,-0.139525,0.394932,0.040443,0.428334,0.498837,0.266755,0.227379,0.028984,0.752343,0.087898


In [11]:
fnc_data.describe()

Unnamed: 0,Id,SCN(53)_vs_SCN(69),SCN(98)_vs_SCN(69),SCN(99)_vs_SCN(69),SCN(45)_vs_SCN(69),ADN(21)_vs_SCN(69),ADN(56)_vs_SCN(69),SMN(3)_vs_SCN(69),SMN(9)_vs_SCN(69),SMN(2)_vs_SCN(69),...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
count,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,...,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0,11754.0
mean,15877.5,0.246927,0.198147,0.520016,0.490069,-0.217425,-0.109341,-0.161214,-0.168549,-0.104565,...,-0.079791,0.324006,0.080442,0.249249,0.469321,0.348458,0.389868,0.142987,0.626399,0.298581
std,3393.231867,0.188442,0.19125,0.149495,0.150758,0.19453,0.183983,0.19694,0.197502,0.222499,...,0.189295,0.179264,0.145512,0.174422,0.195441,0.182376,0.22661,0.20066,0.140739,0.168661
min,10001.0,-0.62907,-0.554956,-0.483373,-0.209405,-0.798512,-0.71341,-0.798242,-0.851988,-0.861804,...,-0.747646,-0.537107,-0.585412,-0.585195,-0.301003,-0.28856,-0.479468,-0.557574,-0.315752,-0.302054
25%,12939.25,0.125744,0.075446,0.430992,0.385989,-0.355614,-0.232753,-0.295071,-0.298609,-0.249991,...,-0.21118,0.208489,-0.017199,0.135643,0.336983,0.218694,0.232897,0.005823,0.539361,0.18184
50%,15877.5,0.251728,0.19932,0.52891,0.486579,-0.215644,-0.10468,-0.14901,-0.158472,-0.095097,...,-0.084883,0.337088,0.082511,0.258028,0.47566,0.339374,0.391916,0.131073,0.636222,0.288689
75%,18815.75,0.372947,0.323079,0.619095,0.595955,-0.082527,0.015833,-0.022079,-0.030253,0.049065,...,0.048712,0.451829,0.179133,0.370526,0.611851,0.472629,0.556859,0.27302,0.727098,0.410891
max,21754.0,0.817093,0.929635,0.960302,0.942525,0.897264,0.909322,0.864636,0.540866,0.701827,...,0.589481,0.825865,0.638884,0.766774,0.9668,0.896814,0.938468,0.830948,0.960858,0.85394


In [12]:
def get_nan(data):
    ## displays NaNs in X
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum()/data.isnull().count()*100).sort_values(ascending = False)
    missing_data  = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    return missing_data

In [13]:
missing_data = get_nan(train_data)
missing_data 

Unnamed: 0,Total,Percent
domain1_var2,438,7.452782
domain1_var1,438,7.452782
domain2_var2,39,0.663604
domain2_var1,39,0.663604
age,0,0.0
Id,0,0.0


In [14]:
to_select_feature = train_data.join(loading_data.set_index('Id'), on='Id')
to_select_feature = to_select_feature.join(fnc_data.set_index('Id'), on='Id')
to_select_feature.head()

Unnamed: 0,Id,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2,IC_01,IC_07,IC_05,IC_16,...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
0,10001,57.436077,30.571975,62.553736,53.32513,51.427998,0.00607,0.014466,0.004136,0.000658,...,-0.149279,0.552841,0.131046,0.335446,0.394867,-0.042853,0.124627,-0.060712,0.515964,0.290488
1,10002,59.580851,50.969456,67.470628,60.651856,58.311361,0.009087,0.009291,0.007049,-0.002076,...,-0.214216,-0.039792,0.143014,-0.189962,0.498373,0.444231,0.592438,0.028649,0.705524,0.248327
2,10004,71.413018,53.152498,58.012103,52.418389,62.536641,0.004675,0.000957,0.006154,-0.000429,...,-0.130339,0.30954,0.141469,0.030853,0.344394,0.214097,0.317556,0.012435,0.665937,0.081358
3,10005,66.53263,,,52.108977,69.993075,-0.000398,0.006878,0.009051,0.000369,...,-0.139525,0.394932,0.040443,0.428334,0.498837,0.266755,0.227379,0.028984,0.752343,0.087898
4,10007,38.617381,49.197021,65.674285,40.151376,34.096421,0.005192,0.010585,0.01216,-0.00092,...,-0.150218,0.408926,0.072004,0.157582,0.532046,0.355448,0.462675,0.161005,0.703679,0.293607


In [15]:
file_names = to_select_feature['Id']
to_select_feature = to_select_feature.dropna(axis=0)
to_select_feature = to_select_feature.set_index('Id')
to_select_feature

Unnamed: 0_level_0,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2,IC_01,IC_07,IC_05,IC_16,IC_26,...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10001,57.436077,30.571975,62.553736,53.325130,51.427998,0.006070,0.014466,0.004136,0.000658,-0.002742,...,-0.149279,0.552841,0.131046,0.335446,0.394867,-0.042853,0.124627,-0.060712,0.515964,0.290488
10002,59.580851,50.969456,67.470628,60.651856,58.311361,0.009087,0.009291,0.007049,-0.002076,-0.002227,...,-0.214216,-0.039792,0.143014,-0.189962,0.498373,0.444231,0.592438,0.028649,0.705524,0.248327
10004,71.413018,53.152498,58.012103,52.418389,62.536641,0.004675,0.000957,0.006154,-0.000429,-0.001222,...,-0.130339,0.309540,0.141469,0.030853,0.344394,0.214097,0.317556,0.012435,0.665937,0.081358
10007,38.617381,49.197021,65.674285,40.151376,34.096421,0.005192,0.010585,0.012160,-0.000920,-0.002255,...,-0.150218,0.408926,0.072004,0.157582,0.532046,0.355448,0.462675,0.161005,0.703679,0.293607
10008,35.326582,15.769168,65.782269,44.643805,50.448485,0.007745,0.009748,0.009356,-0.004219,-0.003852,...,-0.080562,0.005339,-0.386757,0.020546,0.518383,0.408071,0.465851,0.112785,0.574596,0.178531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,14.257265,21.358872,61.165998,51.778483,54.640179,-0.001115,0.007108,0.008652,0.003596,0.000950,...,-0.249481,0.205351,0.012067,0.310750,0.426335,0.193812,0.158720,0.055471,0.568766,0.160516
21747,55.456978,68.169675,29.907995,55.349257,54.019517,0.007263,0.016489,0.012704,0.004357,-0.005044,...,-0.119170,0.201846,-0.008290,0.119828,0.551936,0.598931,0.511816,0.303312,0.704483,0.461588
21750,48.948756,55.114811,60.878271,38.617246,50.679885,0.005996,0.003873,0.012353,0.000242,-0.002159,...,-0.103786,0.375065,0.104857,0.262614,0.502715,0.322353,0.458041,0.343754,0.705207,0.341224
21752,66.532630,59.844808,72.303110,55.458281,46.870235,0.000627,0.011407,0.010957,0.000534,-0.000347,...,0.229712,0.431489,0.039062,0.119474,0.523894,0.445209,0.332011,0.228977,0.560968,0.263504


In [16]:
Y = to_select_feature
Y_age = Y['age']
Y_d1v1 = Y['domain1_var1']
Y_d1v2 = Y['domain1_var2']
Y_d2v1 = Y['domain2_var1']
Y_d2v2 = Y['domain2_var2']
Y

Unnamed: 0_level_0,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2,IC_01,IC_07,IC_05,IC_16,IC_26,...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10001,57.436077,30.571975,62.553736,53.325130,51.427998,0.006070,0.014466,0.004136,0.000658,-0.002742,...,-0.149279,0.552841,0.131046,0.335446,0.394867,-0.042853,0.124627,-0.060712,0.515964,0.290488
10002,59.580851,50.969456,67.470628,60.651856,58.311361,0.009087,0.009291,0.007049,-0.002076,-0.002227,...,-0.214216,-0.039792,0.143014,-0.189962,0.498373,0.444231,0.592438,0.028649,0.705524,0.248327
10004,71.413018,53.152498,58.012103,52.418389,62.536641,0.004675,0.000957,0.006154,-0.000429,-0.001222,...,-0.130339,0.309540,0.141469,0.030853,0.344394,0.214097,0.317556,0.012435,0.665937,0.081358
10007,38.617381,49.197021,65.674285,40.151376,34.096421,0.005192,0.010585,0.012160,-0.000920,-0.002255,...,-0.150218,0.408926,0.072004,0.157582,0.532046,0.355448,0.462675,0.161005,0.703679,0.293607
10008,35.326582,15.769168,65.782269,44.643805,50.448485,0.007745,0.009748,0.009356,-0.004219,-0.003852,...,-0.080562,0.005339,-0.386757,0.020546,0.518383,0.408071,0.465851,0.112785,0.574596,0.178531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,14.257265,21.358872,61.165998,51.778483,54.640179,-0.001115,0.007108,0.008652,0.003596,0.000950,...,-0.249481,0.205351,0.012067,0.310750,0.426335,0.193812,0.158720,0.055471,0.568766,0.160516
21747,55.456978,68.169675,29.907995,55.349257,54.019517,0.007263,0.016489,0.012704,0.004357,-0.005044,...,-0.119170,0.201846,-0.008290,0.119828,0.551936,0.598931,0.511816,0.303312,0.704483,0.461588
21750,48.948756,55.114811,60.878271,38.617246,50.679885,0.005996,0.003873,0.012353,0.000242,-0.002159,...,-0.103786,0.375065,0.104857,0.262614,0.502715,0.322353,0.458041,0.343754,0.705207,0.341224
21752,66.532630,59.844808,72.303110,55.458281,46.870235,0.000627,0.011407,0.010957,0.000534,-0.000347,...,0.229712,0.431489,0.039062,0.119474,0.523894,0.445209,0.332011,0.228977,0.560968,0.263504


In [17]:
X = to_select_feature.drop(['age', 'domain1_var1', 'domain1_var2', 'domain2_var1', 'domain2_var2'], axis=1)
X

Unnamed: 0_level_0,IC_01,IC_07,IC_05,IC_16,IC_26,IC_06,IC_10,IC_09,IC_18,IC_04,...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10001,0.006070,0.014466,0.004136,0.000658,-0.002742,0.005033,0.016720,0.003484,0.001797,0.029223,...,-0.149279,0.552841,0.131046,0.335446,0.394867,-0.042853,0.124627,-0.060712,0.515964,0.290488
10002,0.009087,0.009291,0.007049,-0.002076,-0.002227,0.004605,0.012277,0.002946,0.004086,0.027333,...,-0.214216,-0.039792,0.143014,-0.189962,0.498373,0.444231,0.592438,0.028649,0.705524,0.248327
10004,0.004675,0.000957,0.006154,-0.000429,-0.001222,0.011755,0.013010,0.000193,0.008075,0.027787,...,-0.130339,0.309540,0.141469,0.030853,0.344394,0.214097,0.317556,0.012435,0.665937,0.081358
10007,0.005192,0.010585,0.012160,-0.000920,-0.002255,0.011416,0.013838,0.001929,0.003051,0.031867,...,-0.150218,0.408926,0.072004,0.157582,0.532046,0.355448,0.462675,0.161005,0.703679,0.293607
10008,0.007745,0.009748,0.009356,-0.004219,-0.003852,0.012024,0.010205,0.002903,0.000870,0.037699,...,-0.080562,0.005339,-0.386757,0.020546,0.518383,0.408071,0.465851,0.112785,0.574596,0.178531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,-0.001115,0.007108,0.008652,0.003596,0.000950,0.016314,0.017090,0.003513,0.004217,0.027352,...,-0.249481,0.205351,0.012067,0.310750,0.426335,0.193812,0.158720,0.055471,0.568766,0.160516
21747,0.007263,0.016489,0.012704,0.004357,-0.005044,0.013909,0.019284,-0.006267,-0.000456,0.031161,...,-0.119170,0.201846,-0.008290,0.119828,0.551936,0.598931,0.511816,0.303312,0.704483,0.461588
21750,0.005996,0.003873,0.012353,0.000242,-0.002159,0.020201,0.020931,0.003684,-0.002458,0.033895,...,-0.103786,0.375065,0.104857,0.262614,0.502715,0.322353,0.458041,0.343754,0.705207,0.341224
21752,0.000627,0.011407,0.010957,0.000534,-0.000347,0.013499,0.010541,0.001867,0.007447,0.020901,...,0.229712,0.431489,0.039062,0.119474,0.523894,0.445209,0.332011,0.228977,0.560968,0.263504


In [18]:
num_features = 20
def select_features(X, y, percentile):
    selector = SelectKBest(f_classif, k=percentile)
    X_new = selector.fit_transform(X, y)
    cols = selector.get_support(indices=True)
    return [X_new, cols]

In [19]:
X_age, cols = select_features(X, Y_age, num_features)
print('X_age: ', X_age.shape)
df = X.iloc[:, cols]
df

X_age:  (5434, 20)


Unnamed: 0_level_0,IC_05,IC_06,IC_04,IC_24,IC_15,IC_02,IC_21,IC_28,IC_20,IC_22,VSN(93)_vs_SCN(69),CON(63)_vs_SCN(69),CON(67)_vs_SCN(69),CON(38)_vs_SCN(69),CBN(18)_vs_SCN(69),SCN(99)_vs_SCN(98),VSN(93)_vs_SCN(45),CON(67)_vs_SCN(45),CON(38)_vs_SCN(45),DMN(71)_vs_DMN(32)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10001,0.004136,0.005033,0.029223,0.014609,0.005123,0.008492,0.009177,-0.013929,0.010496,-0.023235,0.169682,-0.224618,-0.104753,-0.204877,-0.236896,0.384276,0.077604,-0.149885,-0.259913,0.538605
10002,0.007049,0.004605,0.027333,0.006232,0.008819,0.003573,0.012004,-0.011814,0.005739,-0.016609,-0.106545,-0.296022,0.050343,-0.119580,0.125297,0.548919,-0.199981,-0.071426,-0.129928,0.312441
10004,0.006154,0.011755,0.027787,0.004144,0.006837,0.007893,0.005956,-0.010595,-0.000319,-0.015182,-0.215010,-0.145296,0.026584,0.105518,-0.032029,0.645483,-0.195734,0.112778,0.014720,0.422041
10007,0.012160,0.011416,0.031867,0.007295,0.013983,0.005516,0.016791,-0.009594,0.003731,-0.008462,-0.240210,0.113599,0.263209,0.230909,0.243688,0.743055,-0.127963,-0.004517,0.264013,0.765829
10008,0.009356,0.012024,0.037699,0.002737,0.010363,0.006014,0.014109,-0.006456,0.004483,-0.013822,-0.121159,0.100405,-0.109874,-0.173042,-0.203350,0.633374,-0.176583,-0.089372,-0.335186,0.362035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,0.008652,0.016314,0.027352,0.009887,0.012812,0.010571,0.011081,-0.015960,0.006943,-0.011562,-0.138648,-0.018043,0.217287,0.040869,-0.062062,0.672191,-0.130231,0.117187,-0.053205,0.657990
21747,0.012704,0.013909,0.031161,0.005840,0.006906,0.007112,0.010703,-0.010636,0.001316,-0.022357,0.280003,-0.192978,-0.177325,-0.148506,0.085147,0.779743,-0.083011,0.088666,0.073966,0.672751
21750,0.012353,0.020201,0.033895,0.002727,0.012912,0.013347,0.006448,-0.007203,0.004942,-0.020226,-0.028291,-0.054711,0.089430,0.009955,0.019131,0.402390,-0.036810,0.114416,-0.004793,0.463477
21752,0.010957,0.013499,0.020901,0.005250,0.010305,0.004507,0.001160,-0.007140,0.002026,-0.014612,-0.336247,-0.098283,0.069331,0.277888,0.202814,0.718507,-0.386493,0.082594,0.234990,0.550293


In [20]:
X_d1v1, cols = select_features(X, Y_d1v1, num_features)
print('X_d1v1: ', X_age.shape)
df = X.iloc[:, cols]
df

X_d1v1:  (5434, 20)


Unnamed: 0_level_0,IC_12,IC_15,SMN(9)_vs_SCN(69),SMN(66)_vs_SCN(69),VSN(93)_vs_SCN(98),CON(33)_vs_SCN(98),CON(61)_vs_SCN(98),VSN(16)_vs_SMN(9),VSN(15)_vs_SMN(9),VSN(8)_vs_SMN(9),VSN(8)_vs_SMN(2),VSN(5)_vs_SMN(27),CBN(4)_vs_SMN(80),CON(79)_vs_SMN(72),VSN(77)_vs_VSN(12),CON(79)_vs_CON(68),CON(81)_vs_CON(43),CON(63)_vs_CON(61),CBN(4)_vs_CON(96),CBN(18)_vs_DMN(32)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10001,0.007778,0.005123,-0.174268,-0.237781,0.061363,0.106191,-0.032733,-0.196441,-0.218899,-0.206151,-0.194491,-0.189249,-0.205466,0.506615,0.415278,-0.161351,-0.650465,0.103365,0.135609,0.162034
10002,0.005419,0.008819,-0.118116,-0.123721,0.011817,0.281979,-0.039436,0.025548,0.052610,0.017059,-0.256147,-0.017400,-0.186921,0.550249,0.644697,-0.532091,-0.641973,0.230863,0.022653,-0.111987
10004,-0.005184,0.006837,-0.026237,-0.123536,-0.249878,0.167544,-0.028234,-0.084339,-0.068464,0.002111,-0.203712,-0.240904,-0.107236,0.515128,0.561369,-0.304497,-0.681688,-0.098098,0.090604,-0.093209
10007,-0.003470,0.013983,-0.190934,-0.196622,0.164454,0.243260,0.201671,-0.196746,-0.201379,-0.363255,-0.431900,-0.027559,0.047974,0.571199,0.513946,0.070705,-0.563614,0.384572,0.038178,0.068315
10008,-0.004683,0.010363,-0.188684,-0.218577,-0.253535,0.502345,0.095305,-0.073015,0.021830,-0.015584,-0.106409,-0.216865,0.190114,0.244827,0.259507,0.186585,-0.572532,0.580120,-0.455242,0.257266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,-0.008532,0.012812,-0.204301,-0.085494,-0.036488,0.359534,0.074277,0.099193,0.059134,0.129983,-0.257288,-0.028123,0.040496,0.440924,0.426671,-0.160196,-0.433337,0.193070,-0.008901,-0.045315
21747,0.002536,0.006906,-0.225046,-0.200598,-0.286451,0.403434,0.003459,0.073125,0.047377,0.094822,0.293412,0.599027,-0.197806,0.502043,0.299722,0.109805,-0.520445,0.159934,0.002706,0.076437
21750,-0.005588,0.012912,-0.273216,-0.360215,-0.132522,0.198278,-0.126540,-0.359769,-0.534185,-0.470276,-0.267290,-0.517697,-0.104845,0.409956,0.282012,-0.012652,-0.668469,0.185859,0.064626,0.034142
21752,0.000084,0.010305,-0.122015,-0.269417,-0.305124,0.016823,-0.076639,-0.000471,-0.144087,-0.268829,-0.162456,-0.269294,-0.009966,0.663694,0.562030,-0.101714,-0.339811,0.627081,0.089327,0.146599


In [21]:
X_d1v2, cols = select_features(X, Y_d1v2, num_features)
print('X_d1v2: ', X_age.shape)
df = X.iloc[:, cols]
df

X_d1v2:  (5434, 20)


Unnamed: 0_level_0,IC_12,IC_15,SMN(9)_vs_SCN(69),SMN(66)_vs_SCN(69),VSN(93)_vs_SCN(98),CON(33)_vs_SCN(98),CON(61)_vs_SCN(98),VSN(16)_vs_SMN(9),VSN(15)_vs_SMN(9),VSN(8)_vs_SMN(9),VSN(8)_vs_SMN(2),VSN(5)_vs_SMN(27),CBN(4)_vs_SMN(80),CON(79)_vs_SMN(72),VSN(77)_vs_VSN(12),CON(79)_vs_CON(68),CON(81)_vs_CON(43),CON(63)_vs_CON(61),CBN(4)_vs_CON(96),CBN(18)_vs_DMN(32)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10001,0.007778,0.005123,-0.174268,-0.237781,0.061363,0.106191,-0.032733,-0.196441,-0.218899,-0.206151,-0.194491,-0.189249,-0.205466,0.506615,0.415278,-0.161351,-0.650465,0.103365,0.135609,0.162034
10002,0.005419,0.008819,-0.118116,-0.123721,0.011817,0.281979,-0.039436,0.025548,0.052610,0.017059,-0.256147,-0.017400,-0.186921,0.550249,0.644697,-0.532091,-0.641973,0.230863,0.022653,-0.111987
10004,-0.005184,0.006837,-0.026237,-0.123536,-0.249878,0.167544,-0.028234,-0.084339,-0.068464,0.002111,-0.203712,-0.240904,-0.107236,0.515128,0.561369,-0.304497,-0.681688,-0.098098,0.090604,-0.093209
10007,-0.003470,0.013983,-0.190934,-0.196622,0.164454,0.243260,0.201671,-0.196746,-0.201379,-0.363255,-0.431900,-0.027559,0.047974,0.571199,0.513946,0.070705,-0.563614,0.384572,0.038178,0.068315
10008,-0.004683,0.010363,-0.188684,-0.218577,-0.253535,0.502345,0.095305,-0.073015,0.021830,-0.015584,-0.106409,-0.216865,0.190114,0.244827,0.259507,0.186585,-0.572532,0.580120,-0.455242,0.257266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,-0.008532,0.012812,-0.204301,-0.085494,-0.036488,0.359534,0.074277,0.099193,0.059134,0.129983,-0.257288,-0.028123,0.040496,0.440924,0.426671,-0.160196,-0.433337,0.193070,-0.008901,-0.045315
21747,0.002536,0.006906,-0.225046,-0.200598,-0.286451,0.403434,0.003459,0.073125,0.047377,0.094822,0.293412,0.599027,-0.197806,0.502043,0.299722,0.109805,-0.520445,0.159934,0.002706,0.076437
21750,-0.005588,0.012912,-0.273216,-0.360215,-0.132522,0.198278,-0.126540,-0.359769,-0.534185,-0.470276,-0.267290,-0.517697,-0.104845,0.409956,0.282012,-0.012652,-0.668469,0.185859,0.064626,0.034142
21752,0.000084,0.010305,-0.122015,-0.269417,-0.305124,0.016823,-0.076639,-0.000471,-0.144087,-0.268829,-0.162456,-0.269294,-0.009966,0.663694,0.562030,-0.101714,-0.339811,0.627081,0.089327,0.146599


In [22]:
X_d2v1, cols = select_features(X, Y_d2v1, num_features)
print('X_d2v1: ', X_age.shape)
df = X.iloc[:, cols]
df

X_d2v1:  (5434, 20)


Unnamed: 0_level_0,CON(33)_vs_SCN(69),SMN(27)_vs_SCN(45),DMN(40)_vs_ADN(21),SMN(9)_vs_ADN(56),DMN(40)_vs_SMN(3),DMN(51)_vs_SMN(3),CON(96)_vs_SMN(9),CBN(7)_vs_SMN(9),DMN(40)_vs_SMN(54),DMN(40)_vs_SMN(66),DMN(17)_vs_SMN(80),DMN(71)_vs_VSN(77),CON(67)_vs_CON(79),DMN(40)_vs_CON(79),CON(38)_vs_CON(48),DMN(71)_vs_DMN(40),DMN(94)_vs_DMN(40),DMN(51)_vs_DMN(71),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10001,0.047839,-0.026252,-0.312873,0.412030,0.056139,-0.199641,-0.301165,-0.322968,-0.399011,-0.443414,-0.472251,-0.329438,0.674686,-0.646864,-0.491870,0.553354,0.633076,0.276572,-0.060712,0.515964
10002,-0.097829,0.025005,0.049829,0.272512,-0.055538,0.149459,-0.289190,-0.187717,-0.473139,-0.597774,-0.555564,-0.288355,0.605658,-0.748737,-0.161304,0.618487,0.794931,0.185737,0.028649,0.705524
10004,0.039998,0.046453,-0.363683,0.543602,-0.206951,-0.222281,-0.347200,0.135489,-0.380253,-0.335209,-0.547358,-0.183095,0.142926,-0.491857,-0.325853,0.361801,0.814326,0.329944,0.012435,0.665937
10007,0.059122,-0.382881,-0.193953,0.529440,-0.151919,-0.275767,-0.322545,-0.292497,-0.258078,-0.125413,-0.539007,-0.293164,0.338007,-0.430483,-0.298197,0.635747,0.747550,0.210867,0.161005,0.703679
10008,-0.067433,0.285562,-0.425658,0.157806,-0.445201,-0.213180,-0.236043,-0.101203,-0.373641,-0.635514,-0.253377,-0.067648,0.611881,-0.344151,-0.297851,0.361475,0.776100,0.311159,0.112785,0.574596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,0.182572,-0.038013,-0.287007,0.121585,-0.091542,-0.157984,-0.068518,0.018371,-0.183829,-0.170958,-0.311718,-0.051364,0.431944,-0.444649,-0.227013,0.600505,0.500673,0.302956,0.055471,0.568766
21747,-0.002574,-0.691454,-0.149966,0.166684,-0.236543,0.208085,-0.111724,-0.132470,-0.370368,-0.312447,-0.441865,-0.250452,0.555108,-0.328885,-0.104549,0.370689,0.513124,-0.284213,0.303312,0.704483
21750,0.019325,0.076572,-0.237146,0.536680,-0.308398,0.122291,-0.430573,-0.112076,-0.458021,-0.291561,-0.572659,-0.248346,0.390378,-0.244226,-0.344092,0.486941,0.505760,0.095415,0.343754,0.705207
21752,-0.186613,-0.128059,0.134706,0.524046,0.305244,-0.053184,-0.475248,-0.279612,-0.061084,-0.198596,-0.454013,0.007512,0.383444,-0.342059,-0.160817,0.424333,0.570798,0.354280,0.228977,0.560968


In [23]:
X_d2v2, cols = select_features(X, Y_d2v2, num_features)
print('X_d2v2: ', X_age.shape)
df = X.iloc[:, cols]
df

X_d2v2:  (5434, 20)


Unnamed: 0_level_0,CON(33)_vs_SCN(69),SMN(27)_vs_SCN(45),DMN(40)_vs_ADN(21),SMN(9)_vs_ADN(56),DMN(40)_vs_SMN(3),DMN(51)_vs_SMN(3),CON(96)_vs_SMN(9),CBN(7)_vs_SMN(9),DMN(40)_vs_SMN(54),DMN(40)_vs_SMN(66),DMN(17)_vs_SMN(80),DMN(71)_vs_VSN(77),CON(67)_vs_CON(79),DMN(40)_vs_CON(79),CON(38)_vs_CON(48),DMN(71)_vs_DMN(40),DMN(94)_vs_DMN(40),DMN(51)_vs_DMN(71),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18)
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10001,0.047839,-0.026252,-0.312873,0.412030,0.056139,-0.199641,-0.301165,-0.322968,-0.399011,-0.443414,-0.472251,-0.329438,0.674686,-0.646864,-0.491870,0.553354,0.633076,0.276572,-0.060712,0.515964
10002,-0.097829,0.025005,0.049829,0.272512,-0.055538,0.149459,-0.289190,-0.187717,-0.473139,-0.597774,-0.555564,-0.288355,0.605658,-0.748737,-0.161304,0.618487,0.794931,0.185737,0.028649,0.705524
10004,0.039998,0.046453,-0.363683,0.543602,-0.206951,-0.222281,-0.347200,0.135489,-0.380253,-0.335209,-0.547358,-0.183095,0.142926,-0.491857,-0.325853,0.361801,0.814326,0.329944,0.012435,0.665937
10007,0.059122,-0.382881,-0.193953,0.529440,-0.151919,-0.275767,-0.322545,-0.292497,-0.258078,-0.125413,-0.539007,-0.293164,0.338007,-0.430483,-0.298197,0.635747,0.747550,0.210867,0.161005,0.703679
10008,-0.067433,0.285562,-0.425658,0.157806,-0.445201,-0.213180,-0.236043,-0.101203,-0.373641,-0.635514,-0.253377,-0.067648,0.611881,-0.344151,-0.297851,0.361475,0.776100,0.311159,0.112785,0.574596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21746,0.182572,-0.038013,-0.287007,0.121585,-0.091542,-0.157984,-0.068518,0.018371,-0.183829,-0.170958,-0.311718,-0.051364,0.431944,-0.444649,-0.227013,0.600505,0.500673,0.302956,0.055471,0.568766
21747,-0.002574,-0.691454,-0.149966,0.166684,-0.236543,0.208085,-0.111724,-0.132470,-0.370368,-0.312447,-0.441865,-0.250452,0.555108,-0.328885,-0.104549,0.370689,0.513124,-0.284213,0.303312,0.704483
21750,0.019325,0.076572,-0.237146,0.536680,-0.308398,0.122291,-0.430573,-0.112076,-0.458021,-0.291561,-0.572659,-0.248346,0.390378,-0.244226,-0.344092,0.486941,0.505760,0.095415,0.343754,0.705207
21752,-0.186613,-0.128059,0.134706,0.524046,0.305244,-0.053184,-0.475248,-0.279612,-0.061084,-0.198596,-0.454013,0.007512,0.383444,-0.342059,-0.160817,0.424333,0.570798,0.354280,0.228977,0.560968


In [24]:
def load_subject(filepath):
    ## r = read only
    subject_data = None
    with h5py.File(filepath, 'r') as f:
        subject_data = f.get('SM_feature')
        subject_data = np.array(subject_data)
        ## 3 cuts, 53 images of frontal, axial and lateral cuts
        subject_data = np.moveaxis(subject_data, [0,1,2,3], [3,2,1,0])
        return subject_data

In [25]:
## Time to read in 20 training images gulp
num_samples = 20
folder_path = f'{BASE_PATH}/fMRI_train'
subject_data = []
for i, file_name in enumerate(file_names):
    if (i > num_samples):
        break
    subject_data.append(load_subject(f'{folder_path}/{file_name}.mat'))
len(subject_data)
print('individual subject: ', subject_data[0].shape)

individual subject:  (53, 63, 52, 53)


In [26]:
print(f'subject has {subject_data[0].shape[-1]} number of components')

subject has 53 number of components


In [27]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader    

In [31]:
class Trends_dataset(Dataset):
    def __init__(self, mode, folds):
        ## use this to retrieve and convert to tensor (torch.from_numpy)
        self.X = []
        self.Y = []
        
    def __getitem__(self, index):
        # for retrieving one item using index
        
    def __len__(self):
        #len(dataset)

IndentationError: expected an indented block (<ipython-input-31-ba98379e87de>, line 10)