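For the schiz dataset, the subject names in Rskeleton_subject.csv carry a 'sub-' prefix and, for some cohorts, a session suffix (e.g. 'sub-st110288_ses-v1').
This notebook adds the 'sub-' prefix and the session names to the train, val, test, test_intra and used_schiz subject lists so that they match those names.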

In [200]:
import os
import warnings

import pandas as pd

# Load data

In [201]:
# Root directory of the schiz dataset.
dataset_path = "/neurospin/dico/data/deep_folding/current/datasets/schiz"
# Input: stratification files whose subject IDs still lack prefix and session.
src_path = dataset_path + "/aymeric_stratification"
# Output: same file names, written to a sub-directory of the input directory.
output_path = src_path + "/with_sub"

In [202]:
# Subject list stored next to the crops; its 'Subject' column holds the full
# names ('sub-' prefix, optional '_ses-*' suffix) that every other table must match.
target = pd.read_csv(f"{dataset_path}/crops/2mm/S.C.-S.Pe.C./mask/Rskeleton_subject.csv")

# Subject IDs are forced to str: some IDs are purely numeric (e.g. 10290), so a
# file containing only such IDs would otherwise be parsed as integers and the
# later '"sub-" + id' concatenation would fail.
# NOTE(review): `all` shadows the Python builtin; it is kept because the cells
# below reference this name.
all = pd.read_csv(f"{src_path}/used_schiz_subjects.csv", dtype={"participant_id": str})

# The split files have no header row: the ID column is labelled 0.
train = pd.read_csv(f"{src_path}/train_subjects.csv", header=None, dtype={0: str})
val = pd.read_csv(f"{src_path}/val_subjects.csv", header=None, dtype={0: str})
test_intra = pd.read_csv(f"{src_path}/test_intra_subjects.csv", header=None, dtype={0: str})
test = pd.read_csv(f"{src_path}/test_subjects.csv", header=None, dtype={0: str})

In [203]:
# Preview of the reference names: every row starts with 'sub-'; the last rows
# also carry a '_ses-*' suffix while the first rows do not.
target

Unnamed: 0,Subject
0,sub-INV027JRF0P
1,sub-INV056VJPL6
2,sub-INV05AFGN2Z
3,sub-INV0647UAN8
4,sub-INV07WT2ZL3
...,...
2176,sub-st110288_ses-v1
2177,sub-va110289_ses-v1
2178,sub-ye110322_ses-v1
2179,sub-yh100442_ses-v1


In [204]:
# Metadata table before the transformation: participant_id holds bare IDs
# (no 'sub-' prefix, no session suffix). Last displayed index: 1290.
all

Unnamed: 0,participant_id,sex,age,diagnosis,study,site
0,INV07WT2ZL3,M,29.0,control,BSNIP,Dallas
1,INV0AL14J6U,M,43.0,schizophrenia,BSNIP,Dallas
2,INV14XK7P6E,M,30.0,control,BSNIP,Dallas
3,INV1HXNTXYF,F,18.0,control,BSNIP,Dallas
4,INV1XCNF4J5,F,25.0,control,BSNIP,Dallas
...,...,...,...,...,...,...
1287,ESOC10060,F,26.0,control,PRAGUE,PRAGUE
1288,ESOC10019,F,28.0,control,PRAGUE,PRAGUE
1289,ESOC10063,M,31.0,control,PRAGUE,PRAGUE
1290,ESOC10098,F,27.0,control,PRAGUE,PRAGUE


In [205]:
# Train split before the transformation: bare IDs in column 0.
# Last displayed index: 916.
train

Unnamed: 0,0
0,ESOC10104
1,NM2020
2,or130001
3,ESOC10112
4,CC6287
...,...
913,HC027
914,HC010
915,SS086
916,HC023


In [206]:
# Validation split before the transformation: bare IDs in column 0, some of
# them purely numeric. Last displayed index: 101.
val

Unnamed: 0,0
0,CC0600
1,NM4329
2,CC2038
3,ESOC10077
4,NM1005
...,...
98,10290
99,11019
100,50007
101,HC026


In [207]:
# test_intra split before the transformation: bare IDs in column 0.
# Last displayed index: 113.
test_intra

Unnamed: 0,0
0,CC7563
1,NM1014
2,NM1082
3,ESOC10099
4,ESOC10027
...,...
110,10696
111,SS093
112,SS100
113,HC021


In [208]:
# Test split before the transformation: bare IDs in column 0.
# Last displayed index: 162.
test

Unnamed: 0,0
0,A00014522
1,A00001243
2,A00028405
3,A00028408
4,A00020968
...,...
159,A00027537
160,A00026945
161,A00014636
162,A00022915


# Transform and merge

In [209]:
# Cut each name at the first underscore only: the part before it is the
# 'sub-<id>' key used for merging, the part after it is the session (empty
# when the name has no underscore).
subject_parts = target['Subject'].str.split('_', n=1, expand=True)
target['first'] = subject_parts[0]
target['second'] = subject_parts[1]
target

Unnamed: 0,Subject,first,second
0,sub-INV027JRF0P,sub-INV027JRF0P,
1,sub-INV056VJPL6,sub-INV056VJPL6,
2,sub-INV05AFGN2Z,sub-INV05AFGN2Z,
3,sub-INV0647UAN8,sub-INV0647UAN8,
4,sub-INV07WT2ZL3,sub-INV07WT2ZL3,
...,...,...,...
2176,sub-st110288_ses-v1,sub-st110288,ses-v1
2177,sub-va110289_ses-v1,sub-va110289,ses-v1
2178,sub-ye110322_ses-v1,sub-ye110322,ses-v1
2179,sub-yh100442_ses-v1,sub-yh100442,ses-v1


In [210]:
def transform_merge(src, target, src_subject_col):
    """Replace the bare subject IDs of ``src`` by the full names of ``target``.

    Each ID in ``src[src_subject_col]`` is prefixed with ``"sub-"`` and
    inner-joined against ``target['first']``; the matching
    ``target['Subject']`` value then replaces the ID.

    Parameters
    ----------
    src : pandas.DataFrame
        Table holding the bare subject IDs. It is left unmodified.
    target : pandas.DataFrame
        Reference table; must provide the columns ``'Subject'``, ``'first'``
        and ``'second'``.
    src_subject_col : hashable
        Label of the ID column in ``src``.

    Returns
    -------
    pandas.DataFrame
        New table in which ``src_subject_col`` holds the full subject names.
        The helper columns ``'Subject'``, ``'first'`` and ``'second'`` are
        removed. Rows whose ID has no match in ``target`` are dropped, and an
        ID matching several ``target`` rows yields one row per match.

    Warns
    -----
    UserWarning
        If at least one subject of ``src`` has no match in ``target``.
    """
    # Work on a copy so that the caller's table is not altered as a side effect.
    renamed = src.copy()
    # astype(str) keeps the concatenation valid when the IDs were parsed as numbers.
    renamed[src_subject_col] = "sub-" + renamed[src_subject_col].astype(str)

    # The inner join below silently discards unmatched subjects: report them.
    unmatched = renamed.loc[
        ~renamed[src_subject_col].isin(target['first']), src_subject_col
    ]
    if not unmatched.empty:
        warnings.warn(
            f"{len(unmatched)} of {len(renamed)} subjects have no match in "
            f"target and are dropped (first ones: {unmatched.tolist()[:10]})",
            stacklevel=2,
        )

    merged = renamed.merge(target, left_on=src_subject_col, right_on='first')
    merged[src_subject_col] = merged['Subject']
    return merged.drop(columns=['Subject', 'first', 'second'])

In [211]:
# NOTE(review): the results are bound to the same names as the inputs, so this
# cell must not be re-run without reloading the CSV files first (the 'sub-'
# prefix would be prepended a second time and nothing would match any more).

# The metadata table keeps its IDs in 'participant_id'.
all = transform_merge(all, target, 'participant_id')

# The split files were read without header, so their ID column is labelled 0.
train, val, test_intra, test = (
    transform_merge(split, target, 0)
    for split in (train, val, test_intra, test)
)

In [212]:
# Metadata table after the merge: participant_id now holds the full names.
# Last displayed index is still 1290, as before the merge.
all

Unnamed: 0,participant_id,sex,age,diagnosis,study,site
0,sub-INV07WT2ZL3,M,29.0,control,BSNIP,Dallas
1,sub-INV0AL14J6U,M,43.0,schizophrenia,BSNIP,Dallas
2,sub-INV14XK7P6E,M,30.0,control,BSNIP,Dallas
3,sub-INV1HXNTXYF,F,18.0,control,BSNIP,Dallas
4,sub-INV1XCNF4J5,F,25.0,control,BSNIP,Dallas
...,...,...,...,...,...,...
1287,sub-ESOC10060_ses-v1,F,26.0,control,PRAGUE,PRAGUE
1288,sub-ESOC10019_ses-v1,F,28.0,control,PRAGUE,PRAGUE
1289,sub-ESOC10063_ses-v1,M,31.0,control,PRAGUE,PRAGUE
1290,sub-ESOC10098_ses-v1,F,27.0,control,PRAGUE,PRAGUE


In [213]:
# Train split after the merge. Last displayed index is 911 (916 before):
# NOTE(review): five rows appear to have been lost in the inner merge - check
# which subjects are missing from the reference list.
train

Unnamed: 0,0
0,sub-ESOC10104_ses-v1
1,sub-NM2020_ses-v1
2,sub-or130001_ses-v1
3,sub-ESOC10112_ses-v1
4,sub-CC6287_ses-v1
...,...
908,sub-HC027_ses-1
909,sub-HC010_ses-1
910,sub-SS086_ses-1
911,sub-HC023_ses-1


In [214]:
# Validation split after the merge. Last displayed index is 100 (101 before):
# NOTE(review): one row appears to have been lost; NM4329, shown at index 1
# before the merge, is no longer among the first rows.
val

Unnamed: 0,0
0,sub-CC0600_ses-v1
1,sub-CC2038_ses-v1
2,sub-ESOC10077_ses-v1
3,sub-NM1005_ses-v1
4,sub-en110309_ses-v1
...,...
97,sub-10290_ses-1
98,sub-11019_ses-1
99,sub-50007_ses-1
100,sub-HC026_ses-1


In [215]:
# test_intra split after the merge. Last displayed index is 112 (113 before):
# NOTE(review): one row appears to have been lost in the inner merge.
test_intra

Unnamed: 0,0
0,sub-CC7563_ses-v1
1,sub-NM1014_ses-v1
2,sub-NM1082_ses-v1
3,sub-ESOC10099_ses-v1
4,sub-ESOC10027_ses-v1
...,...
109,sub-10696_ses-1
110,sub-SS093_ses-1
111,sub-SS100_ses-1
112,sub-HC021_ses-1


In [216]:
# Test split after the merge. Last displayed index is 161 (162 before):
# NOTE(review): one row appears to have been lost in the inner merge.
test

Unnamed: 0,0
0,sub-A00014522_ses-v1
1,sub-A00001243_ses-v1
2,sub-A00028405_ses-v1
3,sub-A00028408_ses-v1
4,sub-A00020968_ses-v1
...,...
158,sub-A00027537_ses-v1
159,sub-A00026945_ses-v1
160,sub-A00014636_ses-v1
161,sub-A00022915_ses-v1


# Save data

In [217]:
# to_csv does not create missing directories: make sure the output one exists.
os.makedirs(output_path, exist_ok=True)

# The metadata table keeps its header row ...
all.to_csv(f"{output_path}/used_schiz_subjects.csv", index=False)

# ... whereas the split lists are written as they were read: no header, no index.
for split_name, split in (
    ("train", train),
    ("val", val),
    ("test_intra", test_intra),
    ("test", test),
):
    split.to_csv(f"{output_path}/{split_name}_subjects.csv", index=False, header=False)