# About

This notebook builds a file listing the path of a single 3D image per subject, so that FreeSurfer can be used for skull-stripping.
We aim to reduce the manual time spent retrieving patients' skull-stripped brain images.

-> Our main idea is to use one MRI scan from every subject. This requires some file processing, because every subject has more than one scan and we do not strictly need all of them. For this task, our purpose is not to detect disease progression for a particular subject.<br/>
-> In fact, in some cases the additional scans can be considered a natural increase in data. However, we need to reduce labor as much as possible to accomplish the task. (If you have enough labor to increase the data size by using the subjects' other visits, it may improve the overall test results.) If we develop a fully autonomous approach for [SynthStrip](https://surfer.nmr.mgh.harvard.edu/docs/synthstrip/), an update will follow.

Moreover, the train/test/validation split will be done in a proper (stratified) way. This is one of the crucial parts of the entire task.

In [47]:
import os
import shlex
from pathlib import Path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
from nibabel.testing import data_path
from sklearn.model_selection import train_test_split

In [48]:
class utils:
    """Helpers for picking one MRI per subject and building SynthStrip commands.

    The methods keep no state on the instance; the class only groups them.
    """

    def __init__(self):
        return

    def take_mri(self, subject_id, path):
        """Return the path of one ``.nii`` file belonging to ``subject_id``.

        ``path`` is walked recursively and the first ``.nii`` file found in a
        directory whose path contains ``subject_id`` is returned. Directories
        and files are visited in sorted order, so the same file is chosen on
        every run regardless of the order the file system lists entries in.

        Returns:
            The file path as a string, or ``None`` when nothing matches.
        """
        for root, dirs, files in os.walk(path):
            # Sorting in place steers the top-down traversal of os.walk.
            dirs.sort()
            if subject_id not in root:
                continue
            for file in sorted(files):
                if file.endswith(".nii"):
                    return os.path.join(root, file)
        return None

    def create_train_test(self, x, y, path):
        """Split subjects into train/test/val (70/15/15) and save them as CSV.

        Both splits are stratified on ``y`` and use a fixed random seed. The
        files are written to ``<path>/train/train.csv``,
        ``<path>/test/test.csv`` and ``<path>/val/val.csv``; missing folders
        are created. A CSV that already exists is left untouched and a
        message is printed instead.

        Args:
            x: Subject identifiers.
            y: Group label of each subject, aligned with ``x``.
            path: Root folder for the split folders (with or without a
                trailing separator).
        """
        x_train, x_rest, y_train, y_rest = train_test_split(
            x, y, stratify=y, test_size=0.3, random_state=42)
        # The held-out 30% is halved into the test and validation sets.
        x_test, x_val, y_test, y_val = train_test_split(
            x_rest, y_rest, stratify=y_rest, test_size=0.5, random_state=42)

        splits = (
            ('train', 'Train', x_train, y_train),
            ('test', 'Test', x_test, y_test),
            ('val', 'Validation', x_val, y_val),
        )
        # Joining path components (instead of concatenating strings) keeps the
        # result correct when ``path`` has no trailing separator.
        base = Path(path)
        for name, label, subjects, groups in splits:
            csv_path = base / name / f'{name}.csv'
            csv_path.parent.mkdir(parents=True, exist_ok=True)
            if csv_path.exists():
                print(f"{label} .csv file is already exist.")
                continue
            split_df = pd.DataFrame({'subject': subjects, 'group': groups})
            split_df.to_csv(csv_path, index=False)

    def create_in_out_paths(self, data, path, mode):
        """Build the SynthStrip input and output path for every subject.

        Args:
            data: Table with a ``subject`` column; its index may be anything.
            path: Root folder searched for scans and used for the outputs.
            mode: One of ``'train'``, ``'test'`` or ``'val'``; names the
                output sub-folder.

        Returns:
            ``(input_paths, output_paths)``, two lists aligned with
            ``data['subject']``. Each output is
            ``<path>/<mode>/<subject>-<date>.nii``.

        Raises:
            ValueError: If ``mode`` is not one of the accepted values.
            FileNotFoundError: If a subject has no ``.nii`` file under ``path``.
        """
        if mode not in ('train', 'test', 'val'):
            raise ValueError(
                f"mode must be 'train', 'test' or 'val', got {mode!r}")

        input_paths = []
        output_paths = []
        # Iterate over the values so the result does not depend on the index labels.
        for subject in data['subject']:
            mri_path = self.take_mri(subject, path)
            if mri_path is None:
                raise FileNotFoundError(
                    f"No .nii file found for subject {subject!r} under {path!r}")
            # Expected layout: .../<subject>/<series>/<date>_<time>/<image id>/<file>.nii
            date_dir = Path(mri_path).parent.parent.name
            mri_date = date_dir.split('_', 1)[0]
            input_paths.append(mri_path)
            output_paths.append(
                os.path.join(path, mode, f'{subject}-{mri_date}.nii'))

        return input_paths, output_paths

    def _synthstrip_cmd(self, input_path, output_path):
        """Return one ``mri_synthstrip`` command line with shell-quoted paths."""
        # Quoting leaves plain paths unchanged and protects spaces and other
        # shell-special characters.
        return ('mri_synthstrip -i ' + shlex.quote(input_path)
                + ' -o ' + shlex.quote(output_path) + ' --no-csf')

    def create_cmd_file(self, data):
        """Return one SynthStrip command per row of ``data``.

        ``data`` must provide aligned ``input_path`` and ``output_path`` columns.
        """
        return [
            self._synthstrip_cmd(src, dst)
            for src, dst in zip(data['input_path'], data['output_path'])
        ]

    def create_cmd_file_alternative(self, data):
        """Return the commands prefixed with their running number.

        Each entry reads ``Subject: <n> -> <command>`` and ends with a
        newline, where ``<n>`` is the zero-based row position.
        """
        return [
            f'Subject: {position} -> ' + self._synthstrip_cmd(src, dst) + '\n'
            for position, (src, dst) in enumerate(
                zip(data['input_path'], data['output_path']))
        ]
    

In [49]:
# One shared helper object; utils stores nothing on the instance.
u = utils()

In [50]:
# Machine-specific locations: the data root folder and the metadata CSV stored inside it.
path = '/Users/toygar/Desktop/Bitirme/data/'
file = 'ADNI1_Complete_3Yr_1.5T_1_22_2023.csv'

In [51]:
# Load the metadata table; later cells use its 'Subject' and 'Group' columns.
data = pd.read_csv(os.path.join(path, file))

In [52]:
# One row per subject. describe() on the text column 'Group' yields
# count / unique / top / freq, where 'top' is the subject's most frequent Group value.
df = data.groupby(['Subject'])['Group'].describe()
# How many subjects fall into each (most frequent) group.
df['top'].value_counts()

MCI    148
CN     135
AD      99
Name: top, dtype: int64

In [53]:
# Keep only the per-subject image count and dominant group, under clearer names.
df = (
    df.rename(columns={'top': 'group', 'count': 'img_count'})
      .drop(columns=['unique', 'freq'])
)

In [54]:
# Aligned arrays handed to create_train_test: subject IDs and their group labels.
x = np.array(df.index)   # unique subject id
y = np.array(df['group']) # label

In [55]:
df

Unnamed: 0_level_0,img_count,group
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1
002_S_0295,7,CN
002_S_0413,7,CN
002_S_0619,5,AD
002_S_0685,6,CN
002_S_0729,7,MCI
...,...,...
137_S_0994,6,MCI
137_S_1041,4,AD
137_S_1414,6,MCI
941_S_1194,5,CN


In [56]:
# The index of the per-subject table holds every unique subject ID.
df.index

Index(['002_S_0295', '002_S_0413', '002_S_0619', '002_S_0685', '002_S_0729',
       '002_S_0782', '002_S_0938', '002_S_1018', '002_S_1070', '002_S_1155',
       ...
       '137_S_0722', '137_S_0796', '137_S_0800', '137_S_0972', '137_S_0973',
       '137_S_0994', '137_S_1041', '137_S_1414', '941_S_1194', '941_S_1202'],
      dtype='object', name='Subject', length=382)

In [57]:
# Write the stratified train/test/val subject lists; CSV files that already exist are not overwritten.
u.create_train_test(x, y, path)

## Check that the train, test and validation CSV files are correctly separated

In [58]:
# Read the three split tables back from disk.
train, test, val = (
    pd.read_csv(os.path.join(path, f'{_split}/{_split}.csv'))
    for _split in ('train', 'test', 'val')
)

In [59]:
train

Unnamed: 0,subject,group
0,027_S_0404,AD
1,022_S_0130,CN
2,033_S_1098,CN
3,007_S_0249,MCI
4,098_S_0172,CN
...,...,...
262,123_S_1300,MCI
263,037_S_0303,CN
264,005_S_0221,AD
265,053_S_0507,MCI


In [60]:
test

Unnamed: 0,subject,group
0,127_S_0112,MCI
1,137_S_0631,MCI
2,099_S_0291,MCI
3,022_S_0961,MCI
4,068_S_0210,CN
5,052_S_1251,CN
6,024_S_1307,AD
7,021_S_0626,MCI
8,126_S_1221,AD
9,021_S_0273,MCI


In [61]:
val

Unnamed: 0,subject,group
0,029_S_0914,MCI
1,051_S_1123,CN
2,127_S_0754,AD
3,127_S_0394,MCI
4,027_S_0074,CN
5,057_S_1371,AD
6,941_S_1194,CN
7,007_S_0041,MCI
8,099_S_0534,CN
9,099_S_0352,CN


In [62]:
train['group'].value_counts()

MCI    104
CN      94
AD      69
Name: group, dtype: int64

In [63]:
test['group'].value_counts()

MCI    22
CN     20
AD     15
Name: group, dtype: int64

In [64]:
val['group'].value_counts()

MCI    22
CN     21
AD     15
Name: group, dtype: int64

## Prepare data for FreeSurfer stripper commands
    For us, the stripper command format will be:  mri_synthstrip -i input -o stripped --no-csf


In [65]:
# Attach the SynthStrip input and output path of every subject to each split table.
for _split, _table in (('train', train), ('test', test), ('val', val)):
    _table['input_path'], _table['output_path'] = u.create_in_out_paths(_table, path, _split)

In [66]:
test['input_path'].values[:3]

array(['/Users/toygar/Desktop/Bitirme/data/ADNI/127_S_0112/MPR__GradWarp__B1_Correction__N3__Scaled/2006-01-13_15_17_26.0/I35799/ADNI_127_S_0112_MR_MPR__GradWarp__B1_Correction__N3__Scaled_Br_20070110222426253_S11194_I35799.nii',
       '/Users/toygar/Desktop/Bitirme/data/ADNI/137_S_0631/MPR__GradWarp__N3__Scaled/2006-06-16_10_46_15.0/I46655/ADNI_137_S_0631_MR_MPR__GradWarp__N3__Scaled_Br_20070323174408307_S15598_I46655.nii',
       '/Users/toygar/Desktop/Bitirme/data/ADNI/099_S_0291/MPR__GradWarp__N3__Scaled/2006-09-29_10_44_48.0/I34531/ADNI_099_S_0291_MR_MPR__GradWarp__N3__Scaled_Br_20061228142752137_S19512_I34531.nii'],
      dtype=object)

In [67]:
test['output_path'].values[:3]

array(['/Users/toygar/Desktop/Bitirme/data/test/127_S_0112-2006-01-13.nii',
       '/Users/toygar/Desktop/Bitirme/data/test/137_S_0631-2006-06-16.nii',
       '/Users/toygar/Desktop/Bitirme/data/test/099_S_0291-2006-09-29.nii'],
      dtype=object)

In [68]:
train

Unnamed: 0,subject,group,input_path,output_path
0,027_S_0404,AD,/Users/toygar/Desktop/Bitirme/data/ADNI/027_S_...,/Users/toygar/Desktop/Bitirme/data/train/027_S...
1,022_S_0130,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/022_S_...,/Users/toygar/Desktop/Bitirme/data/train/022_S...
2,033_S_1098,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/033_S_...,/Users/toygar/Desktop/Bitirme/data/train/033_S...
3,007_S_0249,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/007_S_...,/Users/toygar/Desktop/Bitirme/data/train/007_S...
4,098_S_0172,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/098_S_...,/Users/toygar/Desktop/Bitirme/data/train/098_S...
...,...,...,...,...
262,123_S_1300,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/123_S_...,/Users/toygar/Desktop/Bitirme/data/train/123_S...
263,037_S_0303,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/037_S_...,/Users/toygar/Desktop/Bitirme/data/train/037_S...
264,005_S_0221,AD,/Users/toygar/Desktop/Bitirme/data/ADNI/005_S_...,/Users/toygar/Desktop/Bitirme/data/train/005_S...
265,053_S_0507,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/053_S_...,/Users/toygar/Desktop/Bitirme/data/train/053_S...


In [69]:
# Overwrite the split CSVs so they also carry the input/output path columns.
for _split, _table in (('train', train), ('test', test), ('val', val)):
    _table.to_csv(os.path.join(path, f'{_split}/{_split}.csv'), index=False)

*In a rather ugly way, we can simply create a new dataframe as follows:*

In [70]:
# Stack the three splits into one table with a fresh 0..n-1 index.
# NOTE(review): `all` shadows the Python builtin of the same name from here on;
# later cells read this variable, so a rename has to be done in every cell at once.
frame = [train, test, val]
all = pd.concat(frame).reset_index(drop=True)

In [71]:
all

Unnamed: 0,subject,group,input_path,output_path
0,027_S_0404,AD,/Users/toygar/Desktop/Bitirme/data/ADNI/027_S_...,/Users/toygar/Desktop/Bitirme/data/train/027_S...
1,022_S_0130,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/022_S_...,/Users/toygar/Desktop/Bitirme/data/train/022_S...
2,033_S_1098,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/033_S_...,/Users/toygar/Desktop/Bitirme/data/train/033_S...
3,007_S_0249,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/007_S_...,/Users/toygar/Desktop/Bitirme/data/train/007_S...
4,098_S_0172,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/098_S_...,/Users/toygar/Desktop/Bitirme/data/train/098_S...
...,...,...,...,...
377,126_S_0865,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/126_S_...,/Users/toygar/Desktop/Bitirme/data/val/126_S_0...
378,021_S_0141,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/021_S_...,/Users/toygar/Desktop/Bitirme/data/val/021_S_0...
379,094_S_1188,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/094_S_...,/Users/toygar/Desktop/Bitirme/data/val/094_S_1...
380,024_S_1063,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/024_S_...,/Users/toygar/Desktop/Bitirme/data/val/024_S_1...


In [72]:
# Write one SynthStrip command per line as plain text. DataFrame.to_csv is
# avoided because CSV quoting would wrap any command that contains a comma or
# a double quote in extra quotes, which corrupts the command.
commands = pd.DataFrame()
commands['cmd'] = u.create_cmd_file(all)
with open(os.path.join(path, 'commands.txt'), 'w', encoding='utf-8') as _cmd_file:
    _cmd_file.writelines(f'{_cmd}\n' for _cmd in commands['cmd'])

In [73]:
# More user friendly: each entry is numbered so you can track which subject you are at.
alternative_commands = pd.DataFrame()
alternative_commands['cmd'] = u.create_cmd_file_alternative(all)
# Every entry already ends with '\n'. Written through DataFrame.to_csv, that
# embedded newline makes the CSV writer wrap each entry in double quotes, so
# the file is written as plain text instead (entries separated by a blank line).
with open(os.path.join(path, 'alternative_commands.txt'), 'w', encoding='utf-8') as _alt_file:
    _alt_file.writelines(f'{_entry}\n' for _entry in alternative_commands['cmd'])