# About

This notebook creates files that list the path of a single 3D MRI image per subject, so that FreeSurfer can be used for skull-stripping.
We aim to reduce the manual time spent retrieving patients' skull-stripped brain images.

-> The main idea is to use one MRI scan from every subject. This requires file processing, because every subject has more than one scan and we do not strictly need all of them. For this task, our purpose is not to detect disease progression in a particular subject.<br/>
-> In fact, in some cases, having additional results can also be considered a natural increase in data. However, we need to reduce pointless labor as much as possible to accomplish the task.

Moreover, the data will be split into train and test sets in a proper way (stratified by label). This is one of the crucial parts of the entire task.

In [108]:
import os
from pathlib import Path 
from nibabel.testing import data_path
import nibabel as nib

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

from sklearn.model_selection import train_test_split

In [145]:
class utils:
    """Helper methods for locating subject MRI files and writing train/test CSV files."""

    def __init__(self):
        # The helpers keep no instance state.
        return

    def read_image(self, img_path):
        """Load an image with nibabel and return the array from ``get_fdata()``.

        `img_path` is joined onto nibabel's testing `data_path`. Because
        ``os.path.join`` discards earlier components when a later one is
        absolute, an absolute `img_path` is loaded as-is.

        NOTE(review): a relative `img_path` therefore resolves inside
        nibabel's test-data directory -- confirm this is intended.
        """
        return nib.load(os.path.join(data_path, img_path)).get_fdata()

    @staticmethod
    def _iter_nii_files(path):
        """Yield ``(directory, file_path)`` for each ".nii" file under `path`, in ``os.walk`` order."""
        for root, _dirs, files in os.walk(path):
            for name in files:
                if name.endswith(".nii"):
                    yield root, os.path.join(root, name)

    @staticmethod
    def _scan_date(mri_path):
        """Return the date part of the session directory two levels above the file.

        Assumes a layout like ``.../2007-08-31_13_15_40.0/I78985/<file>.nii``,
        where the session directory name starts with the date followed by "_".
        """
        session_dir = Path(mri_path).parent.parent.name
        return session_dir.split('_', 1)[0]

    def take_mri(self, patient_id, path):
        """Find one ".nii" image for a subject.

        Parameters
        ----------
        patient_id : str
            Subject identifier; a directory matches when its path contains it.
        path : str
            Root directory that is searched recursively.

        Returns
        -------
        str or None
            Path of the first matching ".nii" file in ``os.walk`` traversal
            order, or ``None`` when no directory for the subject holds one.
        """
        for root, nii_path in self._iter_nii_files(path):
            if patient_id in root:
                return nii_path
        return None

    def create_train_test(self, x, y, path):
        """Split subjects into stratified train/test sets and save them as CSV.

        Uses ``train_test_split`` with ``stratify=y``, ``test_size=0.3`` and
        ``random_state=42``. Each split is written to
        ``<path>/train/train.csv`` or ``<path>/test/test.csv`` with the columns
        ``subject`` and ``type``. A file that already exists is left untouched
        and a message is printed instead.

        Parameters
        ----------
        x : array-like
            Subject identifiers.
        y : array-like
            Label of each subject, also used for stratification.
        path : str
            Directory under which the ``train`` and ``test`` folders are created.
        """
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, stratify=y, test_size=0.3, random_state=42
        )

        splits = (
            ('train', 'Train', pd.DataFrame({'subject': x_train, 'type': y_train})),
            ('test', 'Test', pd.DataFrame({'subject': x_test, 'type': y_test})),
        )

        for name, label, frame in splits:
            # Join path components instead of concatenating strings so that a
            # `path` without a trailing separator still lands in the right place.
            csv_path = Path(path) / name / f'{name}.csv'
            csv_path.parent.mkdir(parents=True, exist_ok=True)

            if csv_path.exists():
                print(f"{label} .csv file is already exist.")
            else:
                frame.to_csv(csv_path, index=False)

    def create_in_out_paths(self, data, path, mode):
        """Build the input and output image paths for every subject in `data`.

        Parameters
        ----------
        data : mapping or DataFrame
            Must provide an iterable of subject identifiers under ``'subject'``.
        path : str
            Root directory that is searched for the subjects' ".nii" files.
        mode : str
            Sub-folder name (e.g. ``'train'`` or ``'test'``) used for outputs.

        Returns
        -------
        tuple[list[str], list[str]]
            ``(input_paths, output_paths)``; each output path is
            ``<path>/<mode>/<subject>-<date>.nii``.

        Raises
        ------
        FileNotFoundError
            If no ".nii" file is found for one of the subjects.
        """
        # Walk the directory tree once instead of once per subject; the
        # candidates keep os.walk order, so the chosen file is the same one
        # take_mri would return.
        candidates = list(self._iter_nii_files(path))

        input_paths = []
        output_paths = []

        # Iterate over the values themselves: label-based data['subject'][i]
        # fails whenever the index is not exactly 0..n-1.
        for subject in data['subject']:
            mri_path = next(
                (nii_path for root, nii_path in candidates if subject in root),
                None,
            )
            if mri_path is None:
                raise FileNotFoundError(
                    f"No .nii file found for subject {subject!r} under {path!r}"
                )

            mri_date = self._scan_date(mri_path)
            input_paths.append(mri_path)
            output_paths.append(os.path.join(path, mode, f'{subject}-{mri_date}.nii'))

        return input_paths, output_paths

    

In [146]:
# Single helper instance used by the cells below.
u = utils()

In [111]:
# Root data directory. Set the ADNI_DATA_DIR environment variable to run the
# notebook on another machine; the default is the original location.
# os.path.join(..., '') guarantees a trailing separator, so string
# concatenation such as path + 'train/train.csv' keeps working.
path = os.path.join(
    os.environ.get('ADNI_DATA_DIR', '/Users/toygar/Desktop/Bitirme/data/'),
    '',
)
# Name of the ADNI metadata CSV that is read from `path`.
file = 'ADNI1_Complete_3Yr_1.5T_1_22_2023.csv'

In [112]:
# Load the ADNI metadata table from the data directory.
data = pd.read_csv(Path(path) / file)

In [113]:
# Summarise the 'Group' column per subject; describe() yields the columns
# count / unique / top / freq, where 'top' is the subject's most frequent group.
df = data.groupby(['Subject'])['Group'].describe()
# Number of subjects per most-frequent group.
df['top'].value_counts()

MCI    148
CN     135
AD      99
Name: top, dtype: int64

In [114]:
# Rebind instead of mutating in place so that re-running this cell is harmless:
# after the first run 'unique' and 'freq' are already gone, and errors='ignore'
# avoids the KeyError an in-place drop would raise the second time.
df = (
    df.rename(columns={'top': 'type', 'count': 'img_count'})
      .drop(columns=['unique', 'freq'], errors='ignore')
)

In [115]:
# Subject IDs (the frame's index) and their labels, as independent arrays.
x = df.index.to_numpy(copy=True)    # unique subject id
y = df['type'].to_numpy(copy=True)  # label

In [116]:
# Display the per-subject table (image count and label).
df

Unnamed: 0_level_0,img_count,type
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1
002_S_0295,7,CN
002_S_0413,7,CN
002_S_0619,5,AD
002_S_0685,6,CN
002_S_0729,7,MCI
...,...,...
137_S_0994,6,MCI
137_S_1041,4,AD
137_S_1414,6,MCI
941_S_1194,5,CN


In [117]:
# The index of df holds every unique subject ID.
df.index

Index(['002_S_0295', '002_S_0413', '002_S_0619', '002_S_0685', '002_S_0729',
       '002_S_0782', '002_S_0938', '002_S_1018', '002_S_1070', '002_S_1155',
       ...
       '137_S_0722', '137_S_0796', '137_S_0800', '137_S_0972', '137_S_0973',
       '137_S_0994', '137_S_1041', '137_S_1414', '941_S_1194', '941_S_1202'],
      dtype='object', name='Subject', length=382)

In [136]:
# Write the stratified train/test split CSV files under `path`; files that
# already exist are kept and a message is printed instead.
u.create_train_test(x, y, path)

Train .csv file is already exist.
Test .csv file is already exist.


## Check that the train and test CSV files are correctly separated

In [139]:
# Read both split files back from disk.
train = pd.read_csv(Path(path) / 'train' / 'train.csv')
test = pd.read_csv(Path(path) / 'test' / 'test.csv')

In [140]:
# Display the training split.
train

Unnamed: 0,subject,type
0,027_S_0404,AD
1,022_S_0130,CN
2,033_S_1098,CN
3,007_S_0249,MCI
4,098_S_0172,CN
...,...,...
262,123_S_1300,MCI
263,037_S_0303,CN
264,005_S_0221,AD
265,053_S_0507,MCI


In [141]:
# Display the test split.
test

Unnamed: 0,subject,type
0,052_S_1251,CN
1,099_S_0534,CN
2,011_S_0326,MCI
3,114_S_0374,AD
4,136_S_0429,MCI
...,...,...
110,016_S_0359,CN
111,023_S_0081,CN
112,022_S_0096,CN
113,018_S_0286,AD


In [142]:
# Label distribution of the training split.
train['type'].value_counts()

MCI    104
CN      94
AD      69
Name: type, dtype: int64

In [143]:
# Label distribution of the test split.
test['type'].value_counts()

MCI    44
CN     41
AD     30
Name: type, dtype: int64

## Prepare data for FreeSurfer stripper command
For us, the stripper command format will be: `mri_synthstrip -i input -o stripped --no-csf`


In [147]:
# Add, for every training subject, the source image path and the target path
# of the stripped image under <path>/train/.
train['input_path'], train['output_path'] = u.create_in_out_paths(train, path, 'train')

In [149]:
# Same for the test subjects, with targets under <path>/test/.
test['input_path'], test['output_path'] = u.create_in_out_paths(test, path, 'test')

In [150]:
# Peek at the first three input paths of the test split.
test['input_path'].values[:3]

array(['/Users/toygar/Desktop/Bitirme/data/ADNI/052_S_1251/MPR__GradWarp__B1_Correction__N3__Scaled/2007-08-31_13_15_40.0/I78985/ADNI_052_S_1251_MR_MPR__GradWarp__B1_Correction__N3__Scaled_Br_20071028104401944_S38754_I78985.nii',
       '/Users/toygar/Desktop/Bitirme/data/ADNI/099_S_0534/MPR-R__GradWarp__B1_Correction__N3__Scaled/2007-05-23_13_08_56.0/I65464/ADNI_099_S_0534_MR_MPR-R__GradWarp__B1_Correction__N3__Scaled_Br_20070807112330344_S32563_I65464.nii',
       '/Users/toygar/Desktop/Bitirme/data/ADNI/011_S_0326/MPR-R__GradWarp__B1_Correction__N3__Scaled/2009-04-06_11_29_26.0/I143475/ADNI_011_S_0326_MR_MPR-R__GradWarp__B1_Correction__N3__Scaled_Br_20090511113001987_S65640_I143475.nii'],
      dtype=object)

In [151]:
# Peek at the first three output paths of the test split.
test['output_path'].values[:3]

array(['/Users/toygar/Desktop/Bitirme/data/test/052_S_1251-2007-08-31.nii',
       '/Users/toygar/Desktop/Bitirme/data/test/099_S_0534-2007-05-23.nii',
       '/Users/toygar/Desktop/Bitirme/data/test/011_S_0326-2009-04-06.nii'],
      dtype=object)

In [152]:
# Display the training split, now including the input/output path columns.
train

Unnamed: 0,subject,type,input_path,output_path
0,027_S_0404,AD,/Users/toygar/Desktop/Bitirme/data/ADNI/027_S_...,/Users/toygar/Desktop/Bitirme/data/train/027_S...
1,022_S_0130,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/022_S_...,/Users/toygar/Desktop/Bitirme/data/train/022_S...
2,033_S_1098,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/033_S_...,/Users/toygar/Desktop/Bitirme/data/train/033_S...
3,007_S_0249,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/007_S_...,/Users/toygar/Desktop/Bitirme/data/train/007_S...
4,098_S_0172,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/098_S_...,/Users/toygar/Desktop/Bitirme/data/train/098_S...
...,...,...,...,...
262,123_S_1300,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/123_S_...,/Users/toygar/Desktop/Bitirme/data/train/123_S...
263,037_S_0303,CN,/Users/toygar/Desktop/Bitirme/data/ADNI/037_S_...,/Users/toygar/Desktop/Bitirme/data/train/037_S...
264,005_S_0221,AD,/Users/toygar/Desktop/Bitirme/data/ADNI/005_S_...,/Users/toygar/Desktop/Bitirme/data/train/005_S...
265,053_S_0507,MCI,/Users/toygar/Desktop/Bitirme/data/ADNI/053_S_...,/Users/toygar/Desktop/Bitirme/data/train/053_S...


In [153]:
# Overwrite the split CSV files so they also contain the path columns.
train.to_csv(Path(path) / 'train' / 'train.csv', index=False)
test.to_csv(Path(path) / 'test' / 'test.csv', index=False)