# Prepare necessary data

Download 2D dataset from:
https://surfer.nmr.mgh.harvard.edu/ftp/data/neurite/data/neurite-oasis.2d.v1.0.tar

via:
https://github.com/adalca/medical-datasets/blob/master/neurite-oasis.md

After downloading the 2D dataset from Neurite OASIS, each subject folder contains four files: slice_norm.nii.gz, slice_orig.nii.gz, slice_seg4.nii.gz, and slice_seg24.nii.gz. Of these, only slice_norm.nii.gz and slice_seg24.nii.gz are needed. For simplicity, make a copy of each subject folder containing only these two files.

In [1]:
import os
import shutil
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Source directory holding the original download, and destination directory
# that will receive the reduced copy split into test/ and train/.
src_dir = 'neurite-oasis.2d.v1.0'
dst_dir = '2Dreg_traintestt'

# Create the destination root first, then one subdirectory per split.
# exist_ok=False makes each call raise FileExistsError if the directory
# is already present, so an existing copy is never written into.
os.makedirs(dst_dir, exist_ok=False)
for split_name in ('test', 'train'):
    os.makedirs(os.path.join(dst_dir, split_name), exist_ok=False)

In [3]:
# Copy the two needed files from each subject subdirectory and reproduce the
# file structure in the new dataset, split into test/ and train/.

num_test = 100  # number of subject directories assigned to the test split
num_dir = 0     # subject directories placed in the test split so far

# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# train/test membership can differ between runs, machines and filesystems.
for item in sorted(os.listdir(src_dir)):
    item_path = os.path.join(src_dir, item)

    # all subdirectories are subjects; reproduce the subdirectory and copy the two files needed
    if os.path.isdir(item_path):

        # the first num_test subjects (in sorted order) go to test,
        # the rest (314 for the full dataset, per the original note) go to train
        if num_dir < num_test:
            subject_dir = os.path.join(dst_dir, 'test', item)
            num_dir += 1
        else:
            subject_dir = os.path.join(dst_dir, 'train', item)

        # exist_ok=False: fail rather than write into an already existing subject folder
        os.makedirs(subject_dir, exist_ok=False)

        img_file = os.path.join(item_path, 'slice_norm.nii.gz')
        seg_file = os.path.join(item_path, 'slice_seg24.nii.gz')

        # a missing file does not stop the copy, but it is reported so that an
        # incomplete subject folder does not go unnoticed
        if os.path.exists(img_file):
            shutil.copy(img_file, os.path.join(subject_dir, 'slice_norm.nii.gz'))
        else:
            print(f'Warning: {img_file} not found; skipping')

        if os.path.exists(seg_file):
            shutil.copy(seg_file, os.path.join(subject_dir, 'slice_seg24.nii.gz'))
        else:
            print(f'Warning: {seg_file} not found; skipping')

    # also copy the readme, .txt files with info on segmentation, subjects
    elif os.path.isfile(item_path):
        shutil.copy(item_path, os.path.join(dst_dir, item))
        

In [9]:
# Check that every subject was copied: compare the number of subject
# subdirectories found under the split directories of dst_dir with the
# number of lines in subjects.txt.

num_subject_dirs = 0

for item in os.listdir(dst_dir):
    item_path = os.path.join(dst_dir, item)

    # subjects live one level down, inside the train and test subdirectories
    if os.path.isdir(item_path):

        for subdir in os.listdir(item_path):
            subdir_path = os.path.join(item_path, subdir)

            # test the entry itself, not its parent, so that a stray file
            # inside train/ or test/ is not counted as a subject
            if os.path.isdir(subdir_path):
                num_subject_dirs += 1

# read the subject list from the copy placed in dst_dir by the copy step,
# rather than from a hard-coded directory that may not exist or may be stale;
# blank lines are ignored so a trailing newline does not count as a subject
with open(os.path.join(dst_dir, 'subjects.txt'), 'r') as fp:
    num_subject_lines = sum(1 for line in fp if line.strip())

print(f'Equal number of directories and lines in subjects.txt file?\n{num_subject_dirs==num_subject_lines}')


Equal number of directories and lines in subjects.txt file?
True
