In [1]:
import numpy as np
import pandas as pd
import os
import shutil
import nibabel as nib
import pickle
from scipy.ndimage import zoom

In [2]:
# Root folder whose entries are enumerated below.
path = './/Dataset'
dir_list = os.listdir(path)

# Each entry name prefixed with the root folder, in os.listdir order.
filt_list = [path + '//' + entry for entry in dir_list]

In [3]:
class Data:
    """Plain record for one dataset entry: image arrays plus tabular fields.

    Only ``id`` is set at construction time. Every other attribute starts
    as ``None`` (``RAW`` as an empty list) and is meant to be assigned by
    the code that loads the entry.
    """

    def __init__(self, tag):
        """Create an empty record identified by ``tag``."""
        self.id = tag

        # Image slots: one array each, except RAW which collects a list.
        self.MASKED_GFC_FSEG = self.SBJ_111 = None
        self.T88_111_GFC = self.T88_111_MASKED_GFC = None
        self.RAW = list()  # fresh list per instance

        # Tabular slots, left unset until a metadata row is copied in.
        self.sex = self.hand = self.age = None
        self.educ = self.SES = None
        self.MMSE = self.CDR = None
        self.eTIV = self.nWBV = self.ASF = None
        self.Delay = None

In [4]:
# Metadata table, read from the working directory. The loading loop further
# down selects rows by the ID column and reads the M/F, Hand, Age, Educ, SES,
# MMSE, CDR, eTIV, nWBV, ASF and Delay columns from it.
df = pd.read_csv('oasis_cross-sectional.csv')

In [5]:
def save_object(obj, filename):
    """Pickle ``obj`` to ``filename``, replacing any existing file.

    Parameters
    ----------
    obj : any picklable object
        The object to serialise.
    filename : str or path-like
        Destination file; it is opened in binary write mode, so existing
        content is overwritten.

    Raises
    ------
    OSError
        If the file cannot be opened for writing.
    pickle.PicklingError
        If ``obj`` cannot be pickled.
    """
    with open(filename, 'wb') as outp:  # Overwrites any existing file.
        # pickle.dump writes into the open handle; pickle.dumps would only
        # return the bytes and leave the file empty.
        pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL)

In [6]:
# Per-axis factors handed to scipy.ndimage.zoom for the two T88_111 volumes:
# the first three axes are scaled by 0.25, the fourth keeps its size (factor 1).
# NOTE(review): this requires those volumes to be 4-D — confirm against the data.
zoom_size = (0.25, 0.25, 0.25, 1)

In [7]:
# Output folders, created up front so the first write does not fail when
# they are missing.
OBJ_DIR = './/data_objects//'
dPath = './/dataset_3D//'
os.makedirs(OBJ_DIR, exist_ok=True)
os.makedirs(dPath, exist_ok=True)

# (Data attribute, CSV column) pairs copied from the matching row of df.
ROW_FIELDS = (
    ('sex', 'M/F'), ('hand', 'Hand'), ('age', 'Age'), ('educ', 'Educ'),
    ('SES', 'SES'), ('MMSE', 'MMSE'), ('CDR', 'CDR'), ('eTIV', 'eTIV'),
    ('nWBV', 'nWBV'), ('ASF', 'ASF'), ('Delay', 'Delay'),
)

# Data attributes that hold one volume each; every one gets its own .npz file.
VOLUME_FIELDS = ('MASKED_GFC_FSEG', 'SBJ_111', 'T88_111_GFC', 'T88_111_MASKED_GFC')


def _load_first(dir_path, suffix='.img'):
    """Load the first file in ``dir_path`` (os.listdir order) ending with ``suffix``.

    Raises IndexError when no file in the folder matches.
    """
    matches = [j for j in os.listdir(dir_path) if j.endswith(suffix)]
    return nib.load(dir_path + '//' + matches[0])


for folder_path in filt_list:
    directory = os.listdir(folder_path)
    for i in directory:
        session_dir = folder_path + '//' + i
        mprage_dir = session_dir + '//' + 'PROCESSED//MPRAGE'
        obj = Data(i)

        # Every .img under RAW is kept as loaded (no resizing).
        raw_path = session_dir + '//RAW'
        for raw in os.listdir(raw_path):
            if raw.endswith('.img'):
                img = nib.load(raw_path + '//' + raw)
                obj.RAW.append(np.array(img.get_fdata())) #Incase reshaping required we can do it here itself

        # First .img found in FSL_SEG and in SUBJ_111, stored as loaded.
        obj.MASKED_GFC_FSEG = np.array(_load_first(session_dir + '//FSL_SEG').get_fdata())
        obj.SBJ_111 = np.array(_load_first(mprage_dir + '//SUBJ_111').get_fdata())

        # The two T88_111 volumes are read as float32 and resampled by zoom_size.
        t88_path = mprage_dir + '//T88_111'
        img = _load_first(t88_path, 't88_gfc.img')
        obj.T88_111_GFC = zoom(img.get_fdata(dtype = 'float32'), zoom_size)
        img = _load_first(t88_path, 't88_masked_gfc.img')
        obj.T88_111_MASKED_GFC = zoom(img.get_fdata(dtype = 'float32'), zoom_size)

        # Copy the tabular fields from the first row whose ID equals the
        # folder name; .iloc[0] raises IndexError when no row matches.
        row = df[df.ID == i].iloc[0]
        for attr, column in ROW_FIELDS:
            setattr(obj, attr, row[column])

        save_object(obj, OBJ_DIR + i + '.pkl')

        # Any CDR that is not > 0 is stored as label 0. This includes NaN,
        # because `NaN > 0` evaluates to False.
        cdr = obj.CDR if obj.CDR > 0 else 0

        # RAW files are numbered from 1 and the numbering restarts for every
        # session; enumerate supplies the counter that was never initialised.
        for cnt, arr in enumerate(obj.RAW, start=1):
            np.savez_compressed(dPath + f"{i}_RAW_{cnt}.npz", buffer = arr, label = cdr)
        for field in VOLUME_FIELDS:
            np.savez_compressed(dPath + f"{i}_{field}.npz", buffer = getattr(obj, field), label = cdr)
    print('Completed directory: ', directory)

Completed directory:  ['OAS1_0001_MR1', 'OAS1_0002_MR1', 'OAS1_0003_MR1', 'OAS1_0004_MR1', 'OAS1_0005_MR1', 'OAS1_0006_MR1', 'OAS1_0007_MR1', 'OAS1_0009_MR1', 'OAS1_0010_MR1', 'OAS1_0011_MR1', 'OAS1_0012_MR1', 'OAS1_0013_MR1', 'OAS1_0014_MR1', 'OAS1_0015_MR1', 'OAS1_0016_MR1', 'OAS1_0017_MR1', 'OAS1_0018_MR1', 'OAS1_0019_MR1', 'OAS1_0020_MR1', 'OAS1_0021_MR1', 'OAS1_0022_MR1', 'OAS1_0023_MR1', 'OAS1_0025_MR1', 'OAS1_0026_MR1', 'OAS1_0027_MR1', 'OAS1_0028_MR1', 'OAS1_0029_MR1', 'OAS1_0030_MR1', 'OAS1_0031_MR1', 'OAS1_0032_MR1', 'OAS1_0033_MR1', 'OAS1_0034_MR1', 'OAS1_0035_MR1', 'OAS1_0037_MR1', 'OAS1_0038_MR1', 'OAS1_0039_MR1', 'OAS1_0040_MR1', 'OAS1_0041_MR1', 'OAS1_0042_MR1']
Completed directory:  ['OAS1_0349_MR1', 'OAS1_0350_MR1', 'OAS1_0351_MR1', 'OAS1_0352_MR1', 'OAS1_0353_MR1', 'OAS1_0353_MR2', 'OAS1_0354_MR1', 'OAS1_0355_MR1', 'OAS1_0356_MR1', 'OAS1_0357_MR1', 'OAS1_0358_MR1', 'OAS1_0359_MR1', 'OAS1_0361_MR1', 'OAS1_0362_MR1', 'OAS1_0363_MR1', 'OAS1_0365_MR1', 'OAS1_0366_MR1', 'O