# Feature and Label Generation
#### Here we read the stripped brain images, normalize them, pad them so they all have the same size, and generate the feature (X) and label (Y) arrays.

In [1]:
import numpy as np
import math
import pandas as pd
from nilearn import image
from nilearn import plotting
import matplotlib as plt
import sys
import os
import fnmatch
import warnings
warnings.filterwarnings("ignore")
import json
import nibabel
from nilearn.image import resample_img

###  Labels of the dataset
#### Here we read the labels from the dataset as *'Normal', 'MCI', 'LMCI', 'AD'* and merge the two middle classes into one (the code also maps *'SMC'* to this merged class). We also extract the age and sex of the patients.

In [3]:
import operator
ADNI1_Lables_dir = '../../Data_Set/Data_Lables/'

# Read the label table once and build one lookup per attribute, keyed by subject ID.
alldata = pd.read_csv(ADNI1_Lables_dir + "ADNI1AND2.csv")
subject_ids = alldata.Subject_ID
dict_imageStatus = dict(zip(subject_ids, alldata.DX_Group))  # subject ID -> Alzheimer status
raw_age = dict(zip(subject_ids, alldata.Age))                # subject ID -> age as read from the table
raw_sex = dict(zip(subject_ids, alldata.Sex))                # subject ID -> sex as read from the table

# Ages are divided by the largest age in the table.
age_max = max(raw_age.values())
dict_Age = {subject: age / age_max for subject, age in raw_age.items()}

# Sex is encoded numerically; a value other than 'M'/'F' raises KeyError here.
sex_to_num = {'M':1, 'F':0}
dict_Sex = {subject: sex_to_num[sex] for subject, sex in raw_sex.items()}

num_patients = len(dict_imageStatus)
print('number of distinct patients = ', num_patients)

number of distinct patients =  1797


In [10]:
def load_and_scale(X_img, scale, nx_orig, ny_orig, nz_orig):
    """Resample a 3-D volume onto a grid that is ``scale`` times coarser.

    Parameters
    ----------
    X_img : array
        3-D image; expected to be of size nx_orig x ny_orig x nz_orig.
    scale : float
        Factor by which every dimension is divided (values > 1 shrink the image).
    nx_orig, ny_orig, nz_orig : int
        Size of the original image along each axis. These are used as passed in
        (they were previously overwritten with 189, 212, 135 inside the function).

    Returns
    -------
    array
        The resampled image data, with shape
        (int(nx_orig // scale), int(ny_orig // scale), int(nz_orig // scale)).
    """
    # Calculate the new dimensions
    nx = int(nx_orig // scale)
    ny = int(ny_orig // scale)
    nz = int(nz_orig // scale)

    # Source voxels are spaced 1/scale apart, the target grid has unit spacing,
    # so target voxel j is taken from source position j * scale.
    # The last diagonal entry stays 1 so the matrix is a valid homogeneous affine.
    voxel_size = 1.0 / scale
    source_affine = np.diag([voxel_size, voxel_size, voxel_size, 1.0])

    # Interpolate in the new dimensions
    img_nii_original = nibabel.Nifti1Image(X_img, affine=source_affine)
    img_nii_scaled = resample_img(img_nii_original, target_affine=np.eye(4), target_shape=(nx, ny, nz))

    # np.asanyarray(img.dataobj) is nibabel's documented replacement for the
    # deprecated get_data(); it keeps the stored dtype.
    X_img_final = np.asanyarray(img_nii_scaled.dataobj)

    return X_img_final

#### Here we read the dimensions of the largest image among the stripped MRI images

In [11]:
import re

# rootPath = '../Data_Set/test/' 
rootPath = '../../Data_Set/Data_Stripped_Images_Array/' 

# Only the first line of maxANDmin.txt is needed; the context manager closes
# the file even if reading or splitting fails.
# NOTE(review): the indices used below assume that line splits (on ', ' or ' =')
# into three leading tokens followed by the three maximum sizes - confirm
# against the script that writes maxANDmin.txt.
with open(rootPath + "maxANDmin.txt","r") as file:
    first_line = re.split(', | =',file.readline())

max_x = int(first_line[3])
max_y = int(first_line[4])
max_z = int(first_line[5])

print('The size of image with maximum dimensions among all images: [max_x, max_y, max_z]= ', max_x, ",", max_y, ",",  max_z)

# Every directory entry except two is counted as an image.
# NOTE(review): presumably the two excluded entries are non-image files such as
# maxANDmin.txt - verify; counting '*.npy' files would be more robust.
number_of_images = len(os.listdir(rootPath)) - 2
print('Number of total images: ', number_of_images)


The size of image with maximum dimensions among all images: [max_x, max_y, max_z]=  189 , 212 , 135
Number of total images:  817


### Process of the dataset
#### Here we process the images and convert them into X and Y values, saved in batches of size 8 (`batchSize`), to be given as input to the 3D CNN

In [12]:
processedImgNo = 1     # 1-based position of the image file currently being handled
batchSize = 8          # number of labelled images written per batch
batchNo   = 103        # number used in the file names of the next batch to be saved
status_to_num = {'Normal':0, 'SMC':1, 'MCI':1, 'LMCI':1, 'AD':2}     # A mask for converting statuses to numbers 
scale = 13.0/8.0       # down-sampling factor passed to load_and_scale (same for every image)

# Rows of the batch currently being assembled. They are stacked only when the
# batch is saved, which avoids re-copying the whole batch for every new image:
# X_img: one flattened image per row (features)
# X_age, X_sex: one value per row
# Y: one numeric label per row
X_img = []
X_age = []
X_sex = []
Y     = []
dict_imgData = dict()  # a dictionary from subject_ID to MRI picture (not filled in this cell)

pattern = '*.npy'

# NOTE(review): the folder name says "Flipped" but the np.flip call below is
# commented out - confirm which variant this run is meant to produce.
XY_Values_Path = '../XY_Values_BatchSize'+ str(batchSize) +'_Scaled_Flipped/'            # XY_Values: folder for saving X matrix and Y matrix
# XY_Values_Path = '../XY_Values_BatchSize'+ str(batchSize) +'_Scaled/'            # XY_Values: folder for saving X matrix and Y matrix
os.makedirs(XY_Values_Path, exist_ok=True)


def _save_batch(out_dir, batch_no, imgs, ages, sexes, labels):
    """Stack the accumulated rows and write the four .npy files of one batch."""
    for prefix, rows in (('X_Img_Values', imgs), ('X_Age_Values', ages),
                         ('X_Sex_Values', sexes), ('YValues', labels)):
        # np.vstack turns k rows into a 2-D array with k rows, also when k == 1.
        np.save(out_dir + prefix + str(batch_no) + '.npy', np.vstack(rows))


target_shape = (max_x, max_y, max_z)

# NOTE(review): os.walk yields files in directory order, so batch contents can
# differ between machines; sort the file names if reproducibility matters.
for root, dirs, files in os.walk(rootPath):

    for filename in fnmatch.filter(files, pattern):

        key = str(filename[0:-4])              # subject_ID: file name without its 4-character extension
        val = np.load(os.path.join(root, filename))

        # Scale intensities by the image maximum; an all-zero image is left
        # as zeros instead of being turned into NaNs by a division by zero.
        peak = np.max(val)
        val_norm = val / peak if peak != 0 else val.astype(float)

        # padding val matrix with zeros to enlarge it up to max_x by max_y by max_z:
        # first the same amount on both sides of every axis ...
        half_pad = [(int((full - size) * 0.5),) * 2
                    for size, full in zip(val_norm.shape, target_shape)]
        valPadded = np.array(np.pad(val_norm, half_pad, 'constant'))
        print('padded: ', valPadded.shape[0],valPadded.shape[1],valPadded.shape[2])

        # ... then one extra voxel at the far end of any axis that is still
        # short (this happens when the size difference was odd).
        tail_pad = [(0, 1 if size != full else 0)
                    for size, full in zip(valPadded.shape, target_shape)]
        valPadded = np.pad(valPadded, pad_width=tail_pad, mode='constant', constant_values=0)
        print('final paded: ', valPadded.shape[0],valPadded.shape[1],valPadded.shape[2])

        assert  (valPadded.shape[0] == max_x) , "x dimention != max_x"
        assert  (valPadded.shape[1] == max_y) , "y dimention != max_y"
        assert  (valPadded.shape[2] == max_z) , "z dimention != max_z"

        valPadded = load_and_scale(valPadded , scale, max_x, max_y, max_z  )
#         valPadded = np.flip(valPadded,2)
        print('scaled final: ', valPadded.shape[0],valPadded.shape[1],valPadded.shape[2])
        print('')

        # Images without an entry in the label table are skipped, but still
        # counted in processedImgNo.
        if key in dict_imageStatus:

            X_img.append(np.reshape(valPadded, [-1]))   # flattening the image data into a row vector
            X_age.append(dict_Age[key])
            X_sex.append(dict_Sex[key])
            Y.append(status_to_num[dict_imageStatus[key]])

            if len(X_img) == batchSize:
                _save_batch(XY_Values_Path, batchNo, X_img, X_age, X_sex, Y)
                print('No. of images processed: ', processedImgNo , ', No. of batches saved: ', batchNo)

                batchNo = batchNo + 1
                X_img, X_age, X_sex, Y = [], [], [], []

        processedImgNo = processedImgNo + 1

# Save whatever is left once all files have been visited. Previously a trailing
# partial batch was written only if the last file was labelled, was not the
# first image of its batch, and processedImgNo matched number_of_images, so a
# final single image was silently dropped.
if X_img:
    _save_batch(XY_Values_Path, batchNo, X_img, X_age, X_sex, Y)
    print('No. of images processed: ', processedImgNo - 1 , ', No. of batches saved: ', batchNo)
    X_img, X_age, X_sex, Y = [], [], [], []
                
                
                
                
            
        

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  8 , No. of batches saved:  103
padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled f

scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  104 , No. of batches saved:  115
padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  112 , No. of batches saved:  116
padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 21

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  208 , No. of batches saved:  128
padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled

scaled final:  116 130 83

No. of images processed:  304 , No. of batches saved:  140
padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  312 , No. of batches saved:  141
padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 21

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  408 , No. of batches saved:  153
padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  416 , No. of batches saved:

scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  512 , No. of batches saved:  166
padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  608 , No. of batches saved:  178
padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  616 , No. of batches saved:  179
padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 13

scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  712 , No. of batches saved:  191
padded:  189 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 211 134
final

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  189 211 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 135
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

padded:  188 212 134
final paded:  189 212 135
scaled final:  116 130 83

No. of images processed:  816 , No. of batches saved:  204
padded:  189 212 135
final paded:  189 212 135
scaled final:  116 130 83

