In [None]:
# Import here so this cell runs on a fresh kernel: it is the first cell of the
# notebook and no earlier cell brings `os` / `sys` into scope.
import os
import sys

# Show the module search path and the absolute parent directory.
print(sys.path)
print(os.path.abspath('..'))

In [None]:
# Run train.py in a subprocess with Python warnings suppressed (-W ignore),
# passing "split-training" as the -jn argument.
# NOTE(review): presumably -jn is the job name option defined by the argparse
# parser in the next cell -- confirm against train.py.
!python -W ignore train.py -jn split-training

In [None]:
import os, sys
import datetime
import numpy as np
import random
import timeit
import argparse
import getpass

sys.path.append("..")
sys.path.append("../defacing")
from training.training import trainer
from helpers.utils import get_available_gpus
from distutils.dir_util import copy_tree
import tensorflow as tf

# ---------------------------------------------------------------------------
# K-fold training driver: for each fold, copy the fold's CSVs into the job's
# log directory, write a small job-info file and run the trainer.
# ---------------------------------------------------------------------------
list_gpu = get_available_gpus()
n_gpu = len(list_gpu)
print("Available GPUs: ", list_gpu)

parser = argparse.ArgumentParser(description="Training DefacingNet")
parser.add_argument("--GPU", default="0", type=str, help="which GPU to use")
parser.add_argument(
    "-jn",
    "--job_name",
    required=True,
    type=str,
    help="The job name is required. All the training will be saved here.",
)

# NOTE(review): parse_args() reads sys.argv. When this cell is executed inside a
# notebook kernel (rather than as train.py) the required --job_name is not on
# the command line and argparse exits -- confirm this cell is only a mirror of
# train.py.
args = parser.parse_args()

t0 = timeit.default_timer()
# os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU

# Probe GPU availability once and reuse the answer for the console message and
# every fold's job-info file instead of re-querying TensorFlow per fold.
gpu_available = tf.test.is_gpu_available(
    cuda_only=True, min_cuda_compute_capability=None
)
print("GPU Availability: ", gpu_available)


Kfolds = 10
nfolds = list(range(1, Kfolds + 1))

for fold in nfolds:
    root_dir = "./Logs/" + args.job_name + "/train_test_fold_{}".format(fold)
    dir_path = "./Logs/" + args.job_name + "/train_test_fold_{}/csv/".format(fold)

    # currently a very hacky way of doing this -- will need to fix later
    from_dir = os.path.abspath(
        "./csv/faced_defaced/train_test_fold_{}/csv/".format(fold)
    )
    to_dir = dir_path
    copy_tree(from_dir, to_dir)

    train_csv_path = os.path.join(dir_path, "training.csv")
    valid_csv_path = os.path.join(dir_path, "validation.csv")

    # Model Path
    model_path = root_dir + "/" + args.job_name

    # Create the output directories; exist_ok avoids the check-then-create race
    # and makes re-running a fold harmless.
    for needed_dir in (root_dir, dir_path, model_path):
        os.makedirs(needed_dir, exist_ok=True)

    # basic job info text file to identify jobs
    basic_job_info = os.path.join(os.path.abspath(root_dir), "job_info.txt")
    with open(basic_job_info, "w") as f:
        f.write("Jobname: %s\n" % args.job_name)
        f.write("Created on: %s\n" % str(datetime.datetime.now()))
        f.write("Created by: %s\n" % str(getpass.getuser()))
        f.write("Model store path: %s\n" % os.path.abspath(model_path))
        f.write("GPU Availability: %s\n" % str(gpu_available))
        f.write("Available GPUs: %s\n" % (",".join(list_gpu)))

    # NOTE(review): `gpus` is hard-coded to 4 although `n_gpu` was computed
    # above -- confirm whether the trainer should receive the detected count.
    train = trainer(
        train_csv_path,
        valid_csv_path,
        basic_job_info,
        model_path,
        image_size=32,
        batch_size=8,
        initial_epoch=0,
        nepochs=25,
        dropout=0.4,
        nclasses=2,
        nchannels=1,
        gpus=4,
    )
    train.train()

    # Elapsed time is cumulative since the start of the whole run, not per fold.
    elapsed = timeit.default_timer() - t0
    print("Time: {:.3f} min".format(elapsed / 60))
    del train


In [None]:
import matplotlib

matplotlib.use("Agg")

import os, sys
import glob
import random
import time
import imgaug
from imgaug import augmenters as iaa
import nibabel as nib
import SimpleITK as sitk
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
import numpy as np
from six.moves import range

sys.path.append('..')
from helpers.utils import *
from skimage.restoration import denoise_wavelet


class DataGeneratoronFly(tf.keras.utils.Sequence):
    """
    Keras ``Sequence`` that reads volume paths and labels from a CSV and
    builds class-balanced batches of 2D slices on the fly.

    The CSV must provide an ``X`` column (paths handed to ``load_vol``) and a
    ``Y`` column (labels).

    Parameters
    ----------
    data_csv : path of the CSV file to read.
    nclasses : number of distinct labels expected in the data. In train mode
        the labels are matched against ``range(nclasses)``.
    image_size : edge length every volume is resized to.
    batch_size : number of samples requested per batch.
    nchannels : stored on the instance; not otherwise used in this class.
    mode : one of 'Train', 'Valid', 'Test' (case-insensitive).
    name : stored on the instance (view selector used by the commented-out
        ``__getitem__``).
    samples_per_epoch : overrides the default epoch size when given.
    transform : optional imgaug augmenter applied in train mode.

    Raises
    ------
    ValueError : if ``mode`` is not one of the accepted values.
    AssertionError : if the number of distinct labels differs from ``nclasses``.
    """

    def __init__(
        self,
        data_csv,
        nclasses=2,
        image_size=128,
        batch_size=32,
        nchannels=1,
        mode="Train",
        name=None,
        samples_per_epoch=None,
        transform=None,
    ):

        self.batch_size = batch_size
        self.image_size = image_size
        self.nchannels = nchannels
        self.nclasses = nclasses
        self.transform = transform
        self.name = name
        self.paths = []
        self.labels = []

        # Parse the CSV once and take both columns from the same frame.
        table = pd.read_csv(data_csv)
        labels = table["Y"].values
        paths = table["X"].values

        index = np.arange(len(paths))
        np.random.shuffle(index)

        labels = labels[index]
        paths = paths[index]

        if mode.lower() in ["train", "valid", "test"]:
            self.mode = mode.lower()
        else:
            raise ValueError(
                "mode should be one among ['Train', 'Valid', 'Test'], given argument: {}".format(
                    mode
                )
            )

        # Branch on the normalised mode so that every spelling accepted by the
        # validation above ("train", "TRAIN", ...) selects the same behaviour.
        if self.mode == "train":
            # Undersample every class down to the size of the rarest one.
            class_counts = [np.sum(labels == i) for i in range(nclasses)]
            mincount = np.min(class_counts)
            for i in range(nclasses):
                self.paths.extend(paths[labels == i][:mincount])
                self.labels.extend(labels[labels == i][:mincount])

            self.paths = np.array(self.paths)
            self.labels = np.array(self.labels)

        else:
            # "valid" and "test": keep every sample, no undersampling.
            self.paths = np.array(paths)
            self.labels = np.array(labels)

        assert len(np.unique(self.labels)) == nclasses
        # Sample count per class, in the order returned by np.unique.
        self.len_arr = [
            int(np.sum(self.labels == arr)) for arr in np.unique(self.labels)
        ]

        index = np.arange(len(self.paths))
        np.random.shuffle(index)

        self.paths = self.paths[index]
        self.labels = self.labels[index]

        print(
            "============== paths: {}, labels: {} ================".format(
                len(self.paths), len(self.labels)
            )
        )

        if samples_per_epoch is None:
            if self.mode == "train":
                # Each training volume is visited four times per epoch.
                self.samples_per_epoch = 4 * len(self.paths)
            else:
                self.samples_per_epoch = len(self.paths)
        else:
            self.samples_per_epoch = samples_per_epoch

        print(
            "============== Samples/Epoch: {} ================".format(
                self.samples_per_epoch
            )
        )

    def __len__(self):
        """
        Denotes the number of batches per epoch
        """
        return int(np.floor(self.samples_per_epoch / self.batch_size))

    # NOTE: __getitem__ is intentionally left disabled, exactly as in the
    # original; without it the object cannot be indexed for batches.
    #
    #     def __getitem__(self, index):
    #         # Generate indexes of the batch
    #         X1, X2, X3, y = self.__data_generation(index)
    #         if self.name == "combined":
    #             return [X1, X2, X3], y
    #         elif self.name == "axial":
    #             # tf_dataset = tf.data.Dataset.from_tensor_slices((X1, y))
    #             return X1, y
    #         elif self.name == "coronal":
    #             return X2, y
    #         elif self.name == "sagittal":
    #             return X3, y

    def _standardize_volume(self, volume, mask=None):
        """
        Zero-mean / unit-variance scaling using the non-zero voxels only.

        volume: volume which needs to be normalized
        mask: brain mask, only required if you prefer not to
            consider the effect of air in normalization
        """
        # `is not None` is required here: `mask != None` on a numpy array is an
        # element-wise comparison whose truth value is ambiguous.
        if mask is not None:
            volume = volume * mask

        foreground = volume[volume != 0]
        mean = np.mean(foreground)
        std = np.std(foreground)

        return (volume - mean) / std

    def _normalize_volume(self, volume, mask=None, _type="MinMax"):
        """
        Rescale the volume intensities.

        volume: volume which needs to be normalized
        mask: brain mask, only required if you prefer not to
            consider the effect of air in normalization
        _type: {'Max', 'MinMax', 'Sum'}

        Raises ValueError for any other ``_type``.
        """
        if mask is not None:
            volume = mask * volume

        if _type == "MinMax":
            min_vol = np.min(volume)
            max_vol = np.max(volume)
            return (volume - min_vol) / (max_vol - min_vol)
        elif _type == "Max":
            return volume / np.max(volume)
        elif _type == "Sum":
            return volume / np.sum(volume)
        else:
            raise ValueError(
                "Invalid _type, allowed values are: {}".format("Max, MinMax, Sum")
            )

    def _augmentation(self, volume):
        """
        Apply ``self.transform`` to the volume.

        Augmenters that are safe to apply to masks
        Some, such as Affine, have settings that make them unsafe, so always
        test your augmentation on masks
        """
        volume_shape = volume.shape
        det = self.transform.to_deterministic()
        volume = det.augment_image(volume)

        assert volume.shape == volume_shape, "Augmentation shouldn't change volume size"
        return volume

    def _resizeVolume(self, volume):
        """
        Resize the volume to a cube whose edge is ``image_size / ratio``
        (``ratio`` is currently 1.0, i.e. exactly ``image_size``).

        volume: numpy 3d tensor
        """
        ratio = 1.0
        edge = int(self.image_size / ratio)

        resized_volume = resize_sitk(volume, (edge, edge, edge))
        return resized_volume

    def _get_random_slices(self, volume):
        """
        Pick one random slice along each of the three axes, each drawn from the
        central half of its axis, and return them with a trailing channel axis.
        """
        dim_x, dim_y, dim_z = volume.shape[:3]

        # Each index is bounded by the extent of the axis it actually indexes.
        # The draw order (x, z, y) is kept from the original implementation so
        # that seeded runs on cubic volumes produce the same slices as before.
        x = np.random.randint(dim_x // 4, 3 * dim_x // 4)
        z = np.random.randint(dim_z // 4, 3 * dim_z // 4)
        y = np.random.randint(dim_y // 4, 3 * dim_y // 4)

        slice_x = volume[x, :, :]
        slice_y = volume[:, y, :]
        slice_z = volume[:, :, z]

        return slice_x[..., None], slice_y[..., None], slice_z[..., None]

    def _center_align(self, volume):
        """
        Placeholder: returns the volume unchanged.
        """
        return volume

    def _axis_align(self, volume):
        """
        Placeholder: returns the volume unchanged.
        """
        return volume

    def __data_generation(self, index):
        """
        balanced data loader

        Builds batch number ``index`` with ``batch_size // nclasses`` samples
        per class and returns ``(X1, X2, X3, Y)``: the three slice stacks from
        ``_get_random_slices`` and the matching labels, jointly shuffled.
        """
        X1, X2, X3 = [], [], []
        Y = []
        nclass_batch = self.batch_size // self.nclasses

        # Class-wise views are loop invariant, so build them once per batch.
        classes = np.unique(self.labels)
        paths_per_class = [self.paths[self.labels == cls] for cls in classes]

        for i in range(nclass_batch):
            # `pos` (not the label value) indexes len_arr, which is ordered
            # like np.unique(self.labels).
            for pos, cls in enumerate(classes):
                pid_path = paths_per_class[pos][
                    int(index * nclass_batch + i) % self.len_arr[pos]
                ]
                label = cls  # np.eye(self.nclasses)[cls]

                volume, affine, size = load_vol(pid_path)
                volume = self._axis_align(volume)
                volume = self._center_align(volume)
                volume = self._resizeVolume(volume)
                volume = self._standardize_volume(volume)
                volume = self._normalize_volume(volume)

                if (self.mode.lower() == "train") and self.transform:
                    volume = self._augmentation(volume)

                ax, sg, co = self._get_random_slices(volume)

                # Samples whose three slices differ in shape are dropped.
                if ax.shape == sg.shape == co.shape:
                    X1.append(ax)
                    X2.append(sg)
                    X3.append(co)
                    Y.append(label)

        X1, X2, X3, Y = np.array(X1), np.array(X2), np.array(X3), np.array(Y)

        # Shuffle so the classes are not interleaved in a fixed pattern.
        index = np.arange(len(X1))
        np.random.shuffle(index)

        X1, X2, X3, Y = X1[index], X2[index], X3[index], Y[index]

        return X1, X2, X3, Y
    
    

if __name__ == "__main__":

    # Smoke test: build a training generator from the fold-1 training CSV.
    dir_path = os.path.abspath("csv/faced_defaced/train_test_fold_1/csv/")
    csv_path = os.path.join(dir_path, "training.csv")

    print(dir_path, csv_path)

    # Between zero and three of: horizontal flip, vertical flip, no-op, or one
    # rotation by a multiple of 90 degrees.
    quarter_turns = iaa.OneOf(
        [iaa.Affine(rotate=angle) for angle in (90, 180, 270)]
    )
    augmentation = iaa.SomeOf(
        (0, 3),
        [
            iaa.Fliplr(0.5),
            iaa.Flipud(0.5),
            iaa.Noop(),
            quarter_turns,
            # iaa.GaussianBlur(sigma=(0.0, 0.2)),
        ],
    )

    # Parameters: the validation settings equal the training ones except that
    # no augmentation is applied.
    train_transform_params = dict(
        image_size=128,
        batch_size=32,
        nclasses=2,
        nchannels=1,
        name="axial",
        samples_per_epoch=None,
        transform=augmentation,
    )
    valid_transform_params = dict(train_transform_params, transform=None)

    # Generators
    training_generator = DataGeneratoronFly(data_csv=csv_path, **train_transform_params)

    # ------------------------------------------------------------------
    # Scratch / debugging snippets kept for reference (all disabled).
    # ------------------------------------------------------------------
    # X, Y - (133*32, 128, 128, 1) (133*32)
    #
    # print (training_generator.__len__())
    #
    # validation_generator = DataGeneratoronFly(data_csv=csv_path, **valid_transform_params)
    # print(validation_generator.__len__())
    #
    # print(training_generator[0][0].shape)
    # print(training_generator[0][1].shape)
    #
    # train_dataset = tf.data.Dataset.from_tensor_slices(training_generator)
    #
    # for X, y in training_generator:
    #     print (X.shape, y.shape)
    #     print (y[:4])
    #     imshow(X[0,:,:,64, 0], X[1,:,:,64, 0], X[2,:,:,64, 0], X[3,:,:,64, 0])
    #
    # for ep in range(5):
    #     print ("============================")
    #     for X, y in validation_generator:
    #         print (X.shape, y.shape)
    #         print (y[:4])
    #         imshow(X[0,:,:,64, 0], X[1,:,:,64, 0], X[2,:,:,64, 0], X[3,:,:,64, 0])
    #
    # import time
    # import matplotlib.pyplot as plt
    #
    # start_time = time.time()
    # for i, (X, y) in enumerate(validation_generator):
    #     elapsed_time = time.time() - start_time
    #     start_time = time.time()
    #     plt.subplot(1, 3, 1)
    #     plt.imshow(X[0][0][:, :, 0])
    #     plt.subplot(1, 3, 2)
    #     plt.imshow(X[1][0][:, :, 0])
    #     plt.subplot(1, 3, 3)
    #     plt.imshow(X[2][0][:, :, 0])
    #     plt.title(str(y[0]))
    #     plt.savefig(str(i) + "_.png")
    #     print(y, type(X))
    #     print(X[0].shape, X[1].shape, X[2].shape)
    #     print(i, "Elapsed Time", np.round(elapsed_time, decimals=2), "seconds")
    #     pass


In [None]:
import tensorflow as tf

import nobrainer
from nobrainer import dataset, volume

# Locate the fold-1 training CSV.
dir_path = os.path.abspath("csv/faced_defaced/train_test_fold_1/csv/")
csv_path = os.path.join(dir_path, "training.csv")
# print(dir_path, csv_path)


# Parse the CSV once; "X" holds the volume paths and "Y" the labels.
training_table = pd.read_csv(csv_path)
labels = training_table["Y"].values
paths = training_table["X"].values


# print(labels)

n_classes=2
volume_shape = (256, 256, 256)
block_shape = (128, 128, 128)


# Materialise the (path, label) pairs: a bare zip object can be consumed only
# once and printing it shows nothing but its memory address.
training_paths = list(zip(paths, labels))

# Preview a handful of pairs instead of dumping the whole list.
print(training_paths[:5])

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): no version is pinned here -- consider pinning nibabel once the
# required version is known.
%pip install --upgrade nibabel

In [7]:
import os, sys
sys.path.append("..")
import binascii
from helpers.utils import load_vol, save_vol
from preprocessing.normalization import standardize_volume, normalize_volume
from preprocessing.conform import conform_data
import numpy as np
from glob import glob
from pathlib import Path
import nibabel as nib
from shutil import *
import subprocess


# Source trees that are globbed for input volumes.
orig_data_face = "/work/01329/poldrack/data/mriqc-net/data/face/T1w"
orig_data_deface = "/work/01329/poldrack/data/mriqc-net/data/defaced"

# Destination roots for the preprocessed volumes.
save_data_face = "/work/06850/sbansal6/maverick2/mriqc-shared/face"
save_data_deface = "/work/06850/sbansal6/maverick2/mriqc-shared/deface"

# Make sure both destination roots exist before anything is written.
for _save_root in (save_data_face, save_data_deface):
    os.makedirs(_save_root, exist_ok=True)


# Target grid passed to conform_data as `out_size`.
conform_size = (64, 64, 64)

def is_gz_file(filepath):
    """
    Tell whether a ``.gz``-named file really is gzip-compressed.

    filepath: path of the file to inspect.

    Returns True only when the extension is ``.gz`` AND the first two bytes
    are the gzip magic number (0x1f 0x8b); returns False otherwise, including
    for any path that does not end in ``.gz`` (the file is not opened then).
    """
    if os.path.splitext(filepath)[1] != '.gz':
        return False

    with open(filepath, 'rb') as test_f:
        # Equivalent to comparing the hex dump of the header with b'1f8b'.
        return test_f.read(2) == b'\x1f\x8b'
    

# in_file = '/work/01329/poldrack/data/mriqc-net/data/face/T1w/ds000009_anat/sub-01_T1w.nii.gz'
# dst_path = '/work/06850/sbansal6/maverick2/mriqc-shared/face'

# print(is_gz_file(in_file))

# if not is_gz_file(in_file):
#     filename = in_file.split("/")[-1]
#     print(filename)
#     rename_file = os.path.splitext(filename)[0]
#     dst = os.path.join(dst_path, rename_file)
    
#     subprocess.call(['cp', in_file, dst])
    
# in_file = '/work/06850/sbansal6/maverick2/mriqc-shared/face/conformed/sub-04_T1w.nii'
# if isinstance(in_file, (str, Path)):
#     in_file = nib.load(in_file)
    
    
# print(in_file)

# volume = conform_data(in_file, out_size=conform_size)

# print(type(volume))

def preprocess(pth, conform_size, save_dir=None):
    """
    Conform, standardize and normalize one volume, then save it as NIfTI.

    pth: path of the input volume; its base name is reused for the output.
    conform_size: target grid, forwarded to ``conform_data`` as ``out_size``
        and used to build the output affine.
    save_dir: root directory for the output; the file is written to
        ``<save_dir>/conformed/<basename of pth>``. Defaults to the
        module-level ``save_data_face`` (the previous hard-wired location).

    Returns the path of the saved file.
    """
    if save_dir is None:
        save_dir = save_data_face

    print(pth)
    filename = os.path.basename(pth)

    print('Conform step')
    volume = conform_data(pth, out_size=conform_size)

    print("Normalize/Standardize step")
    volume = normalize_volume(standardize_volume(volume))

    # The "conformed" sub-directory is not created anywhere else, so create it
    # here; otherwise writing the file fails on a fresh output tree.
    out_dir = os.path.join(save_dir, 'conformed')
    os.makedirs(out_dir, exist_ok=True)
    save_path = os.path.join(out_dir, filename)

    # Identity rotation/scale with a translation of -(size - 1) / 2 per axis.
    newaffine = np.eye(4)
    newaffine[:3, 3] = -0.5 * (np.array(conform_size) - 1)
    nii = nib.Nifti1Image(volume, newaffine, None)

    print("Save new affine")
    nii.to_filename(save_path)
    return save_path


# Local import: the copy below uses shutil.copy explicitly rather than relying
# on a star import or an external `cp` process.
import shutil

# Preprocess every face volume found one directory level below the source root.
for path in glob(orig_data_face + "/*/*.nii.gz"):
    print(path)
    if not is_gz_file(path):
        # The file is named *.nii.gz but is not gzip data: copy it under its
        # name without the ".gz" suffix and preprocess the copy instead.
        tempname = os.path.basename(path)
        rename_file = os.path.splitext(tempname)[0]
        dst = os.path.join(save_data_face, rename_file)
        print(dst)
        # Raises on failure, unlike the unchecked return code of a `cp` call.
        shutil.copy(path, dst)

        print(preprocess(dst, conform_size))
    else:
        print(preprocess(path, conform_size))


# for path in glob(orig_data_deface + "/*/*.nii.gz"):
#     try:
#         print(preprocess(path))
#     except:
#         pass

<class 'nibabel.nifti1.Nifti1Image'>
data shape (64, 64, 64)
affine: 
[[  1.    0.    0.  -31.5]
 [  0.    1.    0.  -31.5]
 [  0.    0.    1.  -31.5]
 [  0.    0.    0.    1. ]]
metadata:
<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : b''
db_name         : b''
extents         : 0
session_error   : 0
regular         : b''
dim_info        : 0
dim             : [ 3 64 64 64  1  1  1  1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : float64
bitpix          : 64
slice_start     : 0
pixdim          : [1. 1. 1. 1. 1. 1. 1. 1.]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 0
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : b''
aux_file        : b''
qform_code      : unknown
sform_code      : aligned
quater