# Generates crops and pickle file using dataset_gen_pipe

This notebook crops images according to bounding box coordinates (one per sulcus)

# Imports

In [None]:
import sys
import os
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import the aims module
from soma import aims
# the brainplot package
import colorado as cld

print((sys.version))

The following lines make it possible to import deep_folding even if this notebook is executed from the notebooks subfolder (and the package has not been installed):

 /notebooks/use_transform.ipynb  
 /deep_folding/__init__.py

In [None]:
sys.path.append((os.path.abspath('../')))
import deep_folding
print((os.path.dirname(deep_folding.__file__)))

In [None]:
_ALL_SUBJECTS = -1

# User-specific variables

In [None]:
sulcus = 'S.T.s.ter.asc.ant._left'

In [None]:
side = 'L'

We now assign path names and other user-specific variables.

The source directory is where the database lies. It contains the morphologist analysis subfolder ANALYSIS/3T_morphologist


In [None]:
# Absolute path of the source database, resolved from the current working directory
src_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/source/unsupervised'))
print("src_dir = " + src_dir)

The target directory tgt_dir is where the files will be saved

In [None]:
# Absolute path of the directory receiving the generated files
tgt_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/target/data/linear'))
print("tgt_dir = " + tgt_dir)

The reference directory is where the equivalent reference file has been saved

In [None]:
# Absolute path of the directory holding the reference files
ref_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/reference/data/linear'))
print("ref_dir = " + ref_dir)

In [None]:
# Absolute path of the reference transform directory
transform_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/reference/transform'))
print("transform_dir = " + transform_dir)

In [None]:
# Absolute path of the reference bounding box directory
bbox_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/reference/bbox'))
print("bbox_dir = " + bbox_dir)

In [None]:
# Absolute path of the reference mask directory
mask_dir = os.path.abspath(os.path.join(os.getcwd(), '../data/reference/mask'))
print("mask_dir = " + mask_dir)

In [None]:
print((sys.argv))

# Illustration of main program uses

We will first run the program without any effect, either by setting the number of subjects to 0 or by calling the help function.

### Using external calls

In [None]:
!python ../deep_folding/anatomist_tools/dataset_gen_pipe.py -n 0 -t tgt_local_dir

In [None]:
# Clean
!rm -rf tgt_local_dir

In [None]:
!python ../deep_folding/anatomist_tools/bounding_box.py --help

### By using the main function call

In [None]:
from deep_folding.anatomist_tools import dataset_gen_pipe
print((dataset_gen_pipe.__file__))

In [None]:
# Command line equivalent to the external call, kept as a string for reference
args = "-n 0 -t " + tgt_dir
# Build argv as a list rather than splitting the string on spaces, so that
# a target directory containing a space stays a single argument
argv = ["-n", "0", "-t", tgt_dir]

In [None]:
dataset_gen_pipe.main(argv)

In [None]:
args = "--help"
argv = args.split(' ')

In [None]:
dataset_gen_pipe.main(argv)

### By using the API function call

In [None]:
# Same dry run through the Python API: number_subjects is set to 0, so the
# call is expected to have no effect.
# NOTE(review): list_sulci receives a single sulcus name (a str), not a
# list -- confirm that the API accepts it.
dataset_gen_pipe.dataset_gen_pipe(src_dir=src_dir,
                                  tgt_dir=tgt_dir,
                                  bbox_dir=bbox_dir,
                                  cropping='bbox',
                                  list_sulci=sulcus,
                                  side=side,
                                  number_subjects=0)

# Crops with linear interpolation

## Main program

In [None]:
interp = 'linear'

In [None]:
# Runs the pipeline on a single subject (number_subjects=1) with
# cropping='bbox', passing the interpolation mode stored in interp;
# outputs are written under tgt_dir.
dataset_gen_pipe.dataset_gen_pipe(src_dir=src_dir,
                                  tgt_dir=tgt_dir,
                                  bbox_dir=bbox_dir,
                                  cropping='bbox',
                                  list_sulci=sulcus,
                                  side=side,
                                  interp=interp,
                                  number_subjects=1)

## Result analysis

### Analysis of the inputs

In [None]:
# Gets source file as numpy array
skeleton_dir = os.path.join(src_dir, "ANALYSIS/3T_morphologist/100206/t1mri/default_acquisition/default_analysis/segmentation")
# Keep a single file path rather than the whole glob list, as the
# nearest-neighbour section does, so that vol_source_file can be handed
# directly to code expecting a file name (e.g. visualize_all_image)
vol_source_file = glob.glob(skeleton_dir + '/' + side + '*.nii.gz')[0]
vol_source = aims.read(vol_source_file)
arr_source = vol_source.arraydata()
print("shape of source skeleton = ", arr_source.shape)

In [None]:
np.unique(arr_source)

In [None]:
# Number of occurrences of each value in the flattened source array.
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated since pandas 2.1.
pd.Series(arr_source.ravel()).value_counts()

### Analysis of the outputs

Prints the list of files of the target directory

In [None]:
print("Files in target directory:")
print(('\n'.join(os.listdir(tgt_dir))))

In [None]:
print("Files in crops target directory:")
print(('\n'.join(os.listdir(tgt_dir + '/' + side + 'crops'))))

In [None]:
# First json file found in the target directory
tgt_json_file = glob.glob(tgt_dir + '/*.json')[0]
print("tgt_json_file = ", tgt_json_file, '\n')
# glob already returns the match prefixed with tgt_dir, so it is opened as
# is: joining it with tgt_dir again would duplicate the directory whenever
# tgt_dir is a relative path
with open(tgt_json_file, 'r') as f:
    data_tgt = json.load(f)
print(json.dumps(data_tgt, sort_keys=True, indent=4))

Obtained output (we read the cropped file from the target directory):

In [None]:
# Gets target crop as numpy array
# Crops are looked up in the <side>crops subdirectory of the target directory
cropped_target_dir = os.path.join(tgt_dir, side+'crops')
vol_target_file = glob.glob(cropped_target_dir + '/' + '*.nii.gz')
# Only the first match is read (glob returns its results in arbitrary order)
vol_target = aims.read(vol_target_file[0])
arr_target = vol_target.arraydata()
print("shape of target cropped image = ", arr_target.shape)

In [None]:
np.unique(arr_target)

In [None]:
unique_target = np.unique(arr_target, return_counts=True)
print(unique_target)

In [None]:
# Most frequent values of the flattened target crop.
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated since pandas 2.1.
pd.Series(arr_target.ravel()).value_counts().head()

Expected output (we read the cropped file from the reference directory):

In [None]:
# Gets reference crop as numpy array
# Reference crops are looked up in the <side>crops subdirectory of ref_dir
cropped_ref_dir = os.path.join(ref_dir, side+'crops')
vol_ref_file = glob.glob(cropped_ref_dir + '/' + '*.nii.gz')
# Only the first match is read (glob returns its results in arbitrary order)
vol_ref = aims.read(vol_ref_file[0])
arr_ref = vol_ref.arraydata()
print("shape of reference cropped image = ", arr_ref.shape)

In [None]:
# Most frequent values of the flattened reference crop.
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated since pandas 2.1.
pd.Series(arr_ref.ravel()).value_counts().head()

In [None]:
np.array_equal(arr_target, arr_ref)

In [None]:
# Tolerance: pixels whose absolute difference is >= epsilon count as different
epsilon = 1
# Subtract in float64 so that unsigned or narrow integer dtypes cannot wrap
# around (e.g. uint8: 0 - 1 == 255), which would give wrong counts as soon
# as epsilon is larger than 1
difference = (np.abs(np.asarray(arr_ref, dtype=np.float64)
                     - np.asarray(arr_target, dtype=np.float64)) >= epsilon)
number_differences = np.count_nonzero(difference)
index_of_differences = np.where(difference)
print("Number of different pixels : ", number_differences)
print("Index of different pixels : ", index_of_differences)

In [None]:
print(list(zip(arr_target[index_of_differences], arr_ref[index_of_differences])))

In [None]:
def are_arrays_almost_equal(arr1, arr2, epsilon, max_number_different_pixels):
    """Tells whether two arrays are equal up to a tolerated number of pixels.

    A pixel counts as different when the absolute difference between arr1
    and arr2 at that position is greater than or equal to epsilon.

    Args:
        arr1: first array (or array-like) of numbers
        arr2: second array (or array-like) of numbers, compared element-wise
            with arr1
        epsilon: smallest absolute difference for which two pixels are
            considered different
        max_number_different_pixels: largest number of different pixels for
            which the arrays are still reported as almost equal

    Returns:
        A tuple (almost_equal, number_different_pixels), where almost_equal
        is True if number_different_pixels <= max_number_different_pixels.
    """
    # Subtract in float64: with unsigned (or narrow signed) integer dtypes,
    # arr1 - arr2 would wrap around instead of going negative
    # (e.g. uint8: 0 - 1 == 255) and a difference of 1 would look like 255
    delta = np.asarray(arr1, dtype=np.float64) - np.asarray(arr2, dtype=np.float64)
    difference = (np.abs(delta) >= epsilon)
    number_different_pixels = np.count_nonzero(difference)
    return number_different_pixels <= max_number_different_pixels, number_different_pixels

In [None]:
equal_arrays, number_different_pixels = are_arrays_almost_equal(arr_ref, arr_target, 1, 2)

In [None]:
print(equal_arrays)

# Crops with mask and with nearest-neighbour interpolation

## Main program

In [None]:
# Absolute path of the directory receiving the nearest-neighbour crops
tgt_dir_nearest = os.path.join(os.getcwd(), '../data/target/data/nearest')
tgt_dir_nearest = os.path.abspath(tgt_dir_nearest)
# Label the output with the variable actually printed: tgt_dir is a
# different variable (the linear target directory) that is still in use
print("tgt_dir_nearest = " + tgt_dir_nearest)

In [None]:
# Switches to 'nearest' interpolation and to cropping='mask' (with mask_dir
# instead of bbox_dir); a single subject is processed (number_subjects=1)
# and outputs are written under tgt_dir_nearest.
interp = 'nearest'
dataset_gen_pipe.dataset_gen_pipe(src_dir=src_dir,
                                  tgt_dir=tgt_dir_nearest,
                                  mask_dir=mask_dir,
                                  cropping='mask',
                                  list_sulci=sulcus,
                                  side=side,
                                  interp=interp,
                                  number_subjects=1)

## Result analysis

### Analysis of the inputs

In [None]:
# Gets source file as numpy array
skeleton_dir = os.path.join(src_dir, "ANALYSIS/3T_morphologist/100206/t1mri/default_acquisition/default_analysis/segmentation")
# Only the first file whose name starts with the side letter is kept
# (glob returns its results in arbitrary order); vol_source_file is a
# single path here, not a list
vol_source_file = glob.glob(skeleton_dir + '/' + side + '*.nii.gz')[0]
vol_source = aims.read(vol_source_file)
arr_source = vol_source.arraydata()
print("shape of source skeleton = ", arr_source.shape)

In [None]:
np.unique(arr_source)

In [None]:
# Number of occurrences of each value in the flattened source array.
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated since pandas 2.1.
pd.Series(arr_source.ravel()).value_counts()

### Analysis of the outputs

Prints the list of files in the crops subdirectory of the target directory

In [None]:
print("Files in crops target directory:")
print(tgt_dir_nearest)
print(('\n'.join(os.listdir(tgt_dir_nearest + '/' + side + 'crops'))))

In [None]:
# First json file found in the nearest-neighbour target directory
tgt_json_file = glob.glob(tgt_dir_nearest + '/*.json')[0]
print("tgt_json_file = ", tgt_json_file, '\n')
# glob already returns the match prefixed with tgt_dir_nearest, so it is
# opened as is: joining it with tgt_dir_nearest again would duplicate the
# directory whenever tgt_dir_nearest is a relative path
with open(tgt_json_file, 'r') as f:
    data_tgt = json.load(f)
print(json.dumps(data_tgt, sort_keys=True, indent=4))

Obtained output (we read the cropped file from the target directory):

In [None]:
# Gets target crop as numpy array
# Crops are looked up in the <side>crops subdirectory of tgt_dir_nearest
cropped_target_dir = os.path.join(tgt_dir_nearest, side+'crops')
vol_target_file = glob.glob(cropped_target_dir + '/' + '*.nii.gz')
# Only the first match is read (glob returns its results in arbitrary order)
vol_target = aims.read(vol_target_file[0])
arr_target = vol_target.arraydata()
print("shape of target cropped image = ", arr_target.shape)

In [None]:
np.unique(arr_target)

The goal here is to compare the proportion of each value present in the target array with the proportion of each value present in the source array:

In [None]:
# Percentage of pixels holding each value in the target crop, rounded to
# one decimal. Series.value_counts replaces the top-level pd.value_counts,
# which is deprecated since pandas 2.1.
np.around(pd.Series(arr_target.ravel()).value_counts() / arr_target.size * 100, 1)

In [None]:
# Percentage of pixels holding each value in the source array, rounded to
# one decimal. Series.value_counts replaces the top-level pd.value_counts,
# which is deprecated since pandas 2.1.
np.around(pd.Series(arr_source.ravel()).value_counts() / arr_source.size * 100, 1)

### Visualization

In [None]:
import anatomist.notebook as ana
a = ana.Anatomist()
print(a.headless_info.__dict__)

In [None]:
def visualize_all_image(file_name):
    """Loads a file in Anatomist and shows it in a new Axial window.

    Relies on the module-level Anatomist instance `a`.

    Args:
        file_name: name of the file handed to a.loadObject

    Returns:
        A tuple (loaded object, window in which it was added).
    """
    # Open the window first, then load the object and attach it
    axial_window = a.createWindow("Axial", geometry=[1200, 350, 500, 500])
    loaded_object = a.loadObject(file_name)
    loaded_object.addInWindows(axial_window)
    return loaded_object, axial_window

In [None]:
print("Files in crops nearest target directory:")
print(('\n'.join(os.listdir(tgt_dir_nearest + '/' + side + 'crops'))))

In [None]:
target_file_dir = tgt_dir_nearest + '/' + side + 'crops'
target_file_nearest = glob.glob(target_file_dir + "/*.nii.gz")[0]
print(target_file_nearest)

In [None]:
visualize_all_image(target_file_nearest)

In [None]:
target_file_dir = tgt_dir + '/' + side + 'crops'
target_file_linear = glob.glob(target_file_dir + "/*.nii.gz")[0]
print(target_file_linear)

In [None]:
visualize_all_image(target_file_linear)

In [None]:
visualize_all_image(vol_source_file)