# Generates crops and pickle file using dataset_gen_pipe

This notebook crops images according to bounding box coordinates (one per sulcus)

# Imports

In [1]:
import sys
import os
import glob
import json
import numpy as np
import pandas as pd

from soma import aims

The following line makes it possible to import deep_folding even if this notebook is executed from the notebooks subfolder (and no install has been launched):

 /notebooks/use_transform.ipynb  
 /deep_folding/__init__.py

In [2]:
# Make the repository root (parent of the notebooks folder) importable,
# then show where deep_folding was actually loaded from.
sys.path.append(os.path.abspath(os.pardir))
import deep_folding
print(os.path.dirname(deep_folding.__file__))

/home/jc225751/Program/deep_folding/deep_folding


# User-specific variables

In [3]:
# Sulcus name; passed as `list_sulci` to dataset_gen_pipe.dataset_gen_pipe below
sulcus = 'S.T.s.ter.asc.ant._left'

In [4]:
# Hemisphere side; passed as `side` to dataset_gen_pipe and used below as the
# prefix of the source file pattern and of the crops folder name (side + 'crops')
side = 'L'

We now assign path names and other user-specific variables.

The source directory is where the database lies. It contains the morphologist analysis subfolder ANALYSIS/3T_morphologist


In [5]:
# Absolute path of the source database, resolved from the current working directory
src_dir = os.path.abspath('../data/source/unsupervised')
print("src_dir = " + src_dir)

src_dir = /home/jc225751/Program/deep_folding/data/source/unsupervised


The target directory tgt_dir is where the files will be saved

In [6]:
# Absolute path of the directory where the generated files are written
tgt_dir = os.path.abspath('../data/target/data')
print("tgt_dir = " + tgt_dir)

tgt_dir = /home/jc225751/Program/deep_folding/data/target/data


The reference directory is where the equivalent reference file has been saved

In [7]:
# Absolute path of the directory holding the reference (expected) outputs
ref_dir = os.path.abspath('../data/reference/data')
print("ref_dir = " + ref_dir)

ref_dir = /home/jc225751/Program/deep_folding/data/reference/data


In [8]:
# Absolute path of the directory holding the reference transformation files
transform_dir = os.path.abspath('../data/reference/transform')
print("transform_dir = " + transform_dir)

transform_dir = /home/jc225751/Program/deep_folding/data/reference/transform


In [9]:
# Absolute path of the directory holding the reference bounding box files
bbox_dir = os.path.abspath('../data/reference/bbox')
print("bbox_dir = " + bbox_dir)

bbox_dir = /home/jc225751/Program/deep_folding/data/reference/bbox


In [10]:
# Shows the arguments the notebook kernel itself was launched with; the calls to
# dataset_gen_pipe.main below receive their own explicit argv list instead
print(sys.argv)

['/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py', '-f', '/casa/home/.local/share/jupyter/runtime/kernel-84227d14-a775-4a18-a6ba-f2360b175e2c.json']


# Illustration of main program uses

We will first run the program without any effect, either by setting the number of subjects to 0 or by calling the help function.

### Using external calls

In [11]:
!python ../deep_folding/anatomist_tools/dataset_gen_pipe.py -n 0 -t tgt_dir

In [12]:
# NOTE(review): this prints the help of bounding_box.py, whereas the other cells of
# this notebook exercise dataset_gen_pipe.py — confirm this is the intended script
!python ../deep_folding/anatomist_tools/bounding_box.py --help

usage: bounding_box.py [-h] [-s SRC_DIR [SRC_DIR ...]] [-t TGT_DIR]
                       [-u SULCUS] [-i SIDE] [-m IMAGE_NORMALIZED_SPM]
                       [-p PATH_TO_GRAPH] [-n NB_SUBJECTS]

Computes bounding box around the named sulcus

optional arguments:
  -h, --help            show this help message and exit
  -s SRC_DIR [SRC_DIR ...], --src_dir SRC_DIR [SRC_DIR ...]
                        Source directory where the MRI data lies. If there are
                        several directories, add all directories one after the
                        other. Example: -s DIR_1 DIR_2. Default is :
                        /neurospin/lnao/PClean/database_learnclean/all/
  -t TGT_DIR, --tgt_dir TGT_DIR
                        Target directory where to store the output
                        transformation files. Default is :
                        /neurospin/dico/deep_folding_data/test/bbox
  -u SULCUS, --sulcus SULCUS
                        Sulcus name around whi

### By using the main function call

In [13]:
# Imported here rather than in the top import cell because deep_folding only
# becomes importable after the sys.path.append cell above has run
from deep_folding.anatomist_tools import dataset_gen_pipe
# Shows which copy of the module was loaded
print(dataset_gen_pipe.__file__)

/home/jc225751/Program/deep_folding/deep_folding/anatomist_tools/dataset_gen_pipe.pyc


In [14]:
# Arguments for a dry run (0 subjects) writing to tgt_dir.
# The list is built directly instead of splitting a string on spaces, so that a
# target path containing spaces stays a single argument.
argv = ['-n', '0', '-t', tgt_dir]
# Kept for display/backward compatibility: same string as before
args = ' '.join(argv)

In [15]:
# Calls the command-line entry point with the argv list built in the previous cell
dataset_gen_pipe.main(argv)

In [16]:
# Single-option command line asking for the usage message
args = "--help"
argv = [args]

In [17]:
# Calls the command-line entry point with the argv list built in the previous cell
# (here the help option, which prints the usage message)
dataset_gen_pipe.main(argv)

usage: dataset_gen_pipe.py [-h] [-s SRC_DIR] [-t TGT_DIR] [-r TRANSFORM_DIR]
                           [-b BBOX_DIR] [-u SULCUS [SULCUS ...]] [-i SIDE]
                           [-n NB_SUBJECTS]

Generates cropped and pickle files

optional arguments:
  -h, --help            show this help message and exit
  -s SRC_DIR, --src_dir SRC_DIR
                        Source directory where the MRI data lies. Default is :
                        /neurospin/hcp
  -t TGT_DIR, --tgt_dir TGT_DIR
                        Target directory where to store the cropped and pickle
                        files. Default is :
                        /neurospin/dico/deep_folding_data/test
  -r TRANSFORM_DIR, --transform_dir TRANSFORM_DIR
                        Transform directory where transformation files from
                        native to Talairach files have been stored. Default is
                        : /neurospin/dico/deep_folding_data/test/transform
  -b BBOX_DIR, --bbox_dir BBOX_DIR
       

### By using the API function call

In [18]:
# Dry run through the Python API: same directories as the real run,
# but with the number of subjects set to 0
dry_run_options = dict(
    src_dir=src_dir,
    tgt_dir=tgt_dir,
    transform_dir=transform_dir,
    bbox_dir=bbox_dir,
    list_sulci=sulcus,
    side=side,
    number_subjects=0,
)
dataset_gen_pipe.dataset_gen_pipe(**dry_run_options)

# Test example

In [19]:
# Real run through the Python API on all subjects of the source directory
full_run_options = dict(
    src_dir=src_dir,
    tgt_dir=tgt_dir,
    transform_dir=transform_dir,
    bbox_dir=bbox_dir,
    list_sulci=sulcus,
    side=side,
    number_subjects=dataset_gen_pipe._ALL_SUBJECTS,
)
dataset_gen_pipe.dataset_gen_pipe(**full_run_options)

# Result analysis

### Analysis of the inputs

In [20]:
# Gets source file as numpy array
skeleton_dir = os.path.join(src_dir, "ANALYSIS/3T_morphologist/100206/t1mri/default_acquisition/default_analysis/segmentation")
vol_source_file = glob.glob(skeleton_dir + '/' + side + '*.nii.gz')
vol_source = aims.read(vol_source_file[0])
arr_source = vol_source.arraydata()
print "shape of source skeleton = ", arr_source.shape

shape of source skeleton =  (1, 260, 311, 260)


In [21]:
# Distinct voxel values present in the source volume
np.unique(arr_source)

array([ 0, 10, 11, 30, 40, 60, 80], dtype=int16)

In [22]:
# Number of voxels per value, most frequent first.
# ravel() flattens the volume (np.resize was only used for that purpose) and
# Series.value_counts replaces the deprecated top-level pd.value_counts.
pd.Series(arr_source.ravel()).value_counts()

11    18832738
0      2012736
60      163069
30        9634
80        5330
40          87
10           6
dtype: int64

### Analysis of the outputs

Prints the list of files of the target directory

In [23]:
# One entry of the target directory per line
target_entries = os.listdir(tgt_dir)
print("Files in target directory:")
print('\n'.join(target_entries))

Files in target directory:
Lskeleton.pkl
dataset.json
Lcrops


In [24]:
# One entry of the crops subfolder (named side + 'crops') per line
crops_entries = os.listdir(os.path.join(tgt_dir, side + 'crops'))
print("Files in crops target directory:")
print('\n'.join(crops_entries))

Files in crops target directory:
100206_normalized.nii.gz
100206_normalized.nii.gz.minf


In [25]:
# Takes the first JSON file found in the target directory
tgt_json_file = glob.glob(os.path.join(tgt_dir, '*.json'))[0]
# Single-argument print() behaves the same under Python 2 and Python 3
# (spacing and trailing newline reproduce the former print statement)
print("tgt_json_file =  {} \n".format(tgt_json_file))
# glob already returns the full path, so the file is opened directly
with open(tgt_json_file, 'r') as f:
    data_tgt = json.load(f)
# Pretty-prints the content once the file is closed
print(json.dumps(data_tgt, sort_keys=True, indent=4))

tgt_json_file =  /home/jc225751/Program/deep_folding/data/target/data/dataset.json 

{
    "bbmax": [
        137, 
        153, 
        78
    ], 
    "bbmin": [
        112, 
        129, 
        33
    ], 
    "bbox_dir": "/home/jc225751/Program/deep_folding/data/reference/bbox", 
    "cropped_dir": "/home/jc225751/Program/deep_folding/data/target/data/Lcrops", 
    "date": "2021-04-27 10:58:47", 
    "git_sha": "910f65e6462520d432494d6306d34b598be2aee1", 
    "is_git": true, 
    "list_sulci": [
        "S.T.s.ter.asc.ant._left"
    ], 
    "nb_subjects": 1, 
    "repo_working_dir": "/home/jc225751/Program/deep_folding", 
    "side": "L", 
    "src_dir": "/home/jc225751/Program/deep_folding/data/source/unsupervised", 
    "tgt_dir": "/home/jc225751/Program/deep_folding/data/target/data", 
    "timestamp": 1619513927.204861, 
    "transform_dir": "/home/jc225751/Program/deep_folding/data/reference/transform"
}


Obtained output (we read the cropped file from the target directory):

In [26]:
# Gets target crop as numpy array
cropped_target_dir = os.path.join(tgt_dir, side+'crops')
vol_target_file = glob.glob(cropped_target_dir + '/' + '*.nii.gz')
vol_target = aims.read(vol_target_file[0])
arr_target = vol_target.arraydata()
print "shape of target cropped image = ", arr_target.shape

shape of target cropped image =  (1, 46, 25, 26)


In [27]:
# Distinct voxel values present in the target crop
np.unique(arr_target)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 66, 72],
      dtype=int16)

In [28]:
# Pair (distinct values, number of voxels for each value) of the target crop
unique_target = np.unique(arr_target, return_counts=True)
# print() with a single argument works under both Python 2 and Python 3
print(unique_target)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 66, 72],
      dtype=int16), array([21572,   252,   174,   184,   146,   137,   166,   114,   119,
         115,   107,  3337,   105,   120,   107,   105,   119,   124,
          92,   115,    94,    95,   109,   108,    98,   107,    95,
         121,   102,   139,   150,   131,   106,   109,    98,    95,
          92,    69,    62,    61,    47,    47,    45,    36,    36,
          39,    32,    37,    32,    28,    26,    27,    13,    15,
          16,    14,    14,    10,    10,    12,     7,     2,     1,
           1,     2]))


In [29]:
# Five most frequent voxel values of the target crop.
# ravel() flattens the volume (np.resize was only used for that purpose) and
# Series.value_counts replaces the deprecated top-level pd.value_counts.
pd.Series(arr_target.ravel()).value_counts().head()

0     21572
11     3337
1       252
3       184
2       174
dtype: int64

Expected output (we read the cropped file from the reference directory):

In [30]:
# Gets reference crop as numpy array
cropped_ref_dir = os.path.join(ref_dir, side + 'crops')
# glob returns files in unspecified order; sorting makes the selected crop
# deterministic when the folder contains several subjects
vol_ref_file = sorted(glob.glob(os.path.join(cropped_ref_dir, '*.nii.gz')))
vol_ref = aims.read(vol_ref_file[0])
arr_ref = vol_ref.arraydata()
# Single-argument print() behaves the same under Python 2 and Python 3
# (the two spaces reproduce the output of the former print statement)
print("shape of reference cropped image =  {}".format(arr_ref.shape))

shape of reference cropped image =  (1, 46, 25, 26)


In [31]:
# True only if the generated crop and the reference crop have the same shape
# and the same value in every voxel
np.array_equal(arr_target, arr_ref)

False