# Setup

In [13]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
%autoreload 2

import copy
import functools
import itertools
import logging
import re
import time
import yaml
from collections import Counter

import humanize
from pymicro.file import file_utils
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import scipy as sp
from numpy import ndarray
from matplotlib import pyplot as plt, patches, cm
from sklearn import metrics as met
from progressbar import progressbar as pbar
from tensorflow.keras import layers
from sklearn import metrics, preprocessing
from sklearn import model_selection
import skimage
from skimage import measure, io
import pandas as pd

import tomo2seg.data as tomo2seg_data
from tomo2seg.data import ModelPaths, Volume, EstimationVolume, SetPartition
from tomo2seg.volume_img_segm import VolumeImgSegmSequence
from tomo2seg import viz
from tomo2seg.logger import logger

In [15]:
from tomo2seg.data import VOLUME_PRECIPITATES_V1 as VOL_NAME_VERSION
logger.debug(f"{VOL_NAME_VERSION=}")

[2020-11-12::16:49:23.425] tomo2seg :: DEBUG :: {<ipython-input-15-626a1365fa0c>:<module>:002}
VOL_NAME_VERSION=('PA66GF30', 'v1')



In [16]:
logger.setLevel(logging.DEBUG)

In [17]:
random_state = 42
random_state = np.random.RandomState(random_state)

# Load from the disk

In [22]:
# Metadata/paths objects

## Volume
volume = Volume.with_check(
    volume_name := VOL_NAME_VERSION[0], 
    volume_version := VOL_NAME_VERSION[1]
)
logger.info(f"{volume=}")

hst_read = lambda x: functools.partial(
    # from pymicro
    file_utils.HST_read,
    # pre-loaded kwargs
    autoparse_filename=False,  # the file names are not properly formatted
    data_type=volume.metadata.dtype,
    dims=volume.metadata.dimensions,
    verbose=True,
)(str(x))  # it doesn't accept paths...

logger.info("Loading data from disk.")

labels_version = 'refined'

## Labels
labels_volume = hst_read(volume.versioned_labels_path(labels_version))
logger.debug(f"{labels_volume.shape=}")

blobs_target_label = 1
blobs_table_path = volume.blobs3d_props_path(blobs_target_label, labels_version)
blobs_volume_path = volume.blobs3d_volume_path(blobs_target_label, labels_version)

logger.debug(f"{blobs_table_path}\n{blobs_volume_path}")

[2020-11-12::16:50:08.854] tomo2seg :: DEBUG :: {data.py:with_check:214}
vol=Volume(name='PA66GF30', version='v1', _metadata=None)

[2020-11-12::16:50:08.856] tomo2seg :: ERROR :: {data.py:with_check:232}
Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.labels.raw

Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.weights.raw

[2020-11-12::16:50:08.857] tomo2seg :: DEBUG :: {data.py:metadata:171}
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

[2020-11-12::16:50:08.863] tomo2seg :: INFO :: {<ipython-input-22-eaa8ce800c52>:<module>:008}
volume=Volume(name='PA66GF30', version='v1', _metadata=Volume.Metadata(dimensions=[1300, 1040, 1900], dtype='uint8', labels=[0, 1, 2], labels_names={0: 'matrix', 1: 'fiber', 2: 'porosity'}, set_partitions={'train': {'x_range': [0, 1299], 'y_range': [0, 1039], 'z_range': [0, 1299], 'alias': 'train'}, 'val': {'x_range': [0, 1

# Blobs


In [23]:
%%time
from typing import Tuple, List, Optional


def get_3dblobs_props_table(
    binary_volume: ndarray, 
    props: List[str],
    label: Optional[int] = None,
    blobs_volume_use_int32: Optional[bool] = True,
    table_use_float16: Optional[bool] = False,
    return_blobs_volume: Optional[bool] = False,
    skimage_label_kwargs: dict = dict(),
    regionsprops_table_kwargs: dict = dict(),
):
    
    logger.debug("Getting connected components (blobs).")
    blobs, nblobs = skimage.measure.label(
        binary_volume, return_num=True,
        **skimage_label_kwargs
    )
    logger.info(f"Found {nblobs} blobs.")
    
    if blobs_volume_use_int32:
        blobs = blobs.astype(np.int32)  # reduce some memory, up to 2 billion bodies!

    logger.debug("Getting a table of props with `skimage.measure.regionprops_table()`.")
    props_table = skimage.measure.regionprops_table(
        blobs,
        cache=True,
        properties=props,
        **regionsprops_table_kwargs
    )
    
    logger.debug("Adjusting some data.")
    props_table["id"] = props_table["label"]
    
    if label is not None:
        props_table["label"] = label * np.ones_like(props_table["label"])
    else:
        del props_table["label"]
        
    if table_use_float16:
        logger.debug("Converting floats to float16.")
        for k, v in props_table.items():
            if v.dtype in (np.float, np.float64, np.float32, np.float128):
                props_table[k] = v.astype(np.float16)  # reduce memory usage
    
    if return_blobs_volume:
        return props_table, blobs
    else:
        return props_table

    
PROPS = [
    "label", 
    "area", 
#     "convex_area",  # this breaks, error in `convex-hull-error-stack-trace.txt`
    "bbox", "bbox_area", "extent", "filled_area", 
    "centroid", "local_centroid", 
    "inertia_tensor", "moments_central",
    "major_axis_length", "minor_axis_length", 
    "slice",
    # todo add properties using the intensity image
]

get_table_and_volume = functools.partial(
    get_3dblobs_props_table,
    props=PROPS,
    blobs_volume_use_int32=True,
    table_use_float16=True,
    return_blobs_volume=True,
    skimage_label_kwargs=dict(connectivity=3, background=0,),
    regionsprops_table_kwargs=dict(separator="-",)
)


test_vol = labels_volume 
test_label = blobs_target_label
test_vol = test_vol[:(sz := 100), :sz, :sz]
test_blobs_table, test_blobs_volume = get_table_and_volume(test_vol, label=test_label)
logger.debug(f"{len(test_blobs_table)=}   {test_blobs_volume.shape=}")

[2020-11-12::16:50:12.149] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:015}
Getting connected components (blobs).

[2020-11-12::16:50:12.172] tomo2seg :: INFO :: {<timed exec>:get_3dblobs_props_table:020}
Found 164 blobs.

[2020-11-12::16:50:12.174] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:025}
Getting a table of props with `skimage.measure.regionprops_table()`.



region-properties-init 100% (164 of 164) || Elapsed Time: 0:00:00 Time:  0:00:00
(1 /13) label 100% (164 of 164) |########| Elapsed Time: 0:00:00 Time:  0:00:00
(2 /13) area 100% (164 of 164) |#########| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(0,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(1,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(2,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(3,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(4,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(5,) 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(4 /13) bbox_area 100% (164 of 164) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(5 /13) extent 100% (164 of 164) |#######| Elapsed Time: 0:00:00 Time:  0:00:00
(6 /13) filled_area 100% (164 of 164) |##| Elapsed Time: 0:00:00 Time:  0:00:00
(7 /13) centroid-(0,) 100% (164 of 164)

[2020-11-12::16:50:12.742] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:033}
Adjusting some data.

[2020-11-12::16:50:12.743] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:042}
Converting floats to float16.

[2020-11-12::16:50:12.752] tomo2seg :: DEBUG :: {<timed exec>:<module>:080}
len(test_blobs_table)=94   test_blobs_volume.shape=(100, 100, 100)

CPU times: user 615 ms, sys: 70.1 ms, total: 685 ms
Wall time: 612 ms


In [None]:
%%time

logger.debug(f"Getting `{volume.metadata.labels_names[blobs_target_label]}` (idx={blobs_target_label}) blobs.")
blobs_table, blobs_volume = get_table_and_volume(
    (labels_volume == blobs_target_label).astype(np.int8), 
    label=blobs_target_label
)

[2020-11-12::16:50:12.814] tomo2seg :: DEBUG :: {<timed exec>:<module>:001}
Getting `fiber` (idx=1) blobs.

[2020-11-12::16:50:14.577] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:015}
Getting connected components (blobs).

[2020-11-12::16:52:10.937] tomo2seg :: INFO :: {<timed exec>:get_3dblobs_props_table:020}
Found 52814 blobs.

[2020-11-12::16:52:19.902] tomo2seg :: DEBUG :: {<timed exec>:get_3dblobs_props_table:025}
Getting a table of props with `skimage.measure.regionprops_table()`.



region-properties-init 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(1 /13) label 100% (52814 of 52814) |####| Elapsed Time: 0:00:00 Time:  0:00:00
(2 /13) area 100% (52814 of 52814) |#####| Elapsed Time: 0:00:03 Time:  0:00:03
(3 /13) bbox-(0,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(1,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(2,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(3,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(4,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(3 /13) bbox-(5,) 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(4 /13) bbox_area 100% (52814 of 52814) || Elapsed Time: 0:00:00 Time:  0:00:00
(5 /13) extent 100% (52814 of 52814) |###| Elapsed Time: 0:00:00 Time:  0:00:00
(6 /13) filled_area 100% (52814 of 52814) || Elapsed Time: 0:00:13 Time:  0:00:13:27
(7 /13) centroid-(0,) 100% (52

In [None]:
logger.debug(f"Converting table to panda.")
blobs_table = pd.DataFrame(blobs_table)

In [None]:
logger.debug(f"Saving table at `{blobs_table_path=}`")
blobs_table.to_csv(blobs_table_path)

logger.debug(f"Saving volume at `{blobs_volume_path=}`")
file_utils.HST_write(blobs_volume, str(blobs_volume_path), mode='w', verbose=True)