# Setup

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

import copy
import functools
import itertools
import logging
import re
import time
from collections import Counter

import humanize
import numpy as np
import pandas as pd
import scipy as sp
import skimage
import tensorflow as tf
import yaml
from matplotlib import pyplot as plt, patches, cm
from numpy import ndarray
from progressbar import progressbar as pbar
from pymicro.file import file_utils
from skimage import measure, io
from sklearn import metrics as met
from sklearn import metrics, preprocessing
from sklearn import model_selection
from tensorflow.keras import layers
from tqdm import tqdm

import tomo2seg.data as tomo2seg_data
from tomo2seg.data import ModelPaths, Volume, EstimationVolume, SetPartition
from tomo2seg.volume_img_segm import VolumeImgSegmSequence
from tomo2seg import viz
from tomo2seg.logger import logger

In [3]:
from tomo2seg.data import VOLUME_PRECIPITATES_V1 as VOL_NAME_VERSION
logger.debug(f"{VOL_NAME_VERSION=}")

In [4]:
logger.setLevel(logging.DEBUG)

In [5]:
# Seeded legacy NumPy generator, so that anything drawing from `random_state`
# gives the same values on every fresh run of the notebook.
random_state = np.random.RandomState(seed=42)

# Load from the disk

In [6]:
# Metadata/paths objects

## Volume
volume_name = VOL_NAME_VERSION[0]
volume_version = VOL_NAME_VERSION[1]
volume = Volume.with_check(volume_name, volume_version)
logger.info(f"{volume=}")


def hst_read(x):
    """Read a raw volume file with pymicro's ``HST_read`` using this volume's metadata.

    The dtype and the dimensions are looked up on ``volume.metadata`` each time
    the function is called, and ``x`` is converted with ``str`` before it is
    handed to the reader.
    """
    read_with_metadata = functools.partial(
        # from pymicro
        file_utils.HST_read,
        # pre-loaded kwargs
        autoparse_filename=False,  # the file names are not properly formatted
        data_type=volume.metadata.dtype,
        dims=volume.metadata.dimensions,
        verbose=True,
    )
    return read_with_metadata(str(x))  # it doesn't accept paths...


logger.info("Loading data from disk.")

# The assignment expressions inside the f-strings below are what define
# `labels_in_version` / `labels_in_path` and `labels_out_version` / `labels_out_path`;
# they are kept inside the f-strings so that the logged text stays the same.
logger.info(f"*Input* versioned labels: {(labels_in_path := volume.versioned_labels_path(labels_in_version := 'refined'))=}")
logger.info(f"*Output* versioned labels: {(labels_out_path := volume.versioned_labels_path(labels_out_version := 'refined2'))=}")

## Labels
labels_volume = hst_read(labels_in_path)
logger.debug(f"{labels_volume.shape=}")

[2020-11-10::16:07:26.608] tomo2seg :: DEBUG :: {data.py:with_check:201}
vol=Volume(name='PA66GF30', version='v1', _metadata=None)

[2020-11-10::16:07:26.645] tomo2seg :: ERROR :: {data.py:with_check:219}
Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.labels.raw

Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.weights.raw

[2020-11-10::16:07:26.670] tomo2seg :: DEBUG :: {data.py:metadata:158}
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

[2020-11-10::16:07:26.684] tomo2seg :: INFO :: {<ipython-input-6-034fde79d2fd>:<module>:008}
volume=Volume(name='PA66GF30', version='v1', _metadata=Volume.Metadata(dimensions=[1300, 1040, 1900], dtype='uint8', labels=[0, 1, 2], labels_names={0: 'matrix', 1: 'fiber', 2: 'porosity'}, set_partitions={'train': {'x_range': [0, 1299], 'y_range': [0, 1039], 'z_range': [0, 1299], 'alias': 'train'}, 'val': {'x_range': [0, 12

'/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.labels'

# Blobs


In [114]:
from typing import Tuple, List, Optional
import multiprocessing as multip

# Region properties requested from `skimage.measure.regionprops_table` for every blob.
props = [
    "label",
    "area",
    "bbox",
    "bbox_area",
    "centroid",
    "eccentricity",
    "euler_number",
    "extent",
    "filled_area",
    "inertia_tensor_eigvals",
    "local_centroid",
    "major_axis_length",
    "minor_axis_length",
    "perimeter",
    "solidity",
    # todo add properties using the intensity image
]


# `regionprops_table` with the notebook-wide options already bound; it is a
# module-level object so that `_do_instances_slice` can use it from the worker processes.
_get_blob_props_func = functools.partial(
    skimage.measure.regionprops_table,
    properties=props,
    separator="-",
    cache=True,
)


def _do_instances_slice(args):
    """Compute the blob-property table of one labelled slice.

    Args:
        args: pair ``(idx, instances)`` with the index of the slice and its
            labelled image; it is a single argument so that the function can be
            mapped over ``enumerate(...)`` by a process pool.

    Returns:
        dict: the output of ``_get_blob_props_func(instances)`` plus the key
        ``"slice_idx"`` holding ``idx``.
    """
    slice_index, labelled_slice = args
    blob_table = dict(_get_blob_props_func(labelled_slice))
    blob_table["slice_idx"] = slice_index
    return blob_table


def get_2d_blobs_from_slices(slices_array: ndarray, label_to_search: int, n_processes: Optional[int]) -> dict:
    """Measure, slice by slice, the connected components ("blobs") of one label.

    Each item obtained by iterating ``slices_array`` along its first axis is
    binarised with ``slice == label_to_search`` and labelled with
    ``skimage.measure.label`` (``connectivity=2``). The properties of the blobs
    of every slice are then computed by ``_do_instances_slice`` in a process pool
    and concatenated into one table.

    Args:
        slices_array: array whose first axis indexes the slices.
        label_to_search: value of the class whose blobs are measured.
        n_processes: number of worker processes given to ``multiprocessing.Pool``
            (``None`` lets the pool pick its default).

    Returns:
        dict mapping each property name to a 1D array with one entry per blob
        over all the slices. ``"label"`` is filled with ``label_to_search`` and
        ``"slice_idx"`` with the index of the slice the blob belongs to.
        Floating-point columns are stored as ``float16``. An empty dict is
        returned when ``slices_array`` contains no slices.
    """
    n_slices = slices_array.shape[0]

    if n_slices == 0:
        # nothing to label or measure (and no first table to read the keys from)
        return {}

    instances_slices = n_slices * [None]

    for idx, slice_ in pbar(
        enumerate(slices_array),
        max_value=n_slices,
        prefix="slices ",
    ):
        instances_slices[idx] = skimage.measure.label(
            slice_ == label_to_search,
            connectivity=2, background=0, return_num=False
        )

    with multip.Pool(n_processes) as p:
        blobs_per_slice = p.map(
            _do_instances_slice,  # get the properties of all blobs in a slice
            enumerate(instances_slices)
        )

    for blobs in pbar(
        blobs_per_slice,
        max_value=n_slices,
        prefix="adjustments ",
    ):
        # the per-slice instance ids are meaningless once the slices are merged,
        # so the column stores the searched class instead
        blobs['label'] = np.full_like(blobs['label'], label_to_search)
        # broadcast the scalar slice index to one value per blob
        blobs['slice_idx'] = np.full_like(blobs['label'], blobs['slice_idx'])
        for k, v in blobs.items():
            # `np.issubdtype` covers every float width without naming aliases
            # that do not exist in all NumPy versions/platforms
            if np.issubdtype(v.dtype, np.floating):
                # NOTE(review): float16 cannot represent every integer above 2048
                # and overflows above 65504 - confirm this precision is acceptable.
                blobs[k] = v.astype(np.float16)  # reduce memory usage

    prop_keys = list(blobs_per_slice[0].keys())
    return {
        key: np.concatenate([
            blobs[key] for blobs in blobs_per_slice
        ])
        for key in pbar(
            prop_keys,
            max_value=len(prop_keys),
            prefix="concat "
        )
    }


# Porosity-only variant: the class value is fixed to 2 and the pool size is left
# to the default of `multiprocessing.Pool`.
get_2d_blobs_from_slices_only_porosity = functools.partial(
    get_2d_blobs_from_slices, n_processes=None, label_to_search=2  # use all
)

# Quick check on the first two z-slices only.
test_slices = z_slices[0:2]
test_blobs = get_2d_blobs_from_slices_only_porosity(test_slices)

Writing bkp4.py


In [115]:
# Measure the porosity blobs of every z-slice, put them in a DataFrame and write
# it as CSV to the file "z-blobs" (relative to the current working directory).
# NOTE(review): `z_slices` is not defined in any cell visible here - confirm it is
# created earlier so that this cell runs on a fresh kernel.
logger.info("z-slices")
blobs2d_z_porosity = get_2d_blobs_from_slices_only_porosity(z_slices)
blobs2d_z_porosity_df = pd.DataFrame(blobs2d_z_porosity)
blobs2d_z_porosity_df.to_csv("z-blobs")

slices 100% (1900 of 1900) |#############| Elapsed Time: 0:00:24 Time:  0:00:24
instance_slices 100% (1900 of 1900) |####| Elapsed Time: 0:00:00 Time:  0:00:00
adjustments 100% (1900 of 1900) |########| Elapsed Time: 0:00:00 Time:  0:00:00
concat 100% (22 of 22) |#################| Elapsed Time: 0:00:00 Time:  0:00:00


CPU times: user 41.6 s, sys: 1min 3s, total: 1min 45s
Wall time: 2min 14s


In [None]:
# Measure the porosity blobs of every y-slice, put them in a DataFrame and write
# it as CSV to the file "y-blobs" (relative to the current working directory).
# NOTE(review): `y_slices` is not defined in any cell visible here - confirm it is
# created earlier so that this cell runs on a fresh kernel.
logger.info("y-slices")
blobs2d_y_porosity = get_2d_blobs_from_slices_only_porosity(y_slices)
blobs2d_y_porosity_df = pd.DataFrame(blobs2d_y_porosity)
blobs2d_y_porosity_df.to_csv("y-blobs")

In [None]:
# Measure the porosity blobs of every x-slice, put them in a DataFrame and write
# it as CSV to the file "x-blobs" (relative to the current working directory).
# NOTE(review): `x_slices` is not defined in any cell visible here - confirm it is
# created earlier so that this cell runs on a fresh kernel.
logger.info("x-slices")
blobs2d_x_porosity = get_2d_blobs_from_slices_only_porosity(x_slices)
blobs2d_x_porosity_df = pd.DataFrame(blobs2d_x_porosity)
blobs2d_x_porosity_df.to_csv("x-blobs")

# Backups of simpler versions of the main function

## bkp 1

In [None]:
from typing import Tuple, List

def get_2d_blobs_from_slices(slices_array: ndarray, label_to_search: int, properties: List[str]) -> dict:
    """Single-process backup version: measure the blobs of one label slice by slice.

    NOTE: this function has the same name as the multiprocessing version defined
    earlier in the notebook, so running this cell rebinds the name to this
    simpler implementation (which takes ``properties`` instead of ``n_processes``).

    Args:
        slices_array: array whose first axis indexes the slices.
        label_to_search: value of the class whose blobs are measured.
        properties: names of the region properties passed to
            ``skimage.measure.regionprops_table``; it must contain ``"label"``
            because that column is overwritten below.

    Returns:
        dict mapping each property name to a 1D array with one entry per blob
        over all the slices, where ``"label"`` is filled with ``label_to_search``.
        An empty dict is returned when ``slices_array`` contains no slices.
    """
    n_slices = slices_array.shape[0]

    if n_slices == 0:
        # nothing to label or measure (and no first table to read the keys from)
        return {}

    instances_slices = n_slices * [None]

    for idx, slice_ in pbar(
        enumerate(slices_array),
        max_value=n_slices,
        prefix="slices ",
    ):
        # only the labelled image is needed, so the number of instances is not requested
        instances_slices[idx] = skimage.measure.label(
            slice_ == label_to_search,
            connectivity=2, background=0, return_num=False
        )

    get_blob_props = functools.partial(
        skimage.measure.regionprops_table,
        cache=True,
        separator="-",
        properties=properties
    )

    blobs_per_slice = [
        get_blob_props(instances)
        for instances in pbar(
            instances_slices,
            max_value=n_slices,
            prefix="instance_slices ",
        )
    ]

    for blobs in blobs_per_slice:
        # the per-slice instance ids are meaningless once the slices are merged,
        # so the column stores the searched class instead
        blobs['label'] = np.full_like(blobs['label'], label_to_search)

    return {
        key: np.concatenate([
            blobs[key] for blobs in blobs_per_slice
        ])
        for key in blobs_per_slice[0].keys()
    }

# Region properties requested from `skimage.measure.regionprops_table` for every blob.
props = [
    "label",
    "area",
    "bbox",
    "bbox_area",
    "centroid",
    "eccentricity",
    "euler_number",
    "extent",
    "filled_area",
    "inertia_tensor_eigvals",
    "local_centroid",
    "major_axis_length",
    "minor_axis_length",
    "perimeter",
    "solidity",
    # todo add properties using the intensity image
]

# Porosity-only variant of this backup implementation: class value 2, properties above.
get_2d_blobs_from_slices_only_porosity = functools.partial(
    get_2d_blobs_from_slices, properties=props, label_to_search=2
)

# Quick check on the first two z-slices only.
test_slices = z_slices[0:2]
test_blobs = get_2d_blobs_from_slices_only_porosity(test_slices)


## bkp 2

## bkp 3