# imports

In [None]:
import sys
sys.path.append(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\10_code\UTvsXCT-preprocessing')
from preprocess_tools import onlypores , datasetmaker, io, aligner, register, reslicer
import numpy as np
from dbtools import dbtools as db
from dbtools import load as load
import pandas as pd
from pathlib import Path
import ast

# Database connection

In [None]:
# Open the database connection used later when the generated datasets are
# registered. A failure is only printed, so `conn` stays unbound in that case.
try:
    conn = db.connect()
except Exception as connection_error:
    print(connection_error)
else:
    print("Connected to the database")

# Select measurements to create the datasets

## Measurement type

In [None]:
# Load the 'measurementtypes' metadata table. It is read again further down
# to extract the UT and XCT resolutions.
measurementtypes_table = db.get_data_metadata('measurementtypes')

# Bare expression: shown as the cell output so the type ids can be inspected.
measurementtypes_table

In [None]:
# Measurement type id used to select the UT rows
# (compared against 'measurementtype_id_measurement' and 'id_measurementtype').
ut_type = 5

# Measurement type id used to select the XCT rows (same columns as above).
xct_type = 6

## Selecting UT measurements

In [None]:
# Load the full 'measurements' metadata table; it is narrowed to the UT rows
# in the following cell.
ut_measurements_table = db.get_data_metadata('measurements')

In [None]:
# Keep only the measurements whose type id equals `ut_type`.
ut_measurements_table = ut_measurements_table.loc[
    lambda table: table['measurementtype_id_measurement'] == ut_type
]

# Display the filtered table as the cell output.
ut_measurements_table

## Selecting XCT measurements

In [None]:
# Load the full 'measurements' metadata table; it is narrowed to the XCT rows
# in the following cell.
xct_measurements_table = db.get_data_metadata('measurements')

In [None]:
# Keep only the measurements whose type id equals `xct_type`.
xct_measurements_table = xct_measurements_table.loc[
    lambda table: table['measurementtype_id_measurement'] == xct_type
]

# Display the filtered table as the cell output.
xct_measurements_table

## Getting registered pairs

In [None]:
# Load the 'registrations' metadata table. Each row is read below for its
# reference/registered measurement ids, its own id and its registration matrix.
registrations_table = db.get_data_metadata('registrations')

# Bare expression: shown as the cell output for inspection.
registrations_table

In [None]:
# Build three parallel lists, one entry per usable registration:
#   reference_measurements[i]  -> row of the UT measurement (reference)
#   registered_measurements[i] -> row of the XCT measurement (registered)
#   registration_ids[i]        -> id of the registration linking both
reference_measurements = []
registered_measurements = []
registration_ids = []

for _, row in registrations_table.iterrows():

    reference_id = row['reference_measurement_id_registration']
    registered_id = row['registered_measurement_id_registration']
    registration_id = row['id_registration']

    reference_rows = ut_measurements_table[ut_measurements_table['id_measurement'] == reference_id]
    registered_rows = xct_measurements_table[xct_measurements_table['id_measurement'] == registered_id]

    # A registration whose reference is not in the UT table or whose registered
    # measurement is not in the XCT table cannot be turned into a UT/XCT
    # dataset. Skip it explicitly instead of failing with a bare IndexError
    # on `.iloc[0]`, and never append to only some of the lists.
    if reference_rows.empty or registered_rows.empty:
        print(f"Registration {registration_id} does not link a UT measurement to an XCT measurement, skipping...")
        continue

    reference_measurements.append(reference_rows.iloc[0])
    registered_measurements.append(registered_rows.iloc[0])
    registration_ids.append(registration_id)

## Sample names

In [None]:
# Load the samples/measurements relation through 'sample_measurements'.
# The result is read below through its 'id_measurement' and 'name_sample'
# columns to map each measurement to a sample name.
sample_measurements_table = db.relation_metadata('samples','measurements','sample_measurements')

# Bare expression: shown as the cell output for inspection.
sample_measurements_table

In [None]:
# One sample name per reference (UT) measurement, in the same order as
# `reference_measurements`: take the first relation row whose
# 'id_measurement' matches and read its 'name_sample'.
sample_names = [
    sample_measurements_table[
        sample_measurements_table['id_measurement'] == measurement['id_measurement']
    ].iloc[0]['name_sample']
    for measurement in reference_measurements
]

# Datasettype selection

In [None]:
# Load the 'datasettypes' table so the available dataset type ids can be inspected.
datasettype_table = db.get_data('datasettypes')

# Bare expression: shown as the cell output for inspection.
datasettype_table

In [None]:
# Dataset type id: used to filter the already computed datasets
# ('datasettype_id_dataset') and passed as `datasettype_id` when saving.
datasettype = 3

# Discard already computed datasets

In [None]:
# Collect the registration ids that already have a dataset of the selected
# type, so the generation loop can skip them.
try:
    dataset_registrations_table = db.relation_metadata('datasets','registrations','dataset_registrations')

    # Only datasets of the selected type count as "already computed".
    dataset_registrations_table = dataset_registrations_table.loc[
        lambda table: table['datasettype_id_dataset'] == datasettype
    ]

    dataset_registrations = dataset_registrations_table['id_registration'].values

except Exception as lookup_error:
    # NOTE(review): any failure here (including a real database error) is
    # treated as "nothing computed yet", so every registration is processed.
    print("No dataset registrations found or error occurred:", lookup_error)
    dataset_registrations = []

# Saving folder

In [None]:
# Root output folder; one sub-folder per sample name is created under it
# by the generation loop.
folder = Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\04_ML_data\Juan Ignacio\2025 dataset')

# Patch sizes

In [None]:
# Patch sizes passed one by one as `ut_patch_size` to datasetmaker.main;
# one CSV and one database entry are produced per value.
patch_sizes = [3, 5, 7, 9]

# Resolutions

In [None]:
# The resolution columns are stored as text; the numeric value is the first
# space-separated token of the field.
xct_resolution = float(
    measurementtypes_table.loc[
        measurementtypes_table['id_measurementtype'] == xct_type,
        'voxel_size_measurementtype',
    ].values[0].split(' ')[0]
)
ut_resolution = float(
    measurementtypes_table.loc[
        measurementtypes_table['id_measurementtype'] == ut_type,
        'x_resolution_measurementtype',
    ].values[0].split(' ')[0]
)

# Dataset Generation

In [31]:
# Generate, for every registered UT/XCT pair that has no dataset of the
# selected type yet, one CSV per patch size and register it in the database.

# Metadata stored with every dataset entry; it does not depend on the
# registration or the patch size, so it is built once.
targets = ['volfrac','areafrac']
description = 'Created with the production notebook in preprocess tools v 0.1.17'

# NOTE(review): the skip check below is per registration, not per patch size.
# If a previous run stopped after saving only some patch sizes, the missing
# ones are presumably never regenerated - verify against the database.
for reference_measurement, registered_measurement, registration_id, sample_name in zip(
        reference_measurements, registered_measurements, registration_ids, sample_names):

    reference_measurement_path = reference_measurement['file_path_measurement']
    registered_measurement_path = registered_measurement['file_path_measurement']

    if registration_id in dataset_registrations:
        print(f"Dataset for registration {registration_id} already exists, skipping...")
        continue

    print(f"Creating dataset for registration {registration_id} with reference measurement {reference_measurement_path} and registered measurement {registered_measurement_path}")

    #load the reference (UT) and registered (XCT) volumes
    ut_volume = io.load_tif(reference_measurement_path)
    xct_volume = io.load_tif(registered_measurement_path)

    #both volumes are loaded as z,y,x; move the first axis to the end (y,x,z)
    #before applying the registration and detecting the walls
    xct_volume = np.transpose(xct_volume, (1, 2, 0))
    ut_volume = np.transpose(ut_volume, (1, 2, 0))

    #the registration matrix is stored as text in the registrations table
    registration_parameters = registrations_table[registrations_table['id_registration'] == registration_id]['registration_matrix_registration'].values[0]
    registration_parameters = np.array(ast.literal_eval(registration_parameters))

    #apply the registration; xct_volume is replaced by the transformed volume
    xct_volume = register.apply_registration(ut_volume,xct_volume,registration_parameters,ut_resolution,xct_resolution,parallel=True)

    #get the frontwall and backwall of the xct volume
    _,frontwall,backwall = aligner.crop_walls(xct_volume)

    #back to z,y,x (inverse of the transpose above)
    xct_volume = np.transpose(xct_volume, (2, 0, 1))
    ut_volume = np.transpose(ut_volume, (2, 0, 1))

    #compute the onlypores volume and the material mask
    onlypores_volume,material_mask,_ = onlypores.onlypores(xct_volume,frontwall,backwall,min_size_filtering=8)

    #create a folder for the dataset, named after the sample
    dataset_folder = folder / f"{sample_name}"
    dataset_folder.mkdir(parents=True, exist_ok=True)

    #compute one dataset per patch size
    for patch_size in patch_sizes:

        reconstruction_shape,df = datasetmaker.main(onlypores_volume,material_mask,ut_volume,xct_resolution,ut_resolution,ut_patch_size=patch_size, ut_step_size=1)

        #save the dataset
        dataset_path = dataset_folder / f"patch_size_{patch_size}_volfrac_areafrac.csv"
        df.to_csv(dataset_path, index=False)

        #save into the database
        rows = len(df)

        load.load_dataset(conn,
                          datasettype_id=datasettype,
                          file_path=str(dataset_path),
                          rows=rows,
                          patch_size=str(patch_size),
                          targets=targets,
                          reconstruction_shape=reconstruction_shape,
                          registration_ids = [registration_id],
                          description=description
                          )

        print(f"Dataset for registration {registration_id} with patch size {patch_size} saved to {dataset_path}")

Creating dataset for registration 48 with reference measurement \\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\03_UT_data\Probetas JI\probetas\2025 methodology\4\4.tif and registered measurement \\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\4\volume_eq_rotated_aligned.tif
Applying transformation


Transforming slices: 100%|██████████| 203/203 [00:18<00:00, 10.73it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.81 GB
  Available memory: 219.86 GB
  Required estimate: 3.63 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (203, 5000, 1920)
Data bounding box: Z[0:202], Y[886:4126], X[146:1785]
Cropped volume shape: (203, 3245, 1644)
Applying Sauvola adaptive thresholding...
Volume size: 1.01 GB
Available memory: 198.36 GB
Required memory estimate: 2.02 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 8
Excluding back wall: slices 175 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.01 GB
  Available memory: 196.17 GB
  Required estimate: 2.02 GB
  Using parallel implementat

Transforming slices: 100%|██████████| 192/192 [00:22<00:00,  8.54it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.76 GB
  Available memory: 219.72 GB
  Required estimate: 3.52 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (192, 5240, 1880)
Data bounding box: Z[0:191], Y[1021:4315], X[52:1821]
Cropped volume shape: (192, 3299, 1774)
Applying Sauvola adaptive thresholding...
Volume size: 1.05 GB
Available memory: 198.93 GB
Required memory estimate: 2.09 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 8
Excluding back wall: slices 177 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.05 GB
  Available memory: 196.77 GB
  Required estimate: 2.09 GB
  Using parallel implementat

Transforming slices: 100%|██████████| 196/196 [00:21<00:00,  9.31it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.88 GB
  Available memory: 220.27 GB
  Required estimate: 3.75 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (196, 5040, 2040)
Data bounding box: Z[0:195], Y[892:4236], X[77:2001]
Cropped volume shape: (196, 3349, 1929)
Applying Sauvola adaptive thresholding...
Volume size: 1.18 GB
Available memory: 198.14 GB
Required memory estimate: 2.36 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 7
Excluding back wall: slices 173 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.18 GB
  Available memory: 195.73 GB
  Required estimate: 2.36 GB
  Using parallel implementati

Transforming slices: 100%|██████████| 202/202 [00:21<00:00,  9.59it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.92 GB
  Available memory: 219.98 GB
  Required estimate: 3.84 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (202, 5000, 2040)
Data bounding box: Z[0:201], Y[888:4239], X[91:2009]
Cropped volume shape: (202, 3356, 1923)
Applying Sauvola adaptive thresholding...
Volume size: 1.21 GB
Available memory: 197.47 GB
Required memory estimate: 2.43 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 8
Excluding back wall: slices 176 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.21 GB
  Available memory: 194.99 GB
  Required estimate: 2.43 GB
  Using parallel implementati

Transforming slices: 100%|██████████| 205/205 [00:20<00:00,  9.93it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.89 GB
  Available memory: 219.88 GB
  Required estimate: 3.77 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (205, 5040, 1960)
Data bounding box: Z[0:204], Y[1011:4270], X[188:1836]
Cropped volume shape: (205, 3264, 1653)
Applying Sauvola adaptive thresholding...
Volume size: 1.03 GB
Available memory: 198.37 GB
Required memory estimate: 2.06 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 7
Excluding back wall: slices 173 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.03 GB
  Available memory: 195.73 GB
  Required estimate: 2.06 GB
  Using parallel implementa

Transforming slices: 100%|██████████| 203/203 [00:21<00:00,  9.28it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.89 GB
  Available memory: 220.18 GB
  Required estimate: 3.78 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (203, 5000, 2000)
Data bounding box: Z[0:202], Y[919:4174], X[205:1860]
Cropped volume shape: (203, 3260, 1660)
Applying Sauvola adaptive thresholding...
Volume size: 1.02 GB
Available memory: 198.54 GB
Required memory estimate: 2.05 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 12
Excluding back wall: slices 180 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 1.02 GB
  Available memory: 196.30 GB
  Required estimate: 2.05 GB
  Using parallel implementa

Transforming slices: 100%|██████████| 200/200 [00:22<00:00,  8.94it/s]


Transformation applied
Material mask memory analysis:
  Volume size: 1.77 GB
  Available memory: 219.71 GB
  Required estimate: 3.53 GB
  Using parallel implementation...
Computing material mask using parallel processing...
Processing 16 chunks in parallel...
Material mask generation complete.
Starting pore detection analysis...
Computing volume bounding box...
Original volume shape: (200, 4840, 1960)
Data bounding box: Z[0:199], Y[635:3871], X[147:1760]
Cropped volume shape: (200, 3241, 1618)
Applying Sauvola adaptive thresholding...
Volume size: 0.98 GB
Available memory: 198.50 GB
Required memory estimate: 1.95 GB
Using parallel implementation...
Window size adjusted to 31 (must be odd)
Applying Sauvola thresholding with parallel processing...
Excluding front wall: slices 0 to 6
Excluding back wall: slices 174 to end
Generating material mask...
Material mask memory analysis:
  Volume size: 0.98 GB
  Available memory: 196.35 GB
  Required estimate: 1.95 GB
  Using parallel implementat