# Measurement files UT Loading

Notebook to load Measurement files ut data into the databse

## Imports

In [1]:
import psycopg2
import numpy as np
#import a folder in the parent directory
import sys
sys.path.append('../')
import dbtools.dbtools as qrs
from pathlib import Path
import tifffile as tiff
import os
from tqdm import tqdm

## Auxiliar functions

In [2]:
def read_sequence2(folder_path):
    """
    Read a sequence of TIFF files in a folder as a 3D volume.

    Args:
    folder_path (str): Path to the folder containing TIFF files.

    Returns:
    numpy.ndarray: A 3D array where each slice corresponds to a TIFF file.
    """

    # List and sort the TIFF files
    tiff_files = sorted(
        [
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if (f.endswith(".tiff") or f.endswith(".tif"))
        ]
    )

    # Get the total number of TIFF files
    total_files = len(tiff_files)

    # Read each TIFF file and update progress
    volume = []

    for i, file_path in tqdm(enumerate(tiff_files), total=total_files, desc="Reading TIFF files"):
        slice_data = tiff.imread(file_path)
        volume.append(slice_data)

    return np.array(volume)

## Connection

In [3]:
try:
    # Connect to the PostgreSQL database
    conn = qrs.connect()
    print("Connected to the database")

except (Exception, psycopg2.DatabaseError) as error:
    print(error)

Connected to the database


## File loading

Here we have to load the file of the measurement to be saved in the database.

The parent file path has to be explained in depth: Measurements include more than one sample, but then the files are cropped so each sample can be treated individually. Parent file represents the file from where the one being saved is extracted from.

If the file being saved is not extracted from any other, set paren_file_path to None

In [4]:
parent_file_path = Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\5\volume_eq')

file_path = Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\5\volume_eq_rotated')

# Check if the path is a directory or a file
if file_path.is_dir():
    # If it's a directory, read all TIFF files in the directory
    file = read_sequence2(file_path)
else:
    # If it's a file, read the single TIFF file
    file = tiff.imread(file_path)

Reading TIFF files: 100%|██████████| 3224/3224 [00:41<00:00, 77.54it/s]


## Parent file ID

A measurement file can be extracted from another one, for exmaple in UT we measure in the same inspection samples 1,2,3 and 4. An then we crop them individually from the initial file so we have a separate file for each of the samples.

This relationship let us keep track of this process.

However if the file is the initial one, there is no need to fill this paremeter

In [9]:
if parent_file_path is not None:

    parent_file_id_table_name = 'measurements'

    parent_file_data = qrs.get_data_metadata(parent_file_id_table_name)

    parent_file_row = parent_file_data[parent_file_data['file_path_measurement'] == str(parent_file_path)]

    parent_file_id = parent_file_row['id_measurement'].values[0]

    measurement_type_id = parent_file_row['measurementtype_id_measurement'].values[0]

    print(f"Parent file ID: {parent_file_id}, Measurement Type ID: {measurement_type_id}")

Parent file ID: 27, Measurement Type ID: 2


# Measurement type data

We have to set the measurment type id, in this case is the ultrasound 2024 JI methodology so the id is 1.

In case you are not sure of the mesaurement type id, you can check it in the measurement_type table in the next cell.

Remember that if there is a parent file, the measurement_type id will be forced to be the same as the paren measurement.

In [10]:
parent_table_name = 'measurementtypes'

parent_id_column = parent_table_name[:-1] + '_id'

parent_data = qrs.get_data(parent_table_name)

parent_data

Unnamed: 0,id_measurementtype,name_measurementtype,description_measurementtype
0,2,XCT_2024,"First XCT measurements, 2024 metodology."
1,3,UT_2024,"UT measurements, 2024 metodology. Used for the..."
2,4,UT_2025_1,"UT measurements, 2025 metodology. Measuring at..."


In [11]:
#we set the id
parent_id = 2

if parent_file_path is not None:

    parent_id = measurement_type_id

print(f"Using measurementtype: {parent_id}")

Using measurementtype: 2


## Measurement data

Now we have to set the attributes and metadata of the measurements:

1. Main attributes

    1. Sample_names: Its a list with the names of all the samples present in the file. If the names are not written as in the database it wont work so be precise.

    2. file_path: The file location

    3. Parent_file_id: In this variable save the path to the parent file. A parent file is the file where the file we want to save was precomputed from, for example the amplitude file is computed from the rf file. This will be automatically extracted later.

2. Metadata

    1. Dimensions: Dimensions are automatically computed from the loaded file

    2. Dtype: The precision type of the file

    3. File_type: The file extension, if its a folder of tiffs the type is folder
    

In [13]:
sample_names = ['JI_5']

main_parameters = {'file_path':file_path,parent_id_column:parent_id}

# metadata

#for each parameter of the measurement a metadata has to be created

metadata_parameters = []

#dimensions
metadata_parameters.append({'key':'height', 'value':str(file.shape[0]), 'type':'cardinal'})

metadata_parameters.append({'key':'width', 'value':str(file.shape[1]), 'type':'cardinal'})

metadata_parameters.append({'key':'depth', 'value':str(file.shape[2]), 'type':'cardinal'})

#parent id
main_parameters['parent_measurement_id'] = parent_file_id

#dtype

metadata_parameters.append({'key':'dtype', 'value':str(file.dtype), 'type':'nominal'})

#file type

if file_path.is_dir():
    metadata_parameters.append({'key':'file_type', 'value':'folder', 'type':'nominal'})
else:
    metadata_parameters.append({'key':'file_type', 'value':file_path.suffix[1:], 'type':'nominal'})

#zflip
metadata_parameters.append({'key':'zflip', 'value':'True', 'type':'boolean'})

#vflip
metadata_parameters.append({'key':'vflip', 'value':'True', 'type':'boolean'})

#aligned

metadata_parameters.append({'key':'aligned', 'value':'False', 'type':'boolean'})

#equalized

metadata_parameters.append({'key':'equalized', 'value':'True', 'type':'boolean'})

## Load into the table

In [14]:
print('Parameters to be inserted: ')
for key, value in main_parameters.items():
    print(f"-    {key}: {value}")

Parameters to be inserted: 
-    file_path: \\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\5\volume_eq_rotated
-    measurementtype_id: 2
-    parent_measurement_id: 27


In [28]:
table_name = 'measurements'

# Extract column names and values from the attributes dictionary
columns = ', '.join(main_parameters.keys())
values = ', '.join([f"'{v}'" for v in main_parameters.values()])

# Construct the SQL INSERT statement
sql = f"INSERT INTO {table_name} ({columns}) VALUES ({values})"

print(sql)

# Create a cursor object using the cursor() method
cursor = conn.cursor()

# Execute the SQL statement
cursor.execute(sql)

cursor.execute('COMMIT')

cursor.close()

INSERT INTO measurements (file_path, measurementtype_id, parent_measurement_id) VALUES ('\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\5\volume_eq_rotated', '2', '27')


## Make sure insert was correct

In [29]:
data = qrs.data_parent(table_name,parent_table_name,parent_id_column)

data[data['file_path_measurement'] == str(file_path)]

Unnamed: 0,id_measurement,file_path_measurement,parent_measurement_id_measurement,height_measurement,width_measurement,depth_measurement,dtype_measurement,file_type_measurement,signal_type_measurement,aligned_measurement,...,X Resolution_measurementtype,Y Resolution_measurementtype,transducer z position_measurementtype,gain_measurementtype,sound velocity in water_measurementtype,sound velocity in sample_measurementtype,transducer movement speed_measurementtype,sample transducer distance_measurementtype,Nominal Frequency_measurementtype,transducer diameter_measurementtype
39,61,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,27.0,,,,,,,,...,,,,,,,,,,


Now we store the id of the inserted row, to insert its related metadata

In [30]:
row_id = data['id_measurement'].values[-1]

print(f"Row ID: {row_id}")

Row ID: 61


## Load into the metadata table

In [31]:
metadata_table_name =  table_name[:-1] + '_metadata'

for attributes in metadata_parameters:

    attributes[table_name[:-1] + '_id'] = row_id

    # Extract column names and values from the attributes dictionary
    columns = ', '.join(attributes.keys())
    values = ', '.join([f"'{v}'" for v in attributes.values()])

    # Construct the SQL INSERT statement
    sql = f"INSERT INTO {metadata_table_name} ({columns}) VALUES ({values})"

    print(sql)

    # Create a cursor object using the cursor() method
    cursor = conn.cursor()

    # Execute the SQL statement
    cursor.execute(sql)

    cursor.execute('COMMIT')

    cursor.close()

INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('height', '3224', 'cardinal', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('width', '1629', 'cardinal', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('depth', '239', 'cardinal', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('dtype', 'uint8', 'nominal', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('file_type', 'folder', 'nominal', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('zflip', 'True', 'boolean', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('vflip', 'True', 'boolean', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('aligned', 'False', 'boolean', '61')
INSERT INTO measurement_metadata (key, value, type, measurement_id) VALUES ('equalized', 'True', 'boolean',

## Make sure insert was correct

In [32]:
data = qrs.data_parent(table_name,parent_table_name,parent_id_column)

data[data['file_path_measurement'] == str(file_path)]

Unnamed: 0,id_measurement,file_path_measurement,parent_measurement_id_measurement,height_measurement,width_measurement,depth_measurement,dtype_measurement,file_type_measurement,signal_type_measurement,aligned_measurement,...,X Resolution_measurementtype,Y Resolution_measurementtype,transducer z position_measurementtype,gain_measurementtype,sound velocity in water_measurementtype,sound velocity in sample_measurementtype,transducer movement speed_measurementtype,sample transducer distance_measurementtype,Nominal Frequency_measurementtype,transducer diameter_measurementtype
39,61,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,27.0,3224 cardinal,1629 cardinal,239 cardinal,uint8 nominal,folder nominal,,False boolean,...,,,,,,,,,,


## Load in the sample measurement table

In a measurement from 1 to n samples can be present.

We save them in the sample_measurements table.

We have to locate the samples that are present in this measurement to load them in a table that relates them.

We need the ids of the samples so we filter the samples table to get the ids of the samples in the samples names list

In [33]:
samples_data = qrs.get_data_metadata('samples')

#get the ids of the samples in sample_names
samples_data = samples_data[samples_data['name_sample'].isin(sample_names)]

sample_ids = samples_data['id_sample'].values.tolist()

print('sample_ids', sample_ids)

sample_ids [2]


In [34]:
relational_table_name = 'sample_measurements'

for sample_id in sample_ids:

    relational_parameters = {'sample_id': sample_id, 'measurement_id': row_id}

    # Extract column names and values from the attributes dictionary
    columns = ', '.join(relational_parameters.keys())
    values = ', '.join([f"'{v}'" for v in relational_parameters.values()])

    # Construct the SQL INSERT statement
    sql = f"INSERT INTO {relational_table_name} ({columns}) VALUES ({values})"

    print(sql)

    # Create a cursor object using the cursor() method
    cursor = conn.cursor()

    # Execute the SQL statement
    cursor.execute(sql)

    cursor.execute('COMMIT')

    cursor.close()

INSERT INTO sample_measurements (sample_id, measurement_id) VALUES ('2', '61')


## Make sure insert was correct

In [35]:
data = qrs.relation_metadata(table_name,'samples',relational_table_name)

# data[data['file_path_measurement'] == str(file_path)]

data

Unnamed: 0,id_measurement,file_path_measurement,parent_measurement_id_measurement,measurementtype_id_measurement,height_measurement,width_measurement,depth_measurement,dtype_measurement,file_type_measurement,signal_type_measurement,...,vflip_measurement,equalized_measurement,id_sample,name_sample,panel_id_sample,geometry_type_sample,height_sample,width_sample,thickness_sample,keyhole_sample
0,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,,3,505 cardinal,203 cardinal,372 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,1,JI_4,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
1,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,,3,505 cardinal,203 cardinal,372 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,2,JI_5,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
2,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,,3,505 cardinal,203 cardinal,372 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,3,JI_7,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
3,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,,3,505 cardinal,203 cardinal,372 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,4,JI_8,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
4,3,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,1.0,3,505 cardinal,120 cardinal,45 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,1,JI_4,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,48,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,43.0,4,448 cardinal,126 cardinal,49 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,6,JI_10,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
60,49,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,43.0,4,448 cardinal,125 cardinal,50 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,7,JI_11,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
61,50,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,43.0,4,448 cardinal,121 cardinal,49 cardinal,uint8 nominal,tif nominal,RF nominal,...,,,8,JI_12,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
62,60,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,26.0,2,3224 cardinal,1610 cardinal,240 cardinal,uint8 nominal,folder nominal,,...,True boolean,True boolean,1,JI_4,1,parallel nominal,150 mm,40 mm,5 mm,True Boolean
