# Datsets UT Loading

Notebook to load Datsets ut data into the databse

## Imports

In [None]:
import psycopg2
import numpy as np
#import a folder in the parent directory
import sys
sys.path.append('../')
import queries.queries as qrs
from pathlib import Path
import tifffile as tiff
import pandas as pd
import os

## Connection

In [None]:
try:
    # Connect to the PostgreSQL database
    conn = qrs.connect()
    print("Connected to the database")

except (Exception, psycopg2.DatabaseError) as error:
    print(error)

Connected to the database


## File loading

We load the dataset to check the path is correct and to extract some info

In [36]:
file_path = Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\04_ML_data\Juan Ignacio\JI_4\MonoElement\patch_vs_volfrac_3.csv')

df = pd.read_csv(file_path)

df.head(5)

Unnamed: 0,ut_rf_0,ut_rf_1,ut_rf_2,ut_rf_3,ut_rf_4,ut_rf_5,ut_rf_6,ut_rf_7,ut_rf_8,ut_rf_9,...,ut_rf_4537,ut_rf_4538,ut_rf_4539,ut_rf_4540,ut_rf_4541,ut_rf_4542,ut_rf_4543,ut_rf_4544,volfrac,areafrac
0,129.0,128.0,128.0,129.0,128.0,129.0,129.0,128.0,129.0,128.0,...,129.0,129.0,128.0,129.0,128.0,128.0,129.0,128.0,0.000644,0.068681
1,129.0,128.0,128.0,129.0,129.0,128.0,128.0,129.0,128.0,129.0,...,128.0,128.0,129.0,128.0,129.0,129.0,128.0,129.0,0.000468,0.050764
2,128.0,129.0,129.0,128.0,129.0,128.0,129.0,128.0,129.0,128.0,...,129.0,128.0,129.0,129.0,128.0,128.0,129.0,129.0,0.001459,0.107014
3,128.0,129.0,128.0,129.0,129.0,128.0,129.0,128.0,128.0,129.0,...,128.0,129.0,128.0,129.0,128.0,128.0,129.0,128.0,0.00135,0.094167
4,128.0,129.0,129.0,128.0,128.0,129.0,129.0,128.0,129.0,128.0,...,128.0,129.0,128.0,129.0,128.0,129.0,128.0,128.0,0.001306,0.091042


## Datset data

Now we have to set the attributes and metadata of the Datsets:

1. Main attributes

    1. file_paths: Its a list with the paths of all the present files in the dataset. If the paths are not written as in the database it wont work so be precise.

    2. file_path: The dataset location

2. Metadata

    1. rows: Number of instances in the dataset

    2. shape: Shape to reconstruct the images from the dataset.

    3. patch_size: Size of the patch

    4. target: Target of the dataset, may be Volfrac, Areafrac, or any xct extracted feature. Multiple targets can be in the same dataset.
    

In [37]:
file_paths = [Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\02_XCT_data\Juan Ignacio\probetas\4\frontal_90right.tif'),Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\03_UT_data\Probetas JI\probetas\4\4.tif')]

main_parameters = {'file_path':file_path}

description = 'Monoelement RF vs volfrac and areafrac. First functional group of datasets from 2024'

main_parameters['description'] = description

# metadata

#for each parameter of the Datset a metadata has to be created

metadata_parameters = []

#rows
metadata_parameters.append({'key':'rows', 'value':len(df), 'type':'cardinal'})

#shape
# metadata_parameters.append({'key':'shape', 'value':, 'type':'cardinal'})

#patch_size
metadata_parameters.append({'key':'patch_size', 'value':3, 'type':'pixels'})

#target
metadata_parameters.append({'key':'target', 'value':'volfrac', 'type':'nominal'})

#target
metadata_parameters.append({'key':'target2', 'value':'areafrac', 'type':'nominal'})

## Load into the table

In [38]:
print('Parameters to be inserted: ')
for key, value in main_parameters.items():
    print(f"-    {key}: {value}")

Parameters to be inserted: 
-    file_path: \\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\04_ML_data\Juan Ignacio\JI_4\MonoElement\patch_vs_volfrac_3.csv
-    description: Monoelement RF vs volfrac and areafrac. First functional group of datasets from 2024


In [39]:
table_name = 'datasets'

# Extract column names and values from the attributes dictionary
columns = ', '.join(main_parameters.keys())
values = ', '.join([f"'{v}'" for v in main_parameters.values()])

# Construct the SQL INSERT statement
sql = f"INSERT INTO {table_name} ({columns}) VALUES ({values})"

print(sql)

# Create a cursor object using the cursor() method
cursor = conn.cursor()

# Execute the SQL statement
cursor.execute(sql)

cursor.execute('COMMIT')

cursor.close()

INSERT INTO datasets (file_path, description) VALUES ('\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\04_ML_data\Juan Ignacio\JI_4\MonoElement\patch_vs_volfrac_3.csv', 'Monoelement RF vs volfrac and areafrac. First functional group of datasets from 2024')


## Make sure insert was correct

In [None]:
data = qrs.get_data_metadata(table_name)

data

Unnamed: 0,id_dataset,file_path_dataset,description_dataset
0,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,Monoelement RF vs volfrac and areafrac. First ...


In [41]:
row_id = data['id_dataset'].values[-1]

print(f"Row ID: {row_id}")

Row ID: 1


## Load into the metadata table

In [None]:
metadata_table_name =  table_name[:-1] + '_metadata'

for attributes in metadata_parameters:

    attributes[table_name[:-1] + '_id'] = row_id

    # Extract column names and values from the attributes dictionary
    columns = ', '.join(attributes.keys())
    values = ', '.join([f"'{v}'" for v in attributes.values()])

    # Construct the SQL INSERT statement
    sql = f"INSERT INTO {metadata_table_name} ({columns}) VALUES ({values})"

    print(sql)

    # Create a cursor object using the cursor() method
    cursor = conn.cursor()

    # Execute the SQL statement
    cursor.execute(sql)

    cursor.execute('COMMIT')

    cursor.close()

INSERT INTO dataset_metadata (key, value, type, dataset_id) VALUES ('rows', '2686', 'cardinal', '1')
INSERT INTO dataset_metadata (key, value, type, dataset_id) VALUES ('patch_size', '3', 'pixels', '1')
INSERT INTO dataset_metadata (key, value, type, dataset_id) VALUES ('target', 'volfrac', 'nominal', '1')
INSERT INTO dataset_metadata (key, value, type, dataset_id) VALUES ('target2', 'areafrac', 'nominal', '1')


## Make sure insert was correct

In [None]:
data = qrs.get_data_metadata(table_name)

data

Unnamed: 0,id_dataset,file_path_dataset,description_dataset,rows_dataset,patch_size_dataset,target_dataset,target2_dataset
0,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,Monoelement RF vs volfrac and areafrac. First ...,2686 cardinal,3 pixels,volfrac nominal,areafrac nominal


## Load in the dataset measurement table

In a dataset from 1 to n samples can be present.

We save them in the dataset_measurements table.

We have to locate the measurement that contains the samples present in this dataset to link it.

In [None]:
measurement_data = qrs.get_data_metadata('measurements')

measurement_ids = []

for path in file_paths:

    measurement_ids.append(measurement_data[measurement_data['file_path_measurement'] == str(path)]['id_measurement'].values[0])

assert len(measurement_ids) == len(file_paths), 'The number of measurements is not the same as the number of file paths'

In [45]:
relational_table_name = 'dataset_measurements'

for measurement_id in measurement_ids:

    relational_parameters = {'dataset_id': row_id, 'measurement_id': measurement_id}

    # Extract column names and values from the attributes dictionary
    columns = ', '.join(relational_parameters.keys())
    values = ', '.join([f"'{v}'" for v in relational_parameters.values()])

    # Construct the SQL INSERT statement
    sql = f"INSERT INTO {relational_table_name} ({columns}) VALUES ({values})"

    print(sql)

    # Create a cursor object using the cursor() method
    cursor = conn.cursor()

    # Execute the SQL statement
    cursor.execute(sql)

    cursor.execute('COMMIT')

    cursor.close()

INSERT INTO dataset_measurements (dataset_id, measurement_id) VALUES ('1', '41')
INSERT INTO dataset_measurements (dataset_id, measurement_id) VALUES ('1', '3')


## Make sure insert was correct

In [None]:
data = qrs.relation_metadata(table_name,'measurements',relational_table_name)

data

Unnamed: 0,id_dataset,file_path_dataset,description_dataset,rows_dataset,patch_size_dataset,target_dataset,target2_dataset,id_measurement,file_path_measurement,parent_measurement_id_measurement,measurementtype_id_measurement,height_measurement,width_measurement,depth_measurement,dtype_measurement,file_type_measurement,signal_type_measurement,aligned_measurement
0,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,Monoelement RF vs volfrac and areafrac. First ...,2686 cardinal,3 pixels,volfrac nominal,areafrac nominal,41,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,26.0,2,225 cardinal,3279 cardinal,1542 cardinal,uint8 nominal,folder nominal,,True boolean
1,1,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,Monoelement RF vs volfrac and areafrac. First ...,2686 cardinal,3 pixels,volfrac nominal,areafrac nominal,3,\\192.168.10.106\imdea\DataDriven_UT_AlbertoVi...,1.0,3,505 cardinal,120 cardinal,45 cardinal,uint8 nominal,tif nominal,RF nominal,
