# Experiment Loading

## Imports

In [None]:
import psycopg2
from tabulate import tabulate
#import a folder in the parent directory
import sys
sys.path.append('../')
import dbtools.dbtools as qrs
import dbtools.load as load
from pathlib import Path

## Connection

In [None]:
# Connect to the PostgreSQL database. A failure is only printed, so `conn`
# stays undefined when the connection cannot be established.
try:
    conn = qrs.connect()
except Exception as error:  # psycopg2.DatabaseError is already an Exception subclass
    print(error)
else:
    print("Connected to the database")

# Loading experiments

We want to load old experiments, so we use the path to the experiment folder to also find all of its models and load them into the database.

In [None]:
# Root folder of the experiment to load (UNC path on a network share).
# The same path is later scanned for the per-model sub-folders.
experiment_path = Path(r'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\05_Models\Juan Ignacio\2025\CNN\IQ\10 mm\top3_KFold7x7')

# description = "Top 3 configurations are trained using a KFold with K=5, each model is trained 5 times in each Fold. Then they are ranked based on the average of each metric. " \
# "Single signal inputed, signal type IQ, 10mm signal range" \
# "Signals are aligned arround the same point 50" \
# "Volfrac is divided in bins using np.histogram with auto in number of bins, then augmentation is applied to each bin until it reaches the same number of samples as the biggest bin." \
# "Data is divided in train test and sample. Using 1 sample for test, another for validation and the rest for training." \
# "Data is normalized based on training data, using minmax normalization for signals and volfrac." \
# "When sampled from the datagenerator each signal is randomly rolled, that is how the augmentation is applied." \
# "Each model is trained 5 times to get an average of the performance."

# description = "Top 10 bayesian found configurations are now trained 5 times each. Then they are ranked based on the average of each metric. " \
# "Single signal inputed, signal type IQ, 10mm signal range" \
# "Signals are aligned arround the same point 50" \
# "Volfrac is divided in bins using np.histogram with auto in number of bins, then augmentation is applied to each bin until it reaches the same number of samples as the biggest bin." \
# "Data is divided in train test and sample. Using 1 sample for test, another for validation and the rest for training." \
# "Data is normalized based on training data, using minmax normalization for signals and volfrac." \
# "When sampled from the datagenerator each signal is randomly rolled, that is how the augmentation is applied." \
# "Each model is trained 5 times to get an average of the performance."

# Free-text description stored with the experiment. The two literals below are
# joined by implicit string concatenation, so the first one must end with a
# space to keep the sentences apart in the stored text.
# NOTE(review): this is a one-element list, whereas the commented-out
# descriptions above are plain strings - confirm which form load_experiment expects.
# NOTE(review): the experiment folder is named 'top3_KFold7x7' while this text
# describes a Hyperband + Bayesian search - confirm they belong together.
description = [
    "Hyperband + Bayesian search. The top 10 configurations of the hyperband search are used to create the search space for Bayesian search, using the max an min of each as bounds. "
    "Test and validation is created by separating two samples from the dataset, then mixing them and splitting 80/20"
]

# Author recorded for the experiment.
author = "Alberto Vicente del Egido"

# CSV datasets used by the experiment, one per 'JI_<n>' sample folder.
# All paths share the same layout and differ only in the sample number, so they
# are generated from a single template; each one is normalised through Path
# and stored as a plain string.
dataset_paths = [
    str(Path(rf'\\192.168.10.106\imdea\DataDriven_UT_AlbertoVicente\04_ML_data\Juan Ignacio\10mm range\JI_{sample}\MonoElement\patch_vs_volfrac_7.csv'))
    for sample in (4, 5, 7, 8, 10, 11, 12)
]

# Search settings of the hyperband and bayesian tuning runs.
# The (misspelled) name is kept as-is because other cells may refer to it.
# NOTE(review): nothing in the visible cells reads this dictionary - confirm
# whether it should be passed on when the experiment is loaded.
aditional_metadata = dict(
    hyperband_objective="val_loss",
    hyperband_max_epochs=200,
    hyperband_factor=3,
    hyperband_iterations=5,
    hyperband_executions_per_trial=5,
    bayesian_trials=100,
    bayesian_warmup=5,
    bayesian_executions_per_trial=5,
)

In [None]:
# Load the experiment through the open connection, passing its folder path,
# description, author and dataset paths.
# NOTE(review): `aditional_metadata` defined above is not forwarded here -
# confirm whether load_experiment is supposed to receive it.
load.load_experiment(
    conn,
    str(experiment_path),
    description,
    author,
    dataset_paths,
)

# Loading models

In [None]:
# Collect the model folders of the experiment, ordered by model number.
# Folder names are expected to look like 'model_1', 'model_2', etc. A directory
# whose name contains 'model' but has no integer after the first underscore
# (e.g. 'models' or 'model_best') is reported and skipped instead of aborting
# the cell while sorting.
numbered_folders = []
for entry in experiment_path.iterdir():
    # only sub-directories whose name contains the word 'model' are candidates
    if not entry.is_dir() or 'model' not in entry.name:
        continue
    try:
        model_number = int(entry.name.split('_')[1])
    except (IndexError, ValueError):
        print(f"Skipping {entry}: folder name does not follow the 'model_<number>' pattern")
        continue
    numbered_folders.append((model_number, entry))

# sort on the number only, so folders sharing a number keep their listing order
numbered_folders.sort(key=lambda pair: pair[0])
folders = [entry for _, entry in numbered_folders]

In [None]:
# Architecture label and description shared by every model loaded below.
# Note: this rebinds `description`, which earlier held the experiment
# description, so re-running the experiment cell after this one would pick up
# the model text instead.
architecture, description = (
    "1D CNN",
    "1D convolutional neural network, single signal input",
)

In [None]:
# Labels used in 'model_info.txt' mapped to the metric names handed to
# load_model. The order below is the order in which the values are parsed.
metric_labels = {
    'Mean Squared Error': 'mse',
    'Root Mean Squared Error': 'rmse',
    'Mean Absolute Error': 'mae',
    'Coverage': 'coverage',
    'Bias': 'bias',
    'R2': 'r2',
}

for folder in folders:
    # Every model folder is expected to contain a 'model_info.txt' summary;
    # a folder whose file cannot be read is reported and skipped.
    model_info_path = folder / 'model_info.txt'
    try:
        info_text = model_info_path.read_text().strip()
    except Exception as e:
        print(f"Error reading {model_info_path}: {e}")
        continue

    # Each useful line is 'key:value'. Only the first ':' separates the two,
    # because the value itself may contain ':'. Lines without ':' are ignored.
    info_fields = {}
    for raw_line in info_text.split('\n'):
        label, separator, content = raw_line.partition(':')
        if separator:
            info_fields[label.strip()] = content.strip()

    try:
        # The parameter counts have no usable default: a missing entry yields ''
        # and int('') raises ValueError, so the model is skipped.
        parameters = int(info_fields.get('Model Parameters', ''))
        trainable_parameters = int(info_fields.get('Trainable Parameters', ''))
        # NOTE(review): a metric that is absent from the file is recorded as 0.0
        # rather than left out - confirm this is intended.
        metrics = {
            metric_name: float(info_fields.get(label, '0'))
            for label, metric_name in metric_labels.items()
        }
    except ValueError as e:
        print(f"Error processing model info for {model_info_path}: {e}")
        continue

    load.load_model(
        conn,
        experiment_folder_path=str(experiment_path),
        model_folder_path=str(folder),
        architecture=architecture,
        description=description,
        parameters=parameters,
        trainable_parameters=trainable_parameters,
        computed_metrics=metrics,
    )