In [2]:
import glob
import re
import json
import itertools
import pandas as pd
from pathlib import Path

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import joblib
from sklearn.model_selection import TimeSeriesSplit

# all the functions from helpers.py
from helpers_scenario2 import *

In [3]:
annotations_folder = '../data/raw/scenario_2/fold_0/train/annotations/'
# physiology_folder = "../data/preprocessed/cleaned/scenario_1/fold_0/train/physiology/" #'../data/raw/scenario_1/train/physiology/'
physiology_folder = "../data/preprocessed/cleaned/scenario_2/fold_0/train/physiology/" #'../data/raw/scenario_1/train/physiology/'

df_physiology = load_read_and_append_csvs(physiology_folder)
df_annotations = load_read_and_append_csvs(annotations_folder)

videos = df_physiology.video.unique()
subjects = df_physiology.subject.unique()

splits = split_subjects_train_test(subjects, 3)

In [4]:
from concurrent.futures import ProcessPoolExecutor, as_completed, ThreadPoolExecutor
import multiprocessing

num_cpu_cores = multiprocessing.cpu_count()

# Define aggregate metric combinations
aggregate_combinations = [
    # ['enlarged'],
    ['mean'],
    # ['std'],
    # ['max'],
    ['min'],
    ['mean', 'std'],
    ['mean', 'max'],
    ['mean', 'min'],
    # ['std', 'max'],
    # ['std', 'min'],
    # ['max', 'min'],
    ['mean', 'std', 'max', 'min']
]

# Define models and hyperparameters
models_hyperparameters = [
    # (LinearRegression, {}),
    # (SVR, {
    #     'kernel': ['linear', 'rbf'],
    #     'C': [0.1, 1, 10],
    #     'epsilon': [0.1, 1],
    #     'gamma': ['scale', 'auto'],  # Only used for 'rbf' kernel
    # }),
    (RandomForestRegressor, {
        'n_estimators': [50, 100],
        'max_depth': [10, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
        'max_features': ['auto', 'sqrt'],
    }),
    (XGBRegressor, {
        'n_estimators': [50, 100],
        'max_depth': [6, 10],
    #     'learning_rate': [0.01, 0.1],
    #     'subsample': [0.5, 0.8],
        # 'colsample_bytree': [0.5, 0.8],
        # 'reg_alpha': [0, 0.1],
        # 'reg_lambda': [0.1, 1],
    }),
]

# Define a function to process a single hyperparameter set
def process_hp_set(hp_set, model, hyperparameters, iter_aggregate, splits, df_physiology_video, df_annotations_video):
    hp_dict = dict(zip(hyperparameters.keys(), hp_set))
    model_name = model.__name__

    print(f"Testing model: {model_name} with hyperparameters: {hp_dict} and aggregate: {iter_aggregate}")

    rmses = []
    for split in splits:
        print(split)

        X_train, X_test, y_train, y_test, numeric_column_indices, categorical_column_indices = preprocess(
            df_physiology_video.copy(), df_annotations_video.copy(), split=split, predictions_cols=['arousal', 'valence'], aggregate=iter_aggregate,
            window_duration=10000, resample_rate=100)

        rmse = time_series_cross_validation_with_hyperparameters(
            X_train, X_test, y_train, y_test, model, hp_dict, n_jobs=1,
            numeric_column_indices=numeric_column_indices,
            categorical_column_indices=categorical_column_indices)

        rmses.append(rmse)

    average_rmse = np.mean(rmses, axis=0)

    if y_train.ndim > 1 and y_train.shape[1] > 1:
        # Unpack the average_rmse array into separate keys in the result dictionary
        result = {
            'model': model_name,
            'hyperparameters': hp_dict,
            'aggregate': iter_aggregate,
            'average_rmse_arousal': average_rmse[0],
            'average_rmse_valence': average_rmse[1],
        }
    else:
        result = {
            'model': model_name,
            'hyperparameters': hp_dict,
            'aggregate': iter_aggregate,
            'average_rmse': average_rmse
        }

    return result

# Initialize an empty DataFrame for the best results and a dictionary for all results
best_results_df = pd.DataFrame()
all_results = {}

for video in videos:
    print(f"Processing video: {video}")

    df_physiology_video = df_physiology.loc[df_physiology.video == video]
    df_annotations_video = df_annotations.loc[df_annotations.video == video]

    results = []

    for iter_aggregate in aggregate_combinations:
        # Preprocess data

        for model, hyperparameters in models_hyperparameters:

            # Use ProcessPoolExecutor to parallelize the loop
            with ThreadPoolExecutor(max_workers=num_cpu_cores) as executor:
                # Prepare the list of arguments for each task
                tasks = [
                    (hp_set, model, hyperparameters, iter_aggregate, splits, df_physiology_video, df_annotations_video)
                    for hp_set in itertools.product(*hyperparameters.values())
                ]

                # Submit the tasks to the executor
                futures = [
                    executor.submit(process_hp_set, *task_args)
                    for task_args in tasks
                ]

                # Collect the results as they complete
                for future in as_completed(futures):
                    result = future.result()
                    results.append(result)

    results_df = pd.DataFrame(results)

    #if y_train.ndim > 1 and y_train.shape[1] > 1:
        # Find the best result for each output variable
    best_result_output_1 = results_df.loc[results_df['average_rmse_arousal'].idxmin()]
    best_result_output_2 = results_df.loc[results_df['average_rmse_valence'].idxmin()]

    # Concatenate the best results for each output variable to the best_results_df
    best_results_df =pd.concat([best_results_df, best_result_output_1.to_frame().T, best_result_output_2.to_frame().T], ignore_index=True)
    #else:
    #    best_result = results_df.loc[results_df['average_rmse'].idxmin()]
#
    #    best_results_df = best_results_df.append(best_result, ignore_index=True)

    all_results[f"{annotation_file}_{physiology_file}"] = results_df.to_dict(orient='records')

    # Save all_results as JSON
    with open('../results/scenario_2/clean_all_results_shallow_models.json', 'w') as f:
        json.dump(all_results, f, default=str, indent=4)

    # Save best_results_df as CSV
    best_results_df.to_csv('../results/scenario_2/clean_shallow_models_best_result.csv', index=False)

print("\nThe best combination of features and hyperparameters for each file pair is:")
print(best_results_df)



Processing video: 0


NameError: name 'aggregate_combinations' is not defined

In [4]:
# Define aggregate metric combinations
aggregate_combinations = [
    ['enlarged'],
    ['mean'],
    # ['std'],
    # ['max'],
    # ['min'],
    # ['mean', 'std'],
    # ['mean', 'max'],
    # ['mean', 'min'],
    # ['std', 'max'],
    # ['std', 'min'],
    # ['max', 'min'],
    # ['mean', 'std', 'max', 'min']
]

# Define models and hyperparameters
models_hyperparameters = [
    # (LinearRegression, {}),
    # (SVR, {
    #     'kernel': ['linear', 'rbf'],
    #     'C': [0.1, 1, 10],
    #     'epsilon': [0.1, 1],
    #     'gamma': ['scale', 'auto'],  # Only used for 'rbf' kernel
    # }),
    (RandomForestRegressor, {
        'n_estimators': [50, 100],
        'max_depth': [10, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
        'max_features': ['auto', 'sqrt'],
    }),
    (XGBRegressor, {
        'n_estimators': [50, 100],
        'max_depth': [6, 10],
    #     'learning_rate': [0.01, 0.1],
    #     'subsample': [0.5, 0.8],
        # 'colsample_bytree': [0.5, 0.8],
        # 'reg_alpha': [0, 0.1],
        # 'reg_lambda': [0.1, 1],
    }),
]

# Initialize an empty DataFrame for the best results and a dictionary for all results
best_results_df = pd.DataFrame()
all_results = {}

for video in videos:
    print(f"Processing video: {video}")
    
    df_physiology_video = df_physiology.loc[df_physiology.video == video]
    df_annotations_video = df_annotations.loc[df_annotations.video == video]

    results = []
    
    for iter_aggregate in aggregate_combinations:
        # Preprocess data
        
        for model, hyperparameters in models_hyperparameters:
            for hp_set in itertools.product(*hyperparameters.values()):
                hp_dict = dict(zip(hyperparameters.keys(), hp_set))
                model_name = model.__name__

                print(f"Testing model: {model_name} with hyperparameters: {hp_dict} and aggregate: {iter_aggregate}")
                
                rmses = []
                for split in splits:
                    print(split)
                
                    X_train,X_test, y_train, y_test, numeric_column_indices, categorical_column_indices = preprocess(
                        df_physiology_video.copy(), df_annotations_video.copy(), split = split, predictions_cols=['arousal','valence'], aggregate=iter_aggregate,
                        window_duration=10000, resample_rate=100)

                    rmse = time_series_cross_validation_with_hyperparameters(
                        X_train, X_test, y_train, y_test,  model, hp_dict,  n_jobs=1,
                        numeric_column_indices=numeric_column_indices,
                        categorical_column_indices=categorical_column_indices)
                    
                    rmses.append(rmse)
                
                average_rmse = np.mean(rmses, axis=0)
                
                if  y_train.ndim > 1 and y_train.shape[1] > 1:
                    # Unpack the average_rmse array into separate keys in the result dictionary
                    result = {
                        'model': model_name,
                        'hyperparameters': hp_dict,
                        'aggregate': iter_aggregate,
                        'average_rmse_arousal': average_rmse[0],
                        'average_rmse_valence': average_rmse[1], 
                    }
                else:
                    result = {
                        'model': model_name,
                        'hyperparameters': hp_dict,
                        'aggregate': iter_aggregate,
                        'average_rmse': average_rmse
                    }
                results.append(result)

    results_df = pd.DataFrame(results)

    if  y_train.ndim > 1 and y_train.shape[1] > 1:
        # Find the best result for each output variable
        best_result_output_1 = results_df.loc[results_df['average_rmse_arousal'].idxmin()]
        best_result_output_2 = results_df.loc[results_df['average_rmse_valence'].idxmin()]

        # Concatenate the best results for each output variable to the best_results_df
        best_results_df = pd.concat([best_results_df, best_result_output_1.to_frame().T, best_result_output_2.to_frame().T], ignore_index=True)
    else:  
        best_result = results_df.loc[results_df['average_rmse'].idxmin()]
        
        best_results_df = best_results_df.append(best_result, ignore_index=True)

    all_results[f"{annotation_file}_{physiology_file}"] = results_df.to_dict(orient='records')
    
    # Save all_results as JSON
    with open('../results/scenario_2/clean_all_results_shallow_models.json', 'w') as f:
        json.dump(all_results, f, default=str, indent=4)

    # Save best_results_df as CSV
    best_results_df.to_csv('../results/scenario_2/clean_shallow_models_best_result.csv', index=False)


print("\nThe best combination of features and hyperparameters for each file pair is:")
print(best_results_df)

Processing video: 0
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'auto'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.65093277 2.16972022]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40498257 1.61700246]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.49464396 1.2986428 ]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.45202517 1.74173056]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.71885026 1.66810826]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.3344826  1.21560157]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 2, 'min_samp

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.67490758 2.22115923]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40857629 1.62387147]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.49832247 1.30201438]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.45837759 1.72768409]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.72197946 1.65661731]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.33020309 1.23121309]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 5, 'min_samp

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.68913476 2.1956626 ]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40977403 1.62651507]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.50432946 1.29635023]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.46102701 1.73355671]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.7232734  1.65291906]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.33673531 1.22158494]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 5, 'min_samp

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.68480264 2.20963765]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40594964 1.64084573]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.51810315 1.3042963 ]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.46569948 1.70059473]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.75744398 1.64726812]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.33012502 1.23205496]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'min_sa

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.69639473 2.22072147]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40970915 1.61415724]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.49785221 1.29934807]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.48661172 1.70698283]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.79185441 1.66494269]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.32179669 1.21134936]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'min_

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.65282816 2.2396825 ]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.410002   1.61518439]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.50398341 1.29469763]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.4967294  1.72497663]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.74379942 1.681544  ]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.32515675 1.21431645]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 5, 'min_

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.66160094 2.19702844]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40872865 1.61189157]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.51009673 1.29562027]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.47627221 1.69373039]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.75267845 1.68019316]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.32447039 1.21702084]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 5, 'min_

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.67692572 2.21506045]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40981674 1.62186346]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.50860019 1.29905689]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.48717123 1.7428824 ]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.74818144 1.67373267]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.32476171 1.21141403]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2, 'min_s

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.65496788 2.20275652]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40276588 1.61914908]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.50059251 1.29972925]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.45548524 1.72863037]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.72237267 1.65172682]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.32863688 1.21705734]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2, 'min_sa

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.67339928 2.1886443 ]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.41040705 1.61895826]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.49648517 1.29880361]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.46890793 1.71401672]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.71895443 1.64969887]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.34000268 1.21870458]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5, 'min_sa

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.66811199 2.18478363]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.41010674 1.61453126]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.5025922  1.30151852]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.47672943 1.73948783]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.72833992 1.65897759]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}
Average Root Mean Squared Error per output: [1.327225   1.22900837]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5, 'min_sa

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.64154466 2.22293818]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [2.40895596 1.61398474]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Average Root Mean Squared Error per output: [1.50588439 1.30063251]
Testing model: RandomForestRegressor with hyperparameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt'} and aggregate: ['enlarged']
{'train': [31, 2, 45, 7, 5, 32, 18, 11, 17, 0, 30, 28, 27, 24, 20, 9], 'test': array([43, 41,  1, 34, 13, 29, 26, 15], dtype=int64)}
Average Root Mean Squared Error per output: [1.45697879 1.72678681]
{'train': [43, 2, 41, 7, 1, 32, 34, 11, 13, 0, 29, 28, 26, 24, 15, 9], 'test': array([31, 45,  5, 18, 17, 30, 27, 20], dtype=int64)}
Average Root Mean Squared Error per output: [1.70827415 1.64815048]
{'train': [43, 31, 41, 45, 1, 5, 34, 18, 13, 17, 29, 30, 26, 27, 15, 20], 'test': array([ 2,  7, 32, 11,  0, 28, 24,  9], dtype=int64)}


KeyboardInterrupt: 