# Training

In [1]:
from IPython.display import clear_output

import numpy as np 
import pandas as pd

import os
import time

from copy import deepcopy

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import explained_variance_score
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, PowerTransformer

from sklearn import metrics

from sklearn import model_selection as dataset_split
from sklearn.preprocessing import StandardScaler

import sys

MODELS_RANDOM_STATE = 22

from sklearn import metrics
def process_results_classification(y_original: np.array, y_predicted: np.array) -> dict:
    precision, recall, _ = metrics.precision_recall_curve(y_original, y_predicted)
    metrics_dic = {"f1_macro":metrics.f1_score(y_original, y_predicted, average='macro'),
                   "VERSION" : 0.1}
    return metrics_dic

def process_results_regression(y_original: np.array, y_predicted: np.array) -> dict:
    metrics_dic = {"MAE":mean_absolute_error(y_original, y_predicted),
                   "VERSION" : 0.2}
    return metrics_dic



def correct_int(val):
    if val is None:
        return val
    else:
        return int(val)

def get_random_model_random_forest_regression_default(model_description:dict=None):
    n_estimators = 100 # 100 is the default
    max_depth =None # None is the default
    model_description = model_description if model_description is not None else {
        "Name" : "Random-Forest",
        "max_depth" : correct_int(max_depth),
        "n_estimators" : correct_int(n_estimators),
        "VERSION" : 0.1
    }
    return RandomForestRegressor(n_estimators=model_description["n_estimators"], 
                                 max_depth=model_description["max_depth"], 
                                 n_jobs=-1, 
                                 verbose=1, 
                                 random_state=MODELS_RANDOM_STATE), model_description


def get_random_model_lr_regression_default(model_description:dict=None):
    alpha = 1.0 #default
    model_description = model_description if model_description is not None else {
        "Name" : "Linear Regression",
        "alpha" : alpha,
        "VERSION" : 0.1
    }
    return ElasticNet(alpha=model_description["alpha"], l1_ratio=0,
                      random_state=MODELS_RANDOM_STATE), model_description

def get_random_model_desicion_tree_regression_default(model_description:dict=None):
    max_depth = None
    model_description = model_description if model_description is not None else {
        "Name" : "CART",
        "max_depth" : correct_int(max_depth),
        "VERSION" : 0.1
    }
    return DecisionTreeRegressor(max_depth=model_description["max_depth"], 
                                 random_state=MODELS_RANDOM_STATE), model_description






models_regression = {
    "LR" : get_random_model_lr_regression_default,
    "CART" : get_random_model_desicion_tree_regression_default,
    "RF" : get_random_model_random_forest_regression_default,
}

models_regression_desc = models_regression


# Dataset Reading

TRAIN_FILE = "./Playa_UPM_resampled_24H_1H.csv"
TEST_FILE = "./Presa_UPM_resampled_24H_1H.csv"

Training_Dataset = pd.read_csv(TRAIN_FILE)
Testing_Dataset = pd.read_csv(TEST_FILE)

# Preprocessing

Training_Dataset["date"] = Training_Dataset["Unnamed: 0"]
Testing_Dataset["date"] = Testing_Dataset["Unnamed: 0"]
Training_Dataset = Training_Dataset.T[1:].T
Testing_Dataset = Testing_Dataset.T[1:].T

orig_size = len(Training_Dataset)
Training_Dataset = Training_Dataset.dropna()

orig_size = len(Testing_Dataset)
Testing_Dataset = Testing_Dataset.dropna()

features_selector = {
    "NO-agg" : lambda x : [c for c in x.columns if ("4" not in c) and ("96" not in c) and (c != "date")],
    "1H-agg" : lambda x : [c for c in x.columns if ("96" not in c) and (c != "date")],
    "24H-agg" : lambda x : [c for c in x.columns if ("4" not in c) and (c != "date")],
    "ALL-agg" : lambda x : [c for c in x.columns if (c != "date")],
}

input_features_selector = lambda x, y : [c for c in features_selector[y](x) if "Chlorophyll" not in c]

output_features_base = {
    "NO-agg" : ['EXO3(Chlorophyll_ug_L)'],
    "1H-mean" : ['EXO3(Chlorophyll_ug_L)_mean_4'],
    "1H-median" : ['EXO3(Chlorophyll_ug_L)_median_4'],
    "24H-mean" : ['EXO3(Chlorophyll_ug_L)_mean_96'],
    "24H-median" : ['EXO3(Chlorophyll_ug_L)_median_96']
}

output_features_selector = lambda x, y : output_features_base[y]

def preprocessing_data(df_training, 
                       df_testing, 
                       input_features, 
                       output_features,
                       val_fraction = 0.20,
                       val_sub_fraction = 0.50,
                       shuffle_val_global = True,
                       shuffle_val_train = True,
                       random_state_split = 66): 
    # val_train_fraction can be set with very small value to kind of ignore it

    X_train, X_val = dataset_split.train_test_split(df_training, 
                                                      test_size=val_fraction, 
                                                      random_state=random_state_split, 
                                                      shuffle=shuffle_val_global)


    X_val_train, X_val_hyper = dataset_split.train_test_split(X_val, 
                                                      test_size=val_sub_fraction, 
                                                      random_state=random_state_split, 
                                                      shuffle=shuffle_val_train)

    Y_train = X_train[output_features]
    X_train = X_train[input_features]

    Y_val_hyper = X_val_hyper[output_features]
    X_val_hyper = X_val_hyper[input_features]

    Y_val_train = X_val_train[output_features]
    X_val_train = X_val_train[input_features]

    Y_test = df_testing[output_features]
    X_test = df_testing[input_features]

    scaler_X = PowerTransformer().fit(X_train)
    scaler_Y = PowerTransformer().fit(Y_train)
    
    X_train = scaler_X.transform(X_train)
    X_val_hyper = scaler_X.transform(X_val_hyper)
    X_val_train = scaler_X.transform(X_val_train)
    X_test = scaler_X.transform(X_test)
    
    Y_train = scaler_Y.transform(Y_train)
    Y_val_hyper = scaler_Y.transform(Y_val_hyper)
    Y_val_train = scaler_Y.transform(Y_val_train)
    Y_test = scaler_Y.transform(Y_test)

    X_train = pd.DataFrame(X_train, columns=input_features)
    X_val_hyper = pd.DataFrame(X_val_hyper, columns=input_features)
    X_val_train = pd.DataFrame(X_val_train, columns=input_features)
    X_test = pd.DataFrame(X_test, columns=input_features)

    Y_train = pd.DataFrame(Y_train, columns=output_features)
    Y_val_hyper = pd.DataFrame(Y_val_hyper, columns=output_features)
    Y_val_train = pd.DataFrame(Y_val_train, columns=output_features)
    Y_test = pd.DataFrame(Y_test, columns=output_features)
    
    dict_dataset = {
        "input" : {
            "train" : X_train,
            "val_train" : X_val_train,
            "val_hyper" : X_val_hyper,
            "test" : X_test,
        },
        "output" : {
            "train" : Y_train,
            "val_train" : Y_val_train,
            "val_hyper" : Y_val_hyper,
            "test" : Y_test
        },
        "scaler_X" : scaler_X,
        "scaler_Y" : scaler_Y,
        "features" : {
            "input" : input_features,
            "output" : output_features
        }
    }
    
    return dict_dataset

    

np.random.seed(2)

results_final = {}

kf = KFold(n_splits=10, shuffle=False)
SEED= 99
i = 0
j = 0
ONE_FOLD = False
for model_selection in models_regression:
    j = -1
    model, model_desc_base = models_regression[model_selection]()

    ONE_FOLD = False
    if model_desc_base["Name"] not in results_final:
        results_final[model_desc_base["Name"]] = {} # Hacer un diccionario con resultados y almacenarlo.

    for boya_0_part, boya_1_part in zip(kf.split(Training_Dataset), kf.split(Testing_Dataset)):

        training_dataset = pd.concat([Training_Dataset.iloc[boya_0_part[0]], Testing_Dataset.iloc[boya_1_part[0]]], ignore_index=True)
        testing_dataset = pd.concat([Training_Dataset.iloc[boya_0_part[1]], Testing_Dataset.iloc[boya_1_part[1]]], ignore_index=True)

        j += 1

        if ONE_FOLD:
            continue
        #ONE_FOLD = True
        for output_features_name in ["NO-agg"]:
            for input_features_name in ["NO-agg"]:
                if output_features_name not in results_final[model_desc_base["Name"]]:
                    results_final[model_desc_base["Name"]][output_features_name] = {}
                    
                if input_features_name not in results_final[model_desc_base["Name"]][output_features_name]:
                    results_final[model_desc_base["Name"]][output_features_name][input_features_name] = {}
                    results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b0"] = []
                    results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b1"] = []
                    
                input_features = input_features_selector(training_dataset, input_features_name)
                output_feature_selected = output_features_selector(training_dataset, output_features_name)

                local_dataset = preprocessing_data(training_dataset, 
                                           testing_dataset, 
                                           input_features, 
                                           output_feature_selected,
                                           shuffle_val_global=False,
                                           shuffle_val_train=False)


                general_experiment_name = model_selection +"_" + input_features_name +"_" + output_features_name + "_m_"+str(i) +"_k_"+ str(j)


                X_train_local = local_dataset["input"]["train"].values
                X_val_local = local_dataset["input"]["val_train"].values 
                X_val_train_local = local_dataset["input"]["val_hyper"].values 
                X_test_local = local_dataset["input"]["test"].values 
                print("Selected input column type: full")


                Y_train_local = local_dataset["output"]["train"][output_feature_selected].values
                Y_val_local = local_dataset["output"]["val_train"][output_feature_selected].values 
                Y_val_train_local = local_dataset["output"]["val_hyper"][output_feature_selected].values 
                Y_test_local = local_dataset["output"]["test"][output_feature_selected].values 
                print("Selected output column type: ", output_feature_selected)


                model, model_desc = models_regression_desc[model_selection](model_desc_base)

                # FIT
                training_time = time.time()
                model.fit(X_train_local, Y_train_local)
                training_time = time.time() - training_time

                feautres_importance = None
                if model_selection == "RF":
                    feautres_importance = {name:val for name, val in zip(input_features, model.feature_importances_)}




                # VALIDATE
                validation_time = time.time()

                Y_preds = model.predict(X_val_local)

                Y_val_local = local_dataset["scaler_Y"].inverse_transform(Y_val_local)
                Y_preds = local_dataset["scaler_Y"].inverse_transform(Y_preds.reshape(-1, 1)) 
                validation_time = time.time() - validation_time
                validation_shape = X_val_local.shape
                validation_speed = validation_shape[0] / validation_time

                results_desc_validation = process_results_regression(Y_val_local, Y_preds)


                # TEST
                testing_time = time.time()

                Y_preds_test = model.predict(X_test_local)

                Y_test_local = local_dataset["scaler_Y"].inverse_transform(Y_test_local)
                Y_preds_test = local_dataset["scaler_Y"].inverse_transform(Y_preds_test.reshape(-1, 1)) 
                
                testing_time = time.time() - testing_time
                
                testing_dataset_boya_0 = Training_Dataset.iloc[boya_0_part[1]][['date', output_feature_selected[0]]].copy()
                testing_dataset_boya_0[output_feature_selected[0]+ "_predict"] = Y_preds_test[:len(boya_0_part[1])]

                testing_dataset_boya_1 = Testing_Dataset.iloc[boya_1_part[1]][['date', output_feature_selected[0]]].copy()
                testing_dataset_boya_1[output_feature_selected[0]+ "_predict"] = Y_preds_test[len(boya_0_part[1]):]

                
                results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b0"].append(testing_dataset_boya_0)
                results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b1"].append(testing_dataset_boya_1)
                
                
                testing_shape = X_test_local.shape

                testing_speed = testing_shape[0] / testing_time

                results_desc_test = process_results_regression(Y_test_local, Y_preds_test)



                del(local_dataset)
                del(X_train_local)
                del(X_test_local)
                del(X_val_local)
                del(X_val_train_local)
                del(Y_train_local)
                del(Y_test_local)
                del(Y_val_local)
                del(Y_val_train_local)
                del(Y_preds)
                del(Y_preds_test)
                del(model)
                #clear_output(wait=True)
    results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b0"] = pd.concat(results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b0"])
    results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b1"] = pd.concat(results_final[model_desc_base["Name"]][output_features_name][input_features_name]["b1"])

Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.6s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.4s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.3s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.0s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  loglike = -n_samples / 2 * np.log(x_trans.var())


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.4s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


Selected input column type: full
Selected output column type:  ['EXO3(Chlorophyll_ug_L)']


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.5s finished
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed:    0.1s finished
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


In [2]:
for c in results_final:
    for b in results_final[c]["NO-agg"]["NO-agg"]:
        print(c,b)
        print(process_results_regression(results_final[c]["NO-agg"]["NO-agg"][b]["EXO3(Chlorophyll_ug_L)"], results_final[c]["NO-agg"]["NO-agg"][b]["EXO3(Chlorophyll_ug_L)_predict"]))
        print(process_results_classification(results_final[c]["NO-agg"]["NO-agg"][b]["EXO3(Chlorophyll_ug_L)"] > 10, results_final[c]["NO-agg"]["NO-agg"][b]["EXO3(Chlorophyll_ug_L)_predict"] > 10))

Linear Regression b0
{'MAE': 5.264582900794799, 'VERSION': 0.2}
{'f1_macro': 0.448955887844654, 'VERSION': 0.1}
Linear Regression b1
{'MAE': 5.242184919587791, 'VERSION': 0.2}
{'f1_macro': 0.41245593822546167, 'VERSION': 0.1}
CART b0
{'MAE': 6.519254841997629, 'VERSION': 0.2}
{'f1_macro': 0.6414448849331442, 'VERSION': 0.1}
CART b1
{'MAE': 5.610813193091852, 'VERSION': 0.2}
{'f1_macro': 0.6162709963012495, 'VERSION': 0.1}
Random-Forest b0
{'MAE': 5.767308251543529, 'VERSION': 0.2}
{'f1_macro': 0.6741258049896166, 'VERSION': 0.1}
Random-Forest b1
{'MAE': 4.9131910593587484, 'VERSION': 0.2}
{'f1_macro': 0.6377984851724358, 'VERSION': 0.1}
