In [1]:
%load_ext lab_black
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import os
import glob
import tqdm
import umap
import lightgbm as lgb
import numpy as np

from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

import matplotlib.pyplot as plt

from utils import DATA_SETS, gather_df, prepare_df, model_function

In [2]:
DATA_SETS

['aquifer_auser',
 'water_spring_amiata',
 'aquifer_petrignano',
 'aquifer_doganella',
 'aquifer_luco',
 'river_arno',
 'lake_bilancino',
 'water_spring_lupa',
 'water_spring_madonna_di_canneto']

In [None]:
import optuna
import json

result_folder = "sim-res-final"

info_file_name = "./{}/info.json".format(result_folder)
if os.path.exists(info_file_name):
    with open(info_file_name) as f:
        info = json.loads(f.read())
else:
    info = []

for dataset in DATA_SETS:
    df = gather_df(dataset, True)
    target_cols = [c for c in df.columns if "target" in c]

    for target_col in target_cols:
        for pred_ahead in [14, 28, 56]:
            # compute the size of location_matrix_W, which is variable per dataset
            n_feats = (
                len([c for c in df.columns if "ws10m_max" in c])
                + len([c for c in df.columns if "rainfall" in c])
                + len([c for c in df.columns if "temperature" in c])
            )

            # this is the optimization function that we are going to hyperoptimize
            def objective(trial, return_dataframes=False):
                location_array_W = []
                for i in range(n_feats):
                    location_array_W.append(trial.suggest_float("W_{}".format(i), 0, 1))
                dfp_val, dfp_test, lgb_model = model_function(
                    dataset,
                    location_array_W,
                    pred_ahead,
                    target_col,
                    extended_data=trial.suggest_categorical(
                        "extended_data", [True, False]
                    ),
                    impute_missing=trial.suggest_categorical(
                        "impute_missing", [True, False]
                    ),
                    do_extract=trial.suggest_categorical("do_extract", [True, False]),
                    shift_features=trial.suggest_categorical(
                        "shift_features", [True, False]
                    ),
                    use_early_stopping=trial.suggest_categorical(
                        "use_early_stopping", [True, False]
                    ),
                    lgb_boosting_type=trial.suggest_categorical(
                        "lgb_boosting_type", ["gbdt", "goss", "dart"]
                    ),
                    lgb_num_leaves=trial.suggest_categorical(
                        "lgb_num_leaves", [10, 31, 50]
                    ),
                    lgb_learning_rate=trial.suggest_uniform(
                        "lgb_learning_rate", 0.01, 0.3
                    ),
                    lgb_max_depth=trial.suggest_int(
                        "lgb_max_depth", -1, 10
                    ),  # -1 here means infinite
                )
                if return_dataframes:
                    return dfp_val, dfp_test
                return np.mean(
                    np.abs(dfp_val.p - dfp_val.y)
                )  # we optimize on the MAE of the validation dataset

            study = optuna.create_study()
            study.optimize(objective, n_trials=100)

            # the outputting dataframe with optimal parameters
            location_array_W_optim = []
            for k, v in study.best_params.items():
                if "W_" in k:
                    location_array_W_optim.append(v)

            dfp_val, dfp_test, lgb_model = model_function(
                dataset,
                location_array_W_optim,
                pred_ahead,
                target_col,
                extended_data=study.best_params["extended_data"],
                impute_missing=study.best_params["impute_missing"],
                do_extract=study.best_params["do_extract"],
                shift_features=study.best_params["shift_features"],
                use_early_stopping=study.best_params["use_early_stopping"],
                lgb_boosting_type=study.best_params["lgb_boosting_type"],
                lgb_num_leaves=study.best_params["lgb_num_leaves"],
                lgb_learning_rate=study.best_params["lgb_learning_rate"],
                lgb_max_depth=study.best_params["lgb_max_depth"],
            )

            run_id = np.random.randint(1, 100000000000000)

            mae_test = np.mean(np.abs(dfp_test.p - dfp_test.y))
            rmse_test = np.sqrt(np.mean((dfp_test.p - dfp_test.y) ** 2))

            mae_val = np.mean(np.abs(dfp_val.p - dfp_val.y))
            rmse_val = np.sqrt(np.mean((dfp_val.p - dfp_val.y) ** 2))

            info.append(
                {
                    "run_id": run_id,
                    "best_params": study.best_params,
                    "location_weights": location_array_W_optim,
                    "dataset": dataset,
                    "target_col": target_col,
                    "pred_ahead": pred_ahead,
                    "mae_test": mae_test,
                    "rmse_test": rmse_test,
                    "mae_val": mae_val,
                    "rmse_val": rmse_val,
                    "dfp_test_start": str(dfp_test.index[0]),
                    "dfp_test_length": len(dfp_test.index),
                    "mae_normalized_val": (
                        np.abs((dfp_val.y - dfp_val.p))
                        / np.abs(dfp_val.original.mean())
                    ).mean(),
                    "mae_normalized_test": (
                        np.abs((dfp_test.y - dfp_test.p))
                        / np.abs(dfp_test.original.mean())
                    ).mean(),
                }
            )

            with open(info_file_name, "w+") as f:
                json.dump(info, f)
            dfp_val.reset_index().to_feather(
                "./{}/{}-validation.feather".format(result_folder, run_id)
            )
            dfp_test.reset_index().to_feather(
                "./{}/{}-test.feather".format(result_folder, run_id)
            )

[I 2021-02-17 12:34:35,975] Trial 38 finished with value: 3.5398209441702044 and parameters: {'W_0': 0.067957980404099, 'W_1': 0.8183215973636786, 'W_2': 0.029505285881224013, 'W_3': 0.30323337049113824, 'W_4': 0.9071966018204325, 'W_5': 0.8100794436261479, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': False, 'use_early_stopping': False, 'lgb_boosting_type': 'gbdt', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.19093354064002532, 'lgb_max_depth': -1}. Best is trial 27 with value: 2.21916287595357.
[I 2021-02-17 12:34:36,198] Trial 39 finished with value: 3.969962323297958 and parameters: {'W_0': 0.22007807339383695, 'W_1': 0.9492078375526685, 'W_2': 0.16020086541985748, 'W_3': 0.19316002741206922, 'W_4': 0.7293011641206483, 'W_5': 0.9021890812451094, 'extended_data': True, 'impute_missing': True, 'do_extract': True, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.248660



[I 2021-02-17 12:35:02,886] Trial 0 finished with value: 4.939320620194138 and parameters: {'W_0': 0.7035178965401797, 'W_1': 0.5165996953670825, 'W_2': 0.6939642332039284, 'W_3': 0.5452698789474132, 'W_4': 0.5184357503168293, 'W_5': 0.7271445828856226, 'extended_data': False, 'impute_missing': True, 'do_extract': True, 'shift_features': False, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.19861411696891138, 'lgb_max_depth': 5}. Best is trial 0 with value: 4.939320620194138.
[I 2021-02-17 12:35:03,039] Trial 1 finished with value: 6.36563101403383 and parameters: {'W_0': 0.5043831558826619, 'W_1': 0.611130562852176, 'W_2': 0.5001289091954517, 'W_3': 0.943081914147461, 'W_4': 0.7753217687440361, 'W_5': 0.8331697571489926, 'extended_data': True, 'impute_missing': False, 'do_extract': False, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'gbdt', 'lgb_num_leaves': 10, 'lgb_learning_rate': 0.2750122301583301



[I 2021-02-17 12:35:04,521] Trial 5 finished with value: 4.47427003494198 and parameters: {'W_0': 0.6382188350914204, 'W_1': 0.5487398948684069, 'W_2': 0.1088927715487924, 'W_3': 0.8099971806892486, 'W_4': 0.7675270766645018, 'W_5': 0.7288505176073536, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.24411694630398334, 'lgb_max_depth': 6}. Best is trial 5 with value: 4.47427003494198.
[I 2021-02-17 12:35:04,693] Trial 6 finished with value: 5.842802973727494 and parameters: {'W_0': 0.061595509446337404, 'W_1': 0.5930778144856504, 'W_2': 0.2373449180997158, 'W_3': 0.6015051582352329, 'W_4': 0.6396608020406646, 'W_5': 0.1446773728405384, 'extended_data': False, 'impute_missing': True, 'do_extract': False, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.27730185643378



[I 2021-02-17 12:35:05,850] Trial 10 finished with value: 3.9778467342457984 and parameters: {'W_0': 0.2377087407348481, 'W_1': 0.9863037858340495, 'W_2': 0.0423758062887622, 'W_3': 0.9873919308789677, 'W_4': 0.9961897371544257, 'W_5': 0.9940906213299305, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.012410733489863518, 'lgb_max_depth': 5}. Best is trial 10 with value: 3.9778467342457984.




[I 2021-02-17 12:35:06,452] Trial 11 finished with value: 4.131526475743107 and parameters: {'W_0': 0.22767480702197826, 'W_1': 0.9995929946354241, 'W_2': 0.0290730667176599, 'W_3': 0.995559433467538, 'W_4': 0.9944538119294752, 'W_5': 0.9759625650369397, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.021305307004279482, 'lgb_max_depth': 5}. Best is trial 10 with value: 3.9778467342457984.
[I 2021-02-17 12:35:06,959] Trial 12 finished with value: 3.7599479052509444 and parameters: {'W_0': 0.1877535517993928, 'W_1': 0.9491797352950329, 'W_2': 0.03424219532156753, 'W_3': 0.9988821276026116, 'W_4': 0.9415167360344486, 'W_5': 0.9947242633502371, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.01370213



[I 2021-02-17 12:35:16,771] Trial 30 finished with value: 4.803280014860068 and parameters: {'W_0': 0.2922324629452654, 'W_1': 0.9351536496310741, 'W_2': 0.4140327916322469, 'W_3': 0.17643783823642095, 'W_4': 0.7248488947357576, 'W_5': 0.014767869840482284, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.17297716149014944, 'lgb_max_depth': 6}. Best is trial 23 with value: 3.6971901211720826.
[I 2021-02-17 12:35:17,283] Trial 31 finished with value: 3.805984242878481 and parameters: {'W_0': 0.183072730179075, 'W_1': 0.9949339830101411, 'W_2': 0.37206300834145956, 'W_3': 0.25234824076671264, 'W_4': 0.9204128092394062, 'W_5': 0.9968914705746297, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.0198817



[I 2021-02-17 12:35:25,596] Trial 49 finished with value: 4.181897236012677 and parameters: {'W_0': 0.0015015433243527956, 'W_1': 0.9907479495547673, 'W_2': 0.27516288547984047, 'W_3': 0.18551280455952043, 'W_4': 0.5930850049868628, 'W_5': 0.6497358928314362, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.03372852111870626, 'lgb_max_depth': 10}. Best is trial 23 with value: 3.6971901211720826.
[I 2021-02-17 12:35:26,108] Trial 50 finished with value: 3.7343357687540575 and parameters: {'W_0': 0.08257124413231627, 'W_1': 0.8633962852307494, 'W_2': 0.20126423371928284, 'W_3': 0.052102634232883, 'W_4': 0.7809114069163594, 'W_5': 0.4377884874883081, 'extended_data': False, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.025



[I 2021-02-17 12:35:55,328] Trial 7 finished with value: 6.1732088602573665 and parameters: {'W_0': 0.79793940015142, 'W_1': 0.9946576338108004, 'W_2': 0.8817148342778277, 'W_3': 0.2306315517495332, 'W_4': 0.33070440381268495, 'W_5': 0.39414025186859447, 'extended_data': False, 'impute_missing': False, 'do_extract': False, 'shift_features': False, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.06988137196886336, 'lgb_max_depth': 6}. Best is trial 3 with value: 4.579593696266298.
[I 2021-02-17 12:35:55,923] Trial 8 finished with value: 5.153083608975184 and parameters: {'W_0': 0.15661015138271894, 'W_1': 0.9046113497393633, 'W_2': 0.6813502031999334, 'W_3': 0.7552854618173649, 'W_4': 0.8671127713698339, 'W_5': 0.819699974826283, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.120727571818



[I 2021-02-17 12:36:49,398] Trial 1 finished with value: 0.3822186099440834 and parameters: {'W_0': 0.893280966622699, 'W_1': 0.2010118342953794, 'W_2': 0.8454447722309932, 'W_3': 0.842679476920432, 'W_4': 0.0014406570742377944, 'W_5': 0.7312601203596981, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.11844299392985475, 'lgb_max_depth': 5}. Best is trial 0 with value: 0.3805267017798912.




[I 2021-02-17 12:36:49,508] Trial 2 finished with value: 0.44377306131178784 and parameters: {'W_0': 0.26200786657090014, 'W_1': 0.4918308759255502, 'W_2': 0.8661750818178624, 'W_3': 0.6893955086371794, 'W_4': 0.006900784486391864, 'W_5': 0.3065226801012211, 'extended_data': False, 'impute_missing': False, 'do_extract': False, 'shift_features': False, 'use_early_stopping': False, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.283161172404073, 'lgb_max_depth': 5}. Best is trial 0 with value: 0.3805267017798912.
[I 2021-02-17 12:36:49,958] Trial 3 finished with value: 0.4100546568291954 and parameters: {'W_0': 0.3409464071546846, 'W_1': 0.2726436014879734, 'W_2': 0.9962359603857248, 'W_3': 0.31380840728605053, 'W_4': 0.9963187930665525, 'W_5': 0.691389448652054, 'extended_data': False, 'impute_missing': False, 'do_extract': False, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.136298



[I 2021-02-17 12:36:52,331] Trial 8 finished with value: 0.38597618292427127 and parameters: {'W_0': 0.34203613494026164, 'W_1': 0.9410463804860311, 'W_2': 0.3036823938270429, 'W_3': 0.6306838798630666, 'W_4': 0.8630492970389148, 'W_5': 0.005595162001041709, 'extended_data': False, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'gbdt', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.027606452334191144, 'lgb_max_depth': 6}. Best is trial 0 with value: 0.3805267017798912.
[I 2021-02-17 12:36:52,890] Trial 9 finished with value: 0.3885061644959493 and parameters: {'W_0': 0.24047438541238841, 'W_1': 0.7060542316662013, 'W_2': 0.3448779025653551, 'W_3': 0.686604812554156, 'W_4': 0.7800555167832166, 'W_5': 0.8037285061939956, 'extended_data': False, 'impute_missing': True, 'do_extract': True, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.20956372



[I 2021-02-17 12:36:53,892] Trial 11 finished with value: 0.3903760064895969 and parameters: {'W_0': 0.6734982431238187, 'W_1': 0.02024255773221162, 'W_2': 0.16204055994435734, 'W_3': 0.9978998889342319, 'W_4': 0.08311042205694458, 'W_5': 0.6891548610096969, 'extended_data': True, 'impute_missing': True, 'do_extract': True, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.10703250760037107, 'lgb_max_depth': 9}. Best is trial 0 with value: 0.3805267017798912.
[I 2021-02-17 12:36:54,480] Trial 12 finished with value: 0.3807584224161824 and parameters: {'W_0': 0.01005390943008, 'W_1': 0.06785645979264238, 'W_2': 0.004149536604553639, 'W_3': 0.8989448562237963, 'W_4': 0.6270590473299477, 'W_5': 0.7840183506833309, 'extended_data': True, 'impute_missing': True, 'do_extract': True, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 10, 'lgb_learning_rate': 0.18412567



[I 2021-02-17 12:37:05,862] Trial 37 finished with value: 0.3920414149373964 and parameters: {'W_0': 0.9913319076916176, 'W_1': 0.23298557078201795, 'W_2': 0.2547057574516594, 'W_3': 0.5561929942199716, 'W_4': 0.5395978350745159, 'W_5': 0.07573200506614461, 'extended_data': False, 'impute_missing': False, 'do_extract': True, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.29270954689534745, 'lgb_max_depth': 5}. Best is trial 34 with value: 0.3510685619626499.
[I 2021-02-17 12:37:06,374] Trial 38 finished with value: 0.427388913502028 and parameters: {'W_0': 0.8935011575846321, 'W_1': 0.06867078924517267, 'W_2': 0.5058001589280154, 'W_3': 0.7246085543489388, 'W_4': 0.43253956857003895, 'W_5': 0.498226054127041, 'extended_data': False, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': True, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 10, 'lgb_learning_rate': 0.2172284



[I 2021-02-17 12:37:37,891] Trial 7 finished with value: 0.5639233132094368 and parameters: {'W_0': 0.6066780546180061, 'W_1': 0.04957358146767643, 'W_2': 0.18488935376697413, 'W_3': 0.7825479717065706, 'W_4': 0.6885255547613455, 'W_5': 0.2839383898766471, 'extended_data': True, 'impute_missing': False, 'do_extract': False, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.29782671364614105, 'lgb_max_depth': 7}. Best is trial 2 with value: 0.4709037996685573.
[I 2021-02-17 12:37:38,356] Trial 8 finished with value: 0.4938222993235571 and parameters: {'W_0': 0.2857627167317217, 'W_1': 0.702444728533427, 'W_2': 0.7365174989927127, 'W_3': 0.5308618096976392, 'W_4': 0.8979907107765576, 'W_5': 0.22809170186491934, 'extended_data': True, 'impute_missing': True, 'do_extract': False, 'shift_features': True, 'use_early_stopping': False, 'lgb_boosting_type': 'gbdt', 'lgb_num_leaves': 50, 'lgb_learning_rate': 0.234075606



[I 2021-02-17 12:37:41,549] Trial 15 finished with value: 0.4980612586844817 and parameters: {'W_0': 0.2592492031812237, 'W_1': 0.5105084751540181, 'W_2': 0.25284770109532084, 'W_3': 0.9071131876753, 'W_4': 0.21006499167432308, 'W_5': 0.6446182813034333, 'extended_data': True, 'impute_missing': True, 'do_extract': True, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'dart', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.013949230857407902, 'lgb_max_depth': 5}. Best is trial 2 with value: 0.4709037996685573.
[I 2021-02-17 12:37:41,752] Trial 16 finished with value: 0.5004733483132837 and parameters: {'W_0': 0.6778002642701023, 'W_1': 0.36161949004051847, 'W_2': 0.5771773486122116, 'W_3': 0.5856074463324263, 'W_4': 0.5165740411338535, 'W_5': 0.3852610875827976, 'extended_data': True, 'impute_missing': True, 'do_extract': True, 'shift_features': False, 'use_early_stopping': True, 'lgb_boosting_type': 'goss', 'lgb_num_leaves': 31, 'lgb_learning_rate': 0.04698281835

In [6]:
with open(info_file_name, "w+") as f:
    json.dump(info, f)

FileNotFoundError: [Errno 2] No such file or directory: './sim-res2/info.json'

In [4]:
dfp_test

Unnamed: 0_level_0,p,y,original
index_col,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-10-20,-109.311626,-109.51,-109.37
2018-10-21,-109.321621,-109.52,-109.38
2018-10-22,-109.331615,-109.53,-109.39
2018-10-23,-109.341610,-109.54,-109.40
2018-10-24,-109.351605,-109.55,-109.41
...,...,...,...
2020-06-12,-78.588033,-73.93,-78.63
2020-06-13,-78.248214,-73.60,-78.29
2020-06-14,-77.858422,-73.14,-77.90
2020-06-15,-67.188506,-72.88,-77.43


In [8]:
-1 + 1e-6

-0.999999

In [6]:
dfp_val

Unnamed: 0_level_0,p,y,original
index_col,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-24,-0.0,-103.169905,-103.03
2017-01-25,-0.0,-103.179905,-103.04
2017-01-28,-0.0,-103.209905,-103.07
2017-01-29,-0.0,-103.219905,-103.08
2017-01-30,-0.0,-103.229905,-103.09
...,...,...,...
2018-09-30,-0.0,-109.309934,-109.19
2018-10-01,-0.0,-109.319934,-109.20
2018-10-02,-0.0,-109.329934,-109.21
2018-10-03,-0.0,-109.339934,-109.22


In [42]:
dfp_val.reset_index().to_feather("./sim-res/{}-validation.feather".format(run_id))
dfp_test.reset_index().to_feather("./sim-res/{}-test.feather".format(run_id))

# info.append({
#     'run_id': run_id,
#     'best_params': best_params,
#     'location_weights': location_array_W_optim,
#     'dataset': dataset,
#     'target_col': target_col,
#     'pred_ahead': pred_ahead
# })
# with open(info_file_name, 'w') as f:
#     json.dump(info, f)