# Imports and Configs

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Imports

In [3]:
# Imports
# Various python imports
import datetime
import math
import pickle
import collections
import os.path as osp
import configparser
import copy

# ML imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

# Jupyter
from IPython.core.display import display, HTML

# My code
import Process
import Load
import Eval
import Models
import Split
import TestInstanceParams

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Configs

In [49]:
# configuration file
CONF_FILE_NAME = "run_conf.ini"
conf = configparser.ConfigParser()

In [50]:
config = configparser.ConfigParser()
config['Path'] = {'DATA_BASE_DIR ': r'C:\Users\User\לימודים\תואר שני\פרויקט גמר\Shir\Data',
                  'OUTPUT_BASE_DIR ': r'C:\Users\User\לימודים\תואר שני\פרויקט גמר\Shir\ML_Wave\output',
                     'ADCP ': 'ADCP_lev_Dec17Apr18.csv',
                 'BUOY_DEMO': 'haifa_cameriJan17.csv',
                 'BUOY' :'Buoy_Nov17_Mar18.csv',
                  'MODEL_DEEP' : 'ww3_lev_Dec17_Apr18.csv',
                  'MODEL_SHALLOW': 'ww3_shik_Dec17_Apr18.csv'}

config['Pref'] = {'ADCP_PREF': 'a',
                  'BUOY_PREF': 'b',
                  'PHYS_DEEP_PREF': 'ma',
                 'PHYS_SHALLOW_PREF': 'mb'}
config['Run Params']={}
with open(CONF_FILE_NAME, 'w') as configfile:
    config.write(configfile)

In [51]:
config.sections()

['Path', 'Pref', 'Run Params']

In [52]:
conf.read(CONF_FILE_NAME)

['run_conf.ini']

In [66]:
# red file and load consts

#OUTPUT_BASE_DIR = conf['Path']['OUTPUT_BASE_DIR']
OUTPUT_BASE_DIR=r'C:\Users\User\לימודים\תואר שני\פרויקט גמר\Shir\ML_Wave\output'
ADCP_PREF = conf['Pref']['ADCP_PREF']
BUOY_PREF = conf['Pref']['BUOY_PREF']
PHYS_DEEP_PREF = conf['Pref']['PHYS_DEEP_PREF']
PHYS_SHALLOW_PREF = conf['Pref']['PHYS_SHALLOW_PREF']

files_and_pref = [\
    [conf['Path']['ADCP'], conf['Pref']['ADCP_PREF']],
    [conf['Path']['BUOY'], conf['Pref']['BUOY_PREF']],
    [conf['Path']['MODEL_DEEP'], conf['Pref']['PHYS_DEEP_PREF']],
    [conf['Path']['MODEL_SHALLOW'], conf['Pref']['PHYS_SHALLOW_PREF']]
                 ]

In [67]:
files_and_pref

[['ADCP_lev_Dec17Apr18.csv', 'a'],
 ['Buoy_Nov17_Mar18.csv', 'b'],
 ['ww3_lev_Dec17_Apr18.csv', 'ma'],
 ['ww3_shik_Dec17_Apr18.csv', 'mb']]

# Load Data

In [68]:
#full_data = Load.load_all_data(files_and_pref, conf['Path']['DATA_BASE_DIR'])
new_dir=r'C:\Users\User\לימודים\תואר שני\פרויקט גמר\Shir\Data'
full_data = Load.load_all_data(files_and_pref, new_dir)

In [69]:
full_data.head()

Unnamed: 0_level_0,a_hs,a_dir,a_Tm,b_hs,b_dir,b_Tm,ma_hs,ma_dir,ma_Tm,mb_hs,mb_dir,mb_Tm
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-12-04 11:00:00,0.9,333.0,33.05,0.3,93.0,2.63,0.284,87.0,4.22,0.284,316.0,5.73
2017-12-04 11:10:00,0.6,333.0,9.82,0.3,88.0,2.61,0.282,86.0,4.24,0.282,315.0,5.81
2017-12-04 11:20:00,0.6,284.0,9.46,0.3,84.0,2.59,0.279,84.0,4.27,0.279,314.0,5.87
2017-12-04 11:30:00,0.6,235.0,9.62,0.3,84.0,2.72,0.277,81.0,4.29,0.277,313.0,5.93
2017-12-04 11:40:00,0.5,186.0,4.3,0.3,50.0,2.76,0.275,78.0,4.31,0.275,313.0,5.98


## Prepare Data Functions

In [70]:
def downsample_data(data, ratio_for_downsample=2):
    return data.iloc[range(0, data.shape[0], ratio_for_downsample)]

def get_feature_and_target_data(data, target_col_name, is_target_in_features=True):
    if type(data) == list:
        target = [d[[target_col_name]] for d in data]
        if not is_target_in_features:
            data = [d.drop(target_col_name, axis=1) for d in data]
    else:
        target = data[[target_col_name]]
        if not is_target_in_features:
            data = data.drop(target_col_name, axis=1)
    return data, target

# Utils for running tests

## Fold running functions (run single fold, run kfold)

In [71]:
def run_single_fold_train_test(df, phys_target, run_params, pre, curr_fold_num):
    fold_dict = {}
    fold_dict["fold_num"] = curr_fold_num
    train, val, test, phys_val, phys_test = Split.kfold_split_train_test(df, curr_fold_num,
                                                k=run_params.k, phys_target=phys_target)
    pre.fit(*get_feature_and_target_data(
        train, run_params.target_col, run_params.is_target_in_input))
    fold_dict["preprocess"] = pre
    X_train, y_train, dates_y_train = pre.transform(
        *get_feature_and_target_data(train, run_params.target_col, run_params.is_target_in_input))
    X_val, y_val, dates_y_val = pre.transform(
        *get_feature_and_target_data(val, run_params.target_col, run_params.is_target_in_input))
    X_test, y_test, dates_y_test = pre.transform(
        *get_feature_and_target_data(test, run_params.target_col, run_params.is_target_in_input))
    input_dim = X_train.shape[2]
    model_structure_args = {"look_back": run_params.train_steps, "input_dimension": input_dim,
                           "build_config_description": run_params.desc_str + "_f{}".format(curr_fold_num)}
    
    fold_dict["train"] = {}
    fold_dict["val"] = {}
    fold_dict["test"] = {}
    
    fold_dict["train"]["dates"] = dates_y_train
    fold_dict["val"]["dates"] = dates_y_val
    fold_dict["test"]["dates"] = dates_y_test

    with tf.device("/cpu:0"):
        curr_model = run_params.model_class(**model_structure_args)

    with tf.device("/cpu:0"):
        # save trained model
        curr_model = curr_model.fit(X_train, y_train, val_data=(X_val, y_val), **run_params.model_args)
    
    fold_dict["model"] = curr_model
    
    fold_dict["test"]["pred"] = pre.inverse_scale_target(fold_dict["model"].predict(X_test))
    fold_dict["test"]["true"]  = pre.inverse_scale_target(y_test.reshape(-1, 1))
    fold_dict["test"]["ww3"]  = phys_test.iloc[run_params.train_steps + run_params.pred_forward:].values.reshape(-1,1)
    
    fold_dict["val"]["pred"] = pre.inverse_scale_target(fold_dict["model"].predict(X_val))
    fold_dict["val"]["true"] = pre.inverse_scale_target(y_val.reshape(-1, 1))
    fold_dict["val"]["ww3"] = phys_val.iloc[run_params.train_steps + run_params.pred_forward:].values.reshape(-1,1)
    
    fold_dict["train"]["pred"] = pre.inverse_scale_target(fold_dict["model"].predict(X_train))
    fold_dict["train"]["true"] = pre.inverse_scale_target(y_train.reshape(-1, 1))
    
    fold_dict["results_test"] = Eval.eval_pred_phys_const(fold_dict["test"], pre)
    fold_dict["results_val"] = Eval.eval_pred_phys_const(fold_dict["val"] , pre)
    # for train we don't look at ww3 model or const guess. interesting only to see if there's overfit
    train_eval = Eval.eval_model(
        fold_dict["train"]["true"], fold_dict["train"]["pred"])
    fold_dict["results_train"] = pd.Series(train_eval, name="ML")
    return fold_dict

def run_kfold_train_test(df, phys_target, run_params, pre):
    folds_run_data = {}
    folds_run_data["run_params"] = run_params
    results_test = []
    results_val = []
    results_train = []
    folds_to_run_on = list(range(run_params.k))
    if run_params.num_folds_to_run:
    # if num_folds_to_run < k, prefer running on last folds
        folds_to_run_on = folds_to_run_on[-run_params.num_folds_to_run:]
    folds_run_data["folds_dict"] = {}
    for i in folds_to_run_on:
        print("##### Running on fold {} #####".format(i))
        curr_fold_results = run_single_fold_train_test(df, phys_target, run_params, pre, i)
        folds_run_data["folds_dict"][i] = curr_fold_results
        results_test.append(folds_run_data["folds_dict"][i]["results_test"].assign(fold=i))
        results_val.append(folds_run_data["folds_dict"][i]["results_val"].assign(fold=i))
        results_train.append(folds_run_data["folds_dict"][i]["results_train"].to_frame().assign(fold=i))
    results_test = pd.concat(results_test)
    results_val = pd.concat(results_val)
    results_train = pd.concat(results_train)
    results_test = results_test.set_index(['fold', results_test.index])
    results_val = results_val.set_index(['fold', results_val.index])
    results_train = results_train.set_index(['fold', results_train.index])
    folds_run_data["results_test"] = results_test
    folds_run_data["results_val"] = results_val
    folds_run_data["results_train"] = results_train
    return folds_run_data

## One Cell to rule them all

In [72]:
def multi_func_run(data, *, target_col, col_names_and_offsets, input_data_str_repr,
                  pred_forward_hrs=4, look_back_hrs=12, time_sample_res_minutes=10):
    """
    receive 
    """
    model_str_repr = 'lstm1'
#     pred_forward_hrs = 4
#     look_back_hrs = 12
#     time_sample_res_minutes= 10
    k = 5
    num_folds_to_run = 5

    
    model_train_args = {"num_epochs" : 16, "batch_size": 100}
    model_class = Models.LSTMModel
    # model_class = Models.FCNNModel
    # model_class = Models.RandomForestModel
    
    col_names_and_offsets = col_names_and_offsets*int(60/time_sample_res_minutes)
    
    is_target_in_input=True
    if target_col not in col_names_and_offsets.index:
        is_target_in_input = False
        col_names_and_offsets[target_col] = 0
    
    run_params = TestInstanceParams.TestInstanceParams(input_data_str_repr=input_data_str_repr, \
        model_str_repr=model_str_repr, target_col=target_col, \
        is_target_in_input=is_target_in_input, pred_forward_hrs=pred_forward_hrs, \
        look_back_hrs=look_back_hrs, time_sample_res_minutes=time_sample_res_minutes, \
        k=k, num_folds_to_run=num_folds_to_run, \
        model_class=model_class, model_args=model_train_args, \
        desc_str_addition ='')
    
    data = downsample_data(data, run_params.downsample_ratio)
    df = Load.get_df_for_model(data, col_names_and_offsets)
    phys_target = df[run_params.phys_col]

    pre = Process.PreprocessData(steps_back=run_params.train_steps, \
                                 y_length=1, step_size=1, \
                              gap_forward=run_params.pred_forward)
    
    run_folds_dict = run_kfold_train_test(df, phys_target, run_params, pre)
    
    pd.options.display.float_format = '{:,.3f}'.format
    print(run_params.desc_str)
    display(run_folds_dict["results_test"].groupby(level=1).mean()[['rmse', 'r2', 'si', 'mae', 'max_error', 'my_weighted_rmse']])
    return run_folds_dict

In [73]:
# run_config_dict_single_example = {'col_names_and_offsets': cols_offsets,
#   'target_col': 'b_hs', 'input_data_str_repr':'mb4'} 

In [74]:
#### BUILD RUN CONFIGS ####
target_cols = ['b_hs', 'a_hs']

zero_offset = [0,0,0,0]
four_offset = [4,0,0,0]
eight_offset = [8,0,0,0]
offsets = [four_offset, zero_offset, eight_offset]

# Build data configurations and strings:
b_only_model = ('mb_hs',)
b_model_local_measurement = ('mb_hs', 'b_hs')
b_model_other_measurement = ('mb_hs', 'a_hs')
b_model_both_measurement = ('mb_hs', 'b_hs', 'a_hs')

str_b_only_model = 'mb'
str_b_model_local_measurement = 'hbmb'
str_b_model_other_measurement = 'hamb'
str_b_model_both_measurement = 'hahbmb'

data_options = [b_only_model, b_model_local_measurement,
                b_model_other_measurement, b_model_both_measurement]
strings = [str_b_only_model, str_b_model_local_measurement, str_b_model_other_measurement,
           str_b_model_both_measurement]

# creates tuples of - actual data columns, and the string representation for this data usage
data2string = dict(zip(data_options, strings))


# Build list of configuration dictionaries, which will later use multi_func_run in order
# to run all configurations of: offests, target columns, and data combinations
run_config_dicts_list = []
for offset_size in offsets:
    for target_col in target_cols:
        for data_opt, string_rpr in zip(data_options, strings):
            col_offsets = pd.Series(index=data_opt, data=offset_size[:len(data_opt)])
            run_config_dicts_list.append({"col_names_and_offsets":col_offsets,
                            "target_col": target_col, \
                                'input_data_str_repr':string_rpr + str(offset_size[0])})

In [75]:
#### BUILD RUN CONFIGS ####
# Using Data from one location (Buoy) for Forecasting conditions in another location (ADCP)
target_cols = ['a_hs']

zero_offset = [0,0,0,0]
four_offset = [4,0,0,0]
eight_offset = [8,0,0,0]
offsets = [zero_offset, four_offset, eight_offset]

# Build data configurations and strings:
a_only_model = ('ma_hs',)
a_model_other_hs = ('ma_hs', 'b_hs')
a_model_other_all = ('ma_hs', 'b_hs', 'b_dir')
other_all = ('b_hs', 'b_dir')

str_a_only_model = 'ma'
str_a_model_other_hs = 'hbma'
str_model_other_all = 'folddirhbma'
str_other_all = 'folddirhb'

data_options = [a_only_model, a_model_other_hs,
                a_model_other_all, other_all]
strings = [str_a_only_model, str_a_model_other_hs, str_model_other_all,
           str_other_all]
# creates tuples of - actual data columns, and the string representation for this data usage
data2string = dict(zip(data_options, strings))


# Build list of configuration dictionaries, which will later use multi_func_run in order
# to run all configurations of: offests, target columns, and data combinations
run_config_dicts_list = []
for offset_size in offsets:
    for target_col in target_cols:
        for data_opt, string_rpr in zip(data_options, strings):
            col_offsets = pd.Series(index=data_opt, data=offset_size[:len(data_opt)])
            run_config_dicts_list.append({"col_names_and_offsets":col_offsets,
                            "target_col": target_col, \
                                'input_data_str_repr':string_rpr + str(offset_size[0])})

In [76]:
run_config_dicts_list

[{'col_names_and_offsets': ma_hs    0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'ma0'},
 {'col_names_and_offsets': ma_hs    0
  b_hs     0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'hbma0'},
 {'col_names_and_offsets': ma_hs    0
  b_hs     0
  b_dir    0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'folddirhbma0'},
 {'col_names_and_offsets': b_hs     0
  b_dir    0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'folddirhb0'},
 {'col_names_and_offsets': ma_hs    4
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'ma4'},
 {'col_names_and_offsets': ma_hs    4
  b_hs     0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'hbma4'},
 {'col_names_and_offsets': ma_hs    4
  b_hs     0
  b_dir    0
  dtype: int64,
  'target_col': 'a_hs',
  'input_data_str_repr': 'folddirhbma4'},
 {'col_names_and_offsets': b_hs     4
  b_dir    0
  dtype: int64,
  'target_col': 'a_hs',
  'input_dat

In [77]:
all_results_dicts = collections.OrderedDict()
for i, all_args in enumerate(run_config_dicts_list):
    print("--------------------------------------")
    print("           RUN NUMBER {}".format(i+1))
    print("--------------------------------------")
    folds_run_results_dict = multi_func_run(full_data, **all_args);
    folds_run_results_dict["run_config"] = all_args
    run_description_str = folds_run_results_dict["run_params"].desc_str
    save_directory = osp.join(OUTPUT_BASE_DIR, "pickle", "model_forward_scen_1")
    # tensorflow is not letting model be saved in dictionary with pickle
    # so this is a workaround
    for fold_data in folds_run_results_dict["folds_dict"].values():
        saved_model_file_name = run_description_str + "_model_fold_{}.h5".format(
                                fold_data['fold_num'])
        fold_data['model'].save(osp.join(save_directory, "models",
                            saved_model_file_name))
        del fold_data['model']
        fold_data['model_filename'] = saved_model_file_name
    with open(osp.join(save_directory, run_description_str + ".pkl"), "wb") as f:
        pickle.dump(folds_run_results_dict, f)
    all_results_dicts[run_description_str] = folds_run_results_dict

--------------------------------------
           RUN NUMBER 1
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00122, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 35s - loss: 0.0165 - val_loss: 0.0012
Epoch 2/16

Epoch 00002: val_loss did not improve from 0.00122
13090/13090 - 31s - loss: 0.0014 - val_loss: 0.0013
Epoch 3/16

Epoch 00003: val_loss did not improve from 0.00122
13090/13090 - 32s - loss: 0.0014 - val_loss: 0.0014
Epoch 4/16

Epoch 00004: val_loss improved from 0.00122 to 0.00105, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 34s - loss: 0.0021 - val_loss: 0.0011
Epoch 5/16

Epoch 00005: val_loss did not improve from 0.00105
13090/13090 - 33s - loss: 0.0013 - val_loss: 0.0011
Epoch 6/16

Epoch 00006: val_loss improved from 0.00105 to 0.00105, saving model to output\models\a4h_10mma0_lb12h_lstm1

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.649,27.949,0.158,1.004,0.258
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 2
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00124, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 51s - loss: 0.0090 - val_loss: 0.0012
Epoch 2/16

Epoch 00002: val_loss did not improve from 0.00124
13090/13090 - 42s - loss: 0.0014 - val_loss: 0.0013
Epoch 3/16

Epoch 00003: val_loss did not improve from 0.00124
13090/13090 - 42s - loss: 0.0013 - val_loss: 0.0013
Epoch 4/16

Epoch 00004: val_loss improved from 0.00124 to 0.00123, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 42s - loss: 0.0015 - val_loss: 0.0012
Epoch 5/16

Epoch 00005: val_loss improved from 0.00123 to 0.00114, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 45s - loss: 0.0011 - val_loss: 0.0011
Epoch 6/16

Epoch 00006: val_loss did not improve 

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.212,0.69,26.758,0.153,1.005,0.251
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 3
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00128, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 73s - loss: 0.0122 - val_loss: 0.0013
Epoch 2/16

Epoch 00002: val_loss improved from 0.00128 to 0.00125, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 62s - loss: 0.0014 - val_loss: 0.0012
Epoch 3/16

Epoch 00003: val_loss did not improve from 0.00125
13090/13090 - 62s - loss: 0.0013 - val_loss: 0.0014
Epoch 4/16

Epoch 00004: val_loss improved from 0.00125 to 0.00115, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 63s - loss: 0.0019 - val_loss: 0.0012
Epoch 5/16

Epoch 00005: val_loss improved from 0.00115 to 0.00112, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.199,0.806,26.838,0.138,1.003,0.231
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 4
--------------------------------------


KeyError: 'ma_hs'

In [78]:
OUTPUT_BASE_DIR

'C:\\Users\\User\\לימודים\\תואר שני\\פרויקט גמר\\Shir\\ML_Wave\\output'

In [79]:
old_results_dict = copy.copy(all_results_dicts)

In [80]:
first_run_results = copy.copy(all_results_dicts)

In [81]:
all_results_dicts = collections.OrderedDict()
for i, all_args in enumerate(run_config_dicts_list):
    print("--------------------------------------")
    print("           RUN NUMBER {}".format(i+1))
    print("--------------------------------------")
    folds_run_results_dict = multi_func_run(full_data, **all_args);
    folds_run_results_dict["run_config"] = all_args
    run_description_str = folds_run_results_dict["run_params"].desc_str
    save_directory = osp.join(OUTPUT_BASE_DIR, "pickle", "model_forward_scen_1")
    # tensorflow is not letting model be saved in dictionary with pickle
    # so this is a workaround
    for fold_data in folds_run_results_dict["folds_dict"].values():
        saved_model_file_name = run_description_str + "_model_fold_{}.h5".format(
                                fold_data['fold_num'])
        fold_data['model'].save(osp.join(save_directory, "models",
                            saved_model_file_name))
        del fold_data['model']
        fold_data['model_filename'] = saved_model_file_name
    with open(osp.join(save_directory, run_description_str + ".pkl"), "wb") as f:
        pickle.dump(folds_run_results_dict, f)
    all_results_dicts[run_description_str] = folds_run_results_dict

--------------------------------------
           RUN NUMBER 1
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00126, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 64s - loss: 0.0145 - val_loss: 0.0013
Epoch 2/16

Epoch 00002: val_loss did not improve from 0.00126
13090/13090 - 56s - loss: 0.0014 - val_loss: 0.0014
Epoch 3/16

Epoch 00003: val_loss improved from 0.00126 to 0.00118, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 63s - loss: 0.0016 - val_loss: 0.0012
Epoch 4/16

Epoch 00004: val_loss improved from 0.00118 to 0.00114, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 69s - loss: 0.0013 - val_loss: 0.0011
Epoch 5/16

Epoch 00005: val_loss improved from 0.00114 to 0.00111, saving model to output\models\a4h_10mma0_lb12h_lstm1_f0.h5
13090/13090 - 64s - loss: 0.0013 - val_loss:

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.224,0.685,28.454,0.157,1.059,0.254
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 2
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00128, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 101s - loss: 0.0105 - val_loss: 0.0013
Epoch 2/16

Epoch 00002: val_loss did not improve from 0.00128
13090/13090 - 87s - loss: 0.0014 - val_loss: 0.0014
Epoch 3/16

Epoch 00003: val_loss improved from 0.00128 to 0.00119, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 89s - loss: 0.0013 - val_loss: 0.0012
Epoch 4/16

Epoch 00004: val_loss improved from 0.00119 to 0.00117, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 87s - loss: 0.0013 - val_loss: 0.0012
Epoch 5/16

Epoch 00005: val_loss improved from 0.00117 to 0.00117, saving model to output\models\a4h_10mhbma0_lb12h_lstm1_f0.h5
13090/13090 - 87s - loss: 0.0012 - 

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.599,26.262,0.156,1.021,0.261
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 3
--------------------------------------
##### Running on fold 0 #####
Train on 13090 samples, validate on 1552 samples
Epoch 1/16

Epoch 00001: val_loss improved from inf to 0.00149, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 134s - loss: 0.0100 - val_loss: 0.0015
Epoch 2/16

Epoch 00002: val_loss improved from 0.00149 to 0.00119, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 110s - loss: 0.0021 - val_loss: 0.0012
Epoch 3/16

Epoch 00003: val_loss improved from 0.00119 to 0.00117, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 110s - loss: 0.0012 - val_loss: 0.0012
Epoch 4/16

Epoch 00004: val_loss improved from 0.00117 to 0.00113, saving model to output\models\a4h_10mfolddirhbma0_lb12h_lstm1_f0.h5
13090/13090 - 110s - loss: 0.0015 - val_loss: 0.0011
Epoch 5/16

Epoch 00005: val_loss improved from 0.00113 to 0.00111,

Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.201,0.794,26.816,0.143,0.995,0.238
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------------------------
           RUN NUMBER 4
--------------------------------------


KeyError: 'ma_hs'

In [82]:
for mod_name in first_run_results.keys():
    print(mod_name)
    try:
        display(first_run_results[mod_name]["results_test"].groupby(level=1).mean().loc[:,
            ['rmse', 'r2', 'si', 'mae', 'max_error', 'my_weighted_rmse']])
        display(all_results_dicts[mod_name]["results_test"].groupby(level=1).mean().loc[:,
            ['rmse', 'r2', 'si', 'mae', 'max_error', 'my_weighted_rmse']])
        print("--------------------")
    except:
        pass

a4h_10mma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.649,27.949,0.158,1.004,0.258
WW3,0.272,0.534,29.126,0.217,0.895,0.398


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.224,0.685,28.454,0.157,1.059,0.254
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------
a4h_10mhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.212,0.69,26.758,0.153,1.005,0.251
WW3,0.272,0.534,29.126,0.217,0.895,0.398


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.599,26.262,0.156,1.021,0.261
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------
a4h_10mfolddirhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.199,0.806,26.838,0.138,1.003,0.231
WW3,0.272,0.534,29.126,0.217,0.895,0.398


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.201,0.794,26.816,0.143,0.995,0.238
WW3,0.272,0.534,29.126,0.217,0.895,0.398


--------------------


In [83]:
for mod_name in all_results_dicts.keys():
    print(mod_name)
    display(all_results_dicts[mod_name]["results_test"].groupby(level=1).mean().loc[:,
        ['rmse', 'r2', 'si', 'mae', 'max_error', 'my_weighted_rmse']])

a4h_10mma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.224,0.685,28.454,0.157,1.059,0.254
WW3,0.272,0.534,29.126,0.217,0.895,0.398


a4h_10mhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.599,26.262,0.156,1.021,0.261
WW3,0.272,0.534,29.126,0.217,0.895,0.398


a4h_10mfolddirhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.201,0.794,26.816,0.143,0.995,0.238
WW3,0.272,0.534,29.126,0.217,0.895,0.398


In [84]:
for mod_name in old_results_dict.keys():
    print(mod_name)
    display(all_results_dicts[mod_name]["results_test"].groupby(level=1).mean().loc[:,
        ['rmse', 'r2', 'si', 'mae', 'max_error', 'my_weighted_rmse']])

a4h_10mma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.224,0.685,28.454,0.157,1.059,0.254
WW3,0.272,0.534,29.126,0.217,0.895,0.398


a4h_10mhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.215,0.599,26.262,0.156,1.021,0.261
WW3,0.272,0.534,29.126,0.217,0.895,0.398


a4h_10mfolddirhbma0_lb12h_lstm1


Unnamed: 0,rmse,r2,si,mae,max_error,my_weighted_rmse
Const_Guess,0.183,0.846,28.66,0.11,1.04,0.219
ML,0.201,0.794,26.816,0.143,0.995,0.238
WW3,0.272,0.534,29.126,0.217,0.895,0.398
