In [1]:
import numpy as np
import pickle
from tqdm import tqdm 

import os
import sys
currentdir = os.path.dirname(os.path.realpath('results_DK'))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

from func import rmse, weight_gen

Functions

In [2]:
def attach_weights(w, models, win):
    # Retrieve y_pred
    y_pred = np.empty((0, len(models[0][1]['best_y_pred_rmse'])))
    for model in models:
        y_pred_temp = [model[win]['best_y_pred_rmse']]
        y_pred = np.concatenate((y_pred, y_pred_temp))

    # Matrix multiplication    
    y_pred_w = np.dot(w, y_pred).round(2)

    #Find rmse
    y_true = model[win]['y_actual']
    rmse_w = [rmse(y_pred = y_pred_w, y_true = y_true)]

    # Create result_
    result = {}
    
    
    result['w'] = w
    result['best_rmse'] = rmse_w
    result['y_pred'] = y_pred
    result['best_y_pred_rmse'] = y_pred_w
    result['y_actual'] = y_true
    
    return result

def lowest_rmse(weights, models, win):
    result = {}

    for w in weights: 
        result[tuple(w)] = attach_weights(w = w, models = models, win = win)
        result_temp = result[tuple(w)]['best_rmse']
        #print(result)
    
    # lowest rmse
    best_par_rmse = min(result_temp)
    best = [result[k] for k in result if result[k]['best_rmse'] == best_par_rmse ]
    #print(len(best))
    result = best[0]
    
    return result

def w_windows(weights, models): 
    #CONTAINER WITH RESULTS
    results_dict = {}
    #LOOPING OVER EACH WINDOW
    for win in tqdm(models[0].keys()):
        results_dict[win] = lowest_rmse(weights = weights, models = models, win = win)
    
    return results_dict

def test_weights(w_windows, models):

    result = {}

    for key in w_windows:
        if key ==1:
            continue

        result[key] = attach_weights(w = w_windows[key-1]['w'], models = models, win = key)

    return result  

def mean_rmse(model, model_string, result_key = 'best_rmse'):
    temp = []
    for model_object, model_name in zip(model, model_string):
        for key in model_object.keys():
            temp.append(model_object[key][result_key][-1])
        print('Mean RMSE in ' + model_name + ': ' + str(round(np.mean(temp),4)))

Load data

In [3]:
# BASELINE
with open('results/final/baseline/results_baseline.pickle', 'rb') as handle:
    results_baseline = pickle.load(handle)

#LASSO
with open('results/final/lasso/results_final.pickle', 'rb') as handle:
    results_lasso = pickle.load(handle) 

#RIDGE
with open('results/final/ridge/results_final.pickle', 'rb') as handle:
    results_ridge = pickle.load(handle) 

# #ELASTIC
with open('results/final/elastic/results_final.pickle', 'rb') as handle:
    results_elastic = pickle.load(handle) 
    
# #RANDOM FOREST
with open('results/final/randomforest/results_final_no_int.pickle', 'rb') as handle:
    results_randomforest = pickle.load(handle) 

#XGBOOST
with open('results/final/xgboost/results_final_noint.pickle', 'rb') as handle:
    results_xgboost = pickle.load(handle)
# with open('results/final/xgboost/results_final_shap.pickle', 'rb') as handle:
#     results_xgboost = pickle.load(handle)

Initial definitions

In [4]:
models = [results_lasso, results_ridge, results_elastic, results_randomforest, results_xgboost]

In [5]:
weights = weight_gen(len(models), 30000)

Loop over windows

In [6]:
w_windows = w_windows(weights, models)

100%|████████████████████████████████████████████████████████████████████████████████| 103/103 [10:26<00:00,  6.08s/it]


Use previous key to set weights 

In [7]:
results_weights = test_weights(w_windows, models)

In [8]:
results_weights[4]

{'w': (0.19, 0.11, 0.3, 0.21, 0.19),
 'best_rmse': [0.24649543606322624],
 'y_pred': array([[ 0.1       ,  0.06      ,  0.1       ,  0.09      ,  0.07      ],
        [ 0.09      ,  0.09      ,  0.09      ,  0.09      ,  0.09      ],
        [ 0.09      ,  0.07      ,  0.09      ,  0.09      ,  0.08      ],
        [ 0.09      , -0.07      , -0.19      , -0.08      , -0.27      ],
        [ 0.01      , -0.23      , -0.44      , -0.22      , -0.40000001]]),
 'best_y_pred_rmse': array([ 0.08, -0.02, -0.07, -0.  , -0.09]),
 'y_actual': array([-0.2, -0.2, -0.4, -0.2, -0.3])}

Save as pickle

In [9]:
with open('results/final/weighted/results_final.pickle', 'wb') as handle:
    pickle.dump(results_weights, handle, protocol= pickle.HIGHEST_PROTOCOL)

In [10]:
with open('results/final/weighted/results_final.pickle', 'rb') as handle:
    results_weighted = pickle.load(handle) 

Mean RMSE

In [11]:
mean_rmse(model = [results_weighted], model_string = ['weighted model'], result_key = 'best_rmse')

Mean RMSE in weighted model: 0.1184
