## Importamos las librerías

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from modelo_energetico.col_reductor import reduce_columns_sum, reduce_columns_avg, reduce_columns_period_sum, reduce_columns_period_avg, total_q_hour 
from modelo_energetico.nn_gen import init_vanilla_nn
from modelo_energetico.scaler import MultiScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping

2021-10-17 07:44:56.064166: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-17 07:44:56.064222: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Cargamos los datasets

In [2]:
# Read the feature and target tables; the first CSV column becomes the index.
X = pd.read_csv('../raw_data/X.csv', index_col=0)
y = pd.read_csv('../raw_data/y.csv', index_col=0)

# Report (rows, columns) for each table, X first and then y.
for frame in (X, y):
    print(frame.shape)

(7500, 12115)
(7500, 5376)


In [None]:
# Display the column labels of y.
y.columns

In [None]:
# Split y at 672 columns from the end: the trailing slice goes into cols_t
# and is appended to X as extra input columns; y keeps everything before it.
# NOTE: this cell is not idempotent - each re-run moves another 672 columns
# from y to X, so reload the data before executing it again.
cols_t, y = y.iloc[:, -672:], y.iloc[:, :-672]
X = pd.concat([X, cols_t], axis=1)

## Vemos qué features son series

In [None]:
# Base names of the features stored as series: take every column whose label
# ends in '671' and strip its last 4 characters (presumably a one-character
# separator plus the step index '671' - confirm against the column labels).
# The test is a suffix match rather than a substring match because the [:-4]
# slice is only meaningful when '671' sits at the very end of the label; a
# label containing '671' elsewhere would otherwise yield a bogus base name.
columns_s = [col[:-4] for col in X.columns if col.endswith('671')]
columns_s

## Armamos datasets reducidos

In [None]:
# Take the first 19 column labels of X and display them.
# NOTE(review): the following cell reassigns columns_c to the first 16 labels,
# so this cell looks like an exploratory preview - confirm it is still needed.
columns_c = X.columns[0:19]
columns_c

In [None]:
# Keep the first 16 column labels of X; these columns are later copied
# unchanged into every X_reduced[div].
# NOTE(review): 16 is a hard-coded position count - presumably the non-series
# features; verify against the column listing.
columns_c = X.columns[0:16]

In [None]:
# Replace y with the result of the project helper total_q_hour and preview it.
# NOTE(review): presumably this collapses the targets into a single 'Q' series
# (the next cell reduces y over ['Q']) - confirm in modelo_energetico.col_reductor.
# NOTE: y is overwritten, so re-running this cell feeds the helper its own output.
y = total_q_hour(y)
y.head()

In [None]:
# Build one reduced (X, y) pair per divisor, stored in dicts keyed by `div`.
# X keeps the columns_c columns as-is and adds the period-averaged series
# columns; y is the period-summed 'Q' series.
X_reduced, y_reduced = {}, {}
for div in (1, 2, 4, 7, 14, 28):
    static_part = X[columns_c]
    series_part = reduce_columns_period_avg(X, columns_s, div)
    X_reduced[div] = pd.concat([static_part, series_part], axis=1)
    y_reduced[div] = reduce_columns_period_sum(y, ['Q'], div)

In [None]:
# Preview the first rows of the feature frame reduced with div = 28.
X_reduced[28].head()

In [None]:
# Preview the first rows of the target frame reduced with div = 28.
y_reduced[28].head()

## Probamos SGD con distintos períodos

In [None]:
# Two-step pipeline: the project's MultiScaler (configured by name with
# "RobustScaler"), followed by a MultiOutputRegressor that wraps an
# SGDRegressor so that several target columns can be fitted.
scaling_step = ("scaling", MultiScaler(scaler="RobustScaler"))
model_step = ("model", MultiOutputRegressor(estimator=SGDRegressor()))
pipe = Pipeline(steps=[scaling_step, model_step])

In [None]:
# Print every parameter name the pipeline exposes, one per line; these are
# the keys that can be used in a GridSearchCV param_grid.
for param_name in pipe.get_params():
    print(param_name)

In [None]:
# Hyper-parameter grid for `pipe`. Keys follow scikit-learn's
# <step>__<parameter> convention; 'model__estimator__*' reaches the
# SGDRegressor wrapped by the MultiOutputRegressor.
params = {
    # NOTE(review): the pipeline is built with MultiScaler(scaler="RobustScaler")
    # (a string) while the grid passes scaler instances - confirm that
    # MultiScaler accepts both forms.
    'scaling__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
    'model__estimator__loss': ['huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    # SGDRegressor only uses l1_ratio when penalty='elasticnet'. With the
    # default 'l2' penalty the l1_ratio axis below would multiply the number
    # of fits by 5 without changing any model, so the penalty is pinned here.
    'model__estimator__penalty': ['elasticnet'],
    'model__estimator__alpha': [1, 0.1, 0.01, 0.001, 0.0001],
    'model__estimator__l1_ratio': [1, 0.1, 0.01, 0.001, 0.0001],
}

# Exhaustive 5-fold search scored with R^2, using all available cores.
grid = GridSearchCV(
    pipe,
    param_grid=params,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring='r2',
)

In [None]:
# Empty containers filled by the following cells, keyed by the divisor `div`:
# fitted models, (score, fit time) results, and the train/test partitions.
models, results = {}, {}
X_reduced_train, X_reduced_test = {}, {}
y_reduced_train, y_reduced_test = {}, {}

In [None]:
# 70/30 train/test split of each reduced dataset, with a fixed seed so the
# partition is reproducible.
for div in (7, 14, 28):
    split = train_test_split(X_reduced[div], y_reduced[div], test_size=0.3, random_state=42)
    X_reduced_train[div], X_reduced_test[div], y_reduced_train[div], y_reduced_test[div] = split

In [None]:
# For each divisor: run the grid search on the training split, then refit the
# winning pipeline once more purely to time a single fit, and score it on the
# held-out test split.
for div in [7, 14, 28]:
    grid.fit(X_reduced_train[div], y_reduced_train[div])

    start = datetime.now()
    best_estimator = grid.best_estimator_.fit(X_reduced_train[div], y_reduced_train[div])
    stop = datetime.now()
    # Score with the pipeline's own score() method on data unseen during the search.
    best_score = best_estimator.score(X_reduced_test[div], y_reduced_test[div])
    # total_seconds() gives the full elapsed time as a float; the `.seconds`
    # attribute only holds the whole-second component (0 for sub-second fits,
    # and it wraps around after one day).
    time = (stop - start).total_seconds()

    models[div] = best_estimator
    results[div] = (best_score, time)

In [None]:
# Display the (test score, fit time) tuple stored for each divisor.
results

In [None]:
# Show the "scaling" step of the pipeline selected for div = 7.
models[7].named_steps.scaling

In [None]:
# Show the SGDRegressor configuration selected for div = 7. `.estimator` is the
# template held by MultiOutputRegressor (it carries the chosen hyper-parameters);
# the fitted per-target copies are in `.estimators_`.
models[7].named_steps.model.estimator

## Probamos redes neuronales

In [None]:
# Result containers, keyed by (div, index of the network layout).
nn_models = {}
nn_results = {}
nn_histories = {}

networks = [(10, 10, 5), (15, 10, 10, 5)]  # layouts passed as n_neurons; try other combinations
batch_size = [60, 80, 100]  # try other combinations
epochs = [10, 20]  # try other combinations
param_grid = dict(batch_size=batch_size, epochs=epochs)

# NOTE(review): this callback is created but not passed to any fit() call in
# this cell. With patience=25 and at most 20 epochs it could never trigger
# anyway - decide whether to wire it in (and with which monitor) or drop it.
es = EarlyStopping(patience=25, restore_best_weights=True)

for div in [7, 14, 28]:
    input_dim = X_reduced_train[div].shape[1]
    output_dim = y_reduced_train[div].shape[1]

    # The whole search/refit/record sequence belongs to the inner loop so that
    # every (div, network) pair is evaluated; it was previously not indented,
    # which left the `for` without a body (IndentationError).
    for i, network in enumerate(networks):
        # NOTE: this rebinds the name `grid` used by the SGD search above.
        grid = GridSearchCV(
            estimator=KerasRegressor(
                build_fn=init_vanilla_nn,
                n_neurons=network,
                input_dim=input_dim,
                output_dim=output_dim,
            ),
            param_grid=param_grid,
            n_jobs=-1,
            cv=3,
        )

        grid.fit(X_reduced_train[div], y_reduced_train[div], verbose=1)

        best_model = grid.best_estimator_

        # Refit the selected configuration once more to time a single training run.
        start = datetime.now()
        hist = best_model.fit(X_reduced_train[div], y_reduced_train[div])
        stop = datetime.now()
        # total_seconds() keeps the sub-second part; `.seconds` would truncate it.
        time = (stop - start).total_seconds()
        # MAE is an error, so the best epoch is the one with the lowest value.
        # Assumes init_vanilla_nn compiles the model with the 'mae' metric - TODO confirm.
        best_score = min(hist.history['mae'])

        nn_models[(div, i)] = best_model
        nn_results[(div, i)] = (best_score, time)
        nn_histories[(div, i)] = hist

In [None]:
nn_results[(14, 1)]