In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
from numpy import concatenate
from pandas import DataFrame
from pandas import concat

# Show the TensorFlow version this notebook is running against.
print(tf.__version__)

2.4.0


In [2]:
!pip install keras-hypetune




In [3]:
from kerashypetune import *

In [4]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Arguments:
        data: Sequence of observations as a list (of scalars or of rows), a
            1-D or 2-D NumPy array, or anything else accepted by the
            ``DataFrame`` constructor.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning. Columns are
        named ``var<j>(t-<i>)`` for the lags, ``var<j>(t)`` for the current
        step and ``var<j>(t+<i>)`` for the following steps.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself rather than from the input
    # type: this also covers 1-D arrays and lists of rows, for which inspecting
    # `data` directly gives the wrong count or fails.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        # shift(i) moves the rows down by i periods, so each row sees the
        # observation from i steps earlier
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop the rows that the shifts left incomplete (NaN values)
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [5]:
def get_data(df, ventana, entrenamiento=800, inicio=1082):
    '''
    Build standardized, windowed train/test arrays from the raw DataFrame.

    Arguments:
        df: DataFrame whose first column is skipped (it is not used as a
            feature) and whose last column is the target variable.
        ventana: Number of past time steps fed to the model for each sample.
        entrenamiento: Number of windowed samples kept for training; the
            remaining samples form the test set.
        inicio: Positional index of the first row of `df` that is used
            (the default was annotated in the notebook as the first day
            of 2018).
    Returns:
        train_X, train_y, test_X, test_y where the X arrays have shape
        (samples, ventana, n_cols) and the y arrays hold the last variable at
        time t. All values, targets included, are in standardized units.
    '''
    values = df.iloc[inicio:, 1:].values
    n_cols = values.shape[1]  # number of variables, target included
    n_obs = ventana * n_cols  # number of lagged input columns per sample

    # NOTE(review): the scaler is fitted on every row from `inicio` onwards,
    # which includes the rows that end up in the test split. Consider fitting
    # on the training rows only to avoid leaking test statistics.
    transformer = StandardScaler()
    values = transformer.fit_transform(values)
    data = series_to_supervised(values, ventana, 1)

    train = data.iloc[:entrenamiento, :]
    test = data.iloc[entrenamiento:, :]

    # Inputs are the first n_obs (lagged) columns; the target is the last
    # column, i.e. the last variable at time t.
    train_X, train_y = train.iloc[:, :n_obs].values, train.iloc[:, -1].values
    test_X, test_y = test.iloc[:, :n_obs].values, test.iloc[:, -1].values
    train_X = train_X.reshape((train_X.shape[0], ventana, n_cols))
    test_X = test_X.reshape((test_X.shape[0], ventana, n_cols))
    return train_X, train_y, test_X, test_y

In [6]:
def get_model(params, input_shape=None):
    '''
    Build and compile the Conv1D + stacked bidirectional LSTM regressor.

    params: dict with the hyper-parameters of one trial. Keys read here:
        {'conv': number of Conv1D filters,
         'lstm': units of each LSTM layer,
         'dropout': rate of the Dropout layer,
         'lambda': constant factor applied to the network output,
         'learning_rate': learning rate of the Adam optimizer}
        Any other key (e.g. 'epochs') is not used by this function.
    input_shape: optional (timesteps, features) shape of one sample. When
        omitted it is taken from the notebook-level `train_X`, so that array
        must already exist.

    Returns the compiled model (Huber loss, MAE metric).
    '''
    tf.keras.backend.clear_session()

    if input_shape is None:
        # Fall back to the notebook global for callers that pass only `params`.
        input_shape = train_X.shape[1:]
    # Bind the scale as a plain value so the Lambda layer does not depend on
    # later changes to the `params` dict.
    escala = params['lambda']

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv1D(filters=params['conv'], kernel_size=5,
                               strides=1, padding="causal",
                               activation="relu",
                               input_shape=list(input_shape)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(params['lstm'], return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(params['lstm'])),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dropout(params['dropout']),
        tf.keras.layers.Dense(1),
        tf.keras.layers.Lambda(lambda x: x * escala),
    ])

    model.compile(
        loss=tf.keras.losses.Huber(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
        metrics=["mae"],
    )
    return model

In [7]:
# Load the complete dataset from the CSV file and list its column names.
df = pd.read_csv("data_complete.csv")
df.columns

Index(['date', 'dow(USD)', 'euro_stoxx50(USD)', 'nasdaq(USD)', 'nikkei(USD)',
       'oro(USD)', 'petroleo(USD)', 'shangai_stock_exchange(USD)',
       'sp_500(USD)', 'BCHAIN-TOTBC', 'BCHAIN-AVBLS', 'BCHAIN-MIREV (USD)',
       'BCHAIN-HRATE', 'BCHAIN-NTRBL', 'BCHAIN-NADDU', 'eur_usd', 'cny_usd',
       'gbp_usd', 'BCHAIN-MKPRU (USD)'],
      dtype='object')

In [8]:
# Remove the index columns that are not used as features. Re-assigning (rather
# than dropping in place) together with errors='ignore' keeps this cell safe to
# re-run: a second execution no longer raises KeyError for the missing columns.
df = df.drop(
    columns=['shangai_stock_exchange(USD)', 'petroleo(USD)', 'euro_stoxx50(USD)', 'dow(USD)'],
    errors='ignore',
)

In [9]:
# Build the train/test arrays using a look-back window (`ventana`) of 3 time steps.
train_X, train_y, test_X, test_y = get_data(df, 3)

In [None]:
# Hyper-parameter search space handed to KerasGridSearch. 'dropout', 'lambda',
# 'lstm', 'conv' and 'learning_rate' are the keys read by get_model(); 'epochs'
# is given as a single fixed value instead of a list of candidates.
params = {'dropout':[0.2, 0.3],
         'lambda':[5.0,10.0, 15.0],
          'lstm':np.arange(20,50,10),
          'conv':np.arange(20,50,10),
         'epochs':1000,
         'learning_rate': [1e-4, 1e-5, 1e-6]}
# Trials are compared on 'val_loss' with greater_is_better=False (presumably
# meaning that a lower validation loss wins).
kgs = KerasGridSearch(get_model, params, monitor='val_loss', greater_is_better=False)
# NOTE(review): the test split is passed as validation_data, so the selected
# configuration is chosen using the same data that would be used to evaluate it.
kgs.search(train_X, train_y, validation_data = (test_X, test_y))


162 trials detected for ('dropout', 'lambda', 'lr', 'conv', 'epochs', 'learning_rate')

***** (1/162) *****
Search({'dropout': 0.2, 'lambda': 10.0, 'lr': 40, 'conv': 40, 'epochs': 1000, 'learning_rate': 1e-05})
SCORE: 0.42139 at epoch 971

***** (2/162) *****
Search({'dropout': 0.2, 'lambda': 10.0, 'lr': 40, 'conv': 40, 'epochs': 1000, 'learning_rate': 0.0001})
