**Iniciación del cuaderno interactivo**

Se realiza la importación de algunas librerías que vamos a utilizar y se muestran los ficheros de entrada al cuaderno; en nuestro caso, nuestro fichero en formato CSV.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pandas_profiling as pp
from datetime import datetime
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler,StandardScaler,scale 
from sklearn.cluster import MeanShift, estimate_bandwidth
import pickle
from keras.models import Sequential, Model, load_model
from keras.layers import LSTM,Input,Dense,Flatten,SpatialDropout1D,Dropout,CuDNNLSTM,Reshape,Concatenate,BatchNormalization
from keras.layers import CuDNNLSTM, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found under the Kaggle input directory
# and then under the Kaggle working directory.
for root_dir in ('/kaggle/input', '/kaggle/working'):
    for dirname, _, filenames in os.walk(root_dir):
        for filename in filenames:
            print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

**Lectura de fichero CSV**

Se cambia el formato con el que mostramos el tiempo (de estampa de tiempo en ms a fecha).

In [1]:
# Read the CSV using column 1 (epoch timestamps in milliseconds) as the index.
data = pd.read_csv("../input/iotusd/IOTUSD.csv", index_col=1, encoding='UTF-8')
# Convert the millisecond timestamps in a single vectorized call instead of a
# per-row ``date_parser`` lambda (deprecated in recent pandas). The result is
# timezone-naive UTC, so it no longer depends on the machine's local timezone
# the way ``datetime.fromtimestamp`` does.
data.index = pd.to_datetime(data.index, unit='ms')
# Drop the 'Unnamed: 0' column and skip the first 500 rows.
data = data.drop(['Unnamed: 0'], axis=1)[500:]


**Exploración de los datos**
Mostramos las características de los datos en bruto.

In [1]:
# Display pandas' summary statistics for the raw data.
data.describe()

In [1]:
# Split the raw data into the price columns and the volume column so each
# group gets its own line plot and histogram.
price_frame = data[['OPEN','CLOSE','HIGH','LOW']]
volume_frame = data[['VOLUME']]

# Not the last expression of the cell, so its result is not displayed here.
data.describe()

price_frame.plot()
price_frame.hist()
volume_frame.plot()
volume_frame.hist()

In [1]:
# Draw one probability plot per column, each in its own figure.
for column_name in ('OPEN', 'HIGH', 'VOLUME'):
    prob = stats.probplot(data[column_name], plot=plt)
    plt.show()

In [1]:
# Display the kurtosis of each column of the raw data.
data.kurtosis()

In [1]:
from scipy.cluster import hierarchy
from scipy.spatial import distance
import seaborn as sns

# Pairwise correlation between the columns of the raw data.
corr_matrix = data.corr()
correlations_array = np.asarray(corr_matrix)

# Average-linkage hierarchical clustering over the rows of the correlation
# matrix; the same linkage is reused for rows and columns of the heatmap.
pairwise_distances = distance.pdist(correlations_array)
linkage = hierarchy.linkage(pairwise_distances, method='average')

g = sns.clustermap(
    corr_matrix,
    row_linkage=linkage,
    col_linkage=linkage,
    row_cluster=True,
    col_cluster=True,
    figsize=(10, 10),
    cmap='Greens',
)
# Keep the row labels horizontal.
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.show()

# Column labels in the order produced by the row dendrogram.
label_order = corr_matrix.columns[g.dendrogram_row.reordered_ind]

**Preprocesamos los datos**


In [1]:
# Empty frame that will hold the preprocessed columns.
pdata = pd.DataFrame()

In [1]:
# Fraction of the preprocessed rows used for the training split.
split_rate = 0.8
# Column labels of the raw data frame.
# NOTE(review): their order comes from the CSV and may differ from the order in
# which the preprocessed columns are created (OPEN, HIGH, LOW, CLOSE, VOLUME) —
# confirm before using them to label preprocessed data.
columns = data.columns

In [1]:
# Relative change of each price column between consecutive rows, bounded to
# [-0.2, 0.2]. The bounds are passed by keyword in (lower, upper) order; the
# original passed them reversed and relied on pandas swapping scalar bounds.
for price_column in ('OPEN', 'HIGH', 'LOW', 'CLOSE'):
    pdata[price_column] = data[price_column].pct_change().clip(lower=-0.2, upper=0.2)

# log(1 + volume), computed in one vectorized call and bounded to [0, 14].
pdata['VOLUME'] = np.log1p(data['VOLUME']).clip(lower=0, upper=14)

# pct_change() leaves NaN in the first row; remove incomplete rows.
pdata.dropna(inplace=True)

# Chronological split: the first `split_rate` share of rows is the training
# set, the remainder is the test set.
split_index = int(pdata.shape[0] * split_rate)
X_train = pdata[:split_index]
X_test = pdata[split_index:]




In [1]:
# Remember the labels of the frame being scaled. Labelling the scaled arrays
# with the raw data's columns can attach the wrong names (or fail on a length
# mismatch) because the preprocessed frame is built in its own column order.
feature_columns = X_train.columns

# Fit the min-max scaler on the training split only, then apply it to both
# splits so the test data is scaled with training statistics.
scaler = MinMaxScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=feature_columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=feature_columns)


Generator

In [1]:
import numpy as np
import keras

class DataGenerator(keras.utils.Sequence):
    """Serve batches of sliding windows over ``X`` as ``(input, target)`` pairs.

    Every sample is ``sequence_lenght`` consecutive rows of ``X`` and
    consecutive samples start one row apart. The same array is returned as
    both input and target of each batch.

    Args:
        X: row-sliceable table of features (``X[a:b]`` must select rows).
        input_columns: feature labels; only their count is used, as the size
            of the last axis of each batch.
        output_columns: stored but not used by this class.
        batch_size: number of windows per batch.
        sequence_lenght: number of rows per window.
        steps_ahead: stored but not used by this class.
        normalise: stored but not used by this class.
    """

    def __init__(self, X,input_columns, output_columns, batch_size, sequence_lenght,steps_ahead,normalise = False):
        """Store the configuration and build the initial index array."""
        self.batch_size = batch_size
        self.X = X
        self.input_columns = input_columns
        self.output_columns = output_columns
        self.sequence_lenght = sequence_lenght
        self.steps_ahead = steps_ahead
        self.normalise = normalise
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (never negative)."""
        usable_rows = len(self.X) - self.sequence_lenght
        # Clamp to zero so data shorter than one window yields an empty
        # sequence instead of a negative length.
        return max(0, int(usable_rows // self.batch_size))

    def on_epoch_end(self):
        """Rebuild the row index array after each epoch."""
        self.indexes = np.arange(len(self.X))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(windows, windows)``.

        ``windows`` has shape
        ``(batch_size, sequence_lenght, len(input_columns))``.
        """
        first_row = index * self.batch_size
        # Slice the rows covered by the whole batch once, rather than slicing
        # the source table separately for every sample.
        last_row = first_row + self.batch_size + self.sequence_lenght - 1
        batch_rows = np.asarray(self.X[first_row:last_row])

        data_windows = np.zeros([self.batch_size,self.sequence_lenght,len(self.input_columns)])
        for i in range(self.batch_size):
            data_windows[i] = batch_rows[i:i + self.sequence_lenght]

        return data_windows,data_windows

    def __data_generation(self, data_windows,):
        """Copy ``data_windows`` into a fresh array and return it twice.

        Not called by ``__getitem__``. Windows may be arrays or frames; they
        are converted with ``np.asarray`` (the previous ``.values`` access
        failed on plain arrays).
        """
        X = np.empty((self.batch_size, self.sequence_lenght, len(self.input_columns)))
        for i,window in enumerate(data_windows):
            X[i,:,:] = np.asarray(window)

        return X,X

In [1]:
# Window length (rows per sample), samples per batch and feature count.
timesteps = 1024
batch_size = 128
n_features = 5

# One generator per split, configured identically.
train_generator = DataGenerator(
    X=X_train,
    input_columns=columns,
    output_columns=[],
    batch_size=batch_size,
    sequence_lenght=timesteps,
    steps_ahead=1,
    normalise=False,
)
test_generator = DataGenerator(
    X=X_test,
    input_columns=columns,
    output_columns=[],
    batch_size=batch_size,
    sequence_lenght=timesteps,
    steps_ahead=1,
    normalise=False,
)

In [1]:
from keras.callbacks import Callback
from keras import backend as K
import numpy as np


class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency.
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size
            2-8 x training iterations in epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'.
            Values correspond to policies detailed above.
            If scale_fn is not None, the policy selected by
            this argument is not used (the value is still validated).
        gamma: constant in 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where
            0 <= scale_fn(x) <= 1 for all x >= 0.
            mode parameter is ignored
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
    # Raises
        KeyError: if mode is not one of the three supported names.
    The amplitude of the cycle can be scaled on a per-iteration or
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle w/ no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each
        cycle iteration.
    For more detail, please see paper.
    # Example for CIFAR-10 w/ batch size 100:
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    # References
      - [Cyclical Learning Rates for Training Neural Networks](
      https://arxiv.org/abs/1506.01186)
    """

    def __init__(
            self,
            base_lr=0.001,
            max_lr=0.006,
            step_size=2000.,
            mode='triangular',
            gamma=1.,
            scale_fn=None,
            scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        if mode not in ['triangular', 'triangular2',
                        'exp_range']:
            raise KeyError("mode must be one of 'triangular', "
                           "'triangular2', or 'exp_range'")
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            # Built-in policies: pick the scaling function and what it is
            # evaluated on (cycle number or iteration count).
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2.**(x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** x
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        # clr_iterations counts iterations since the last reset;
        # trn_iterations counts every processed batch.
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current cycle iteration."""
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        # x is the normalised distance from the peak of the current cycle,
        # so (1 - x) is the triangular wave in [0, 1].
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr - self.base_lr) * \
                np.maximum(0, (1 - x)) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * \
                np.maximum(0, (1 - x)) * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs=None):
        """Set the optimizer's learning rate when training starts.

        `logs` is accepted for the callback interface and not used; its
        default is None rather than a shared mutable dict.
        """
        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Advance the cycle by one iteration and record the history.

        The first positional argument is named `epoch` for backward
        compatibility; it is not used.
        """
        if logs is None:
            logs = {}
        self.trn_iterations += 1
        self.clr_iterations += 1
        K.set_value(self.model.optimizer.lr, self.clr())

        self.history.setdefault(
            'lr', []).append(
            K.get_value(
                self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

    def on_epoch_end(self, epoch, logs=None):
        """Write the current learning rate into the epoch logs."""
        # Only substitute a new dict when none was passed: `logs or {}` would
        # also replace an empty dict supplied by the caller, and the 'lr'
        # entry would then never reach it.
        if logs is None:
            logs = {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

In [1]:
# Sequence-to-sequence autoencoder over windows of shape
# (timesteps, n_features). The layers are passed to the constructor in the
# same order in which they were previously added one by one.
model = Sequential([
    # Encoder: the second layer returns only its final state
    # (return_sequences=False).
    CuDNNLSTM(100, input_shape=(timesteps, n_features), return_sequences=True),
    CuDNNLSTM(50, input_shape=(timesteps, n_features), return_sequences=False),
    # Repeat the encoded vector once per timestep to feed the decoder.
    RepeatVector(timesteps),
    # Decoder: mirrors the encoder sizes and returns full sequences.
    CuDNNLSTM(50, return_sequences=True),
    CuDNNLSTM(100, return_sequences=True),
    # Map every timestep back to n_features values.
    TimeDistributed(Dense(n_features)),
])
model.compile(loss='mse', optimizer='adam')



In [1]:
# File that receives the weights whenever the validation loss improves.
filepath = "1024-weights.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
# Cyclical learning rate with the callback's default bounds and step size.
cyclic = CyclicLR(mode='triangular')

# Train for 5 epochs on the training windows, validating on the test windows.
model.fit_generator(
    train_generator,
    epochs=5,
    validation_data=test_generator,
    callbacks=[checkpoint, cyclic],
)

In [1]:
# Run the model on the inputs of the first training batch.
prediction = model.predict(train_generator[0][0])

In [1]:
# Plot window 6 of the first training batch.
# NOTE(review): the model output is plotted for index 60 in the cell below,
# not 6 — confirm which sample is meant if the two plots are to be compared.
plt.plot(train_generator[0][0][6])

In [1]:
# Plot the model output for sample 60 of the predicted batch.
# NOTE(review): the input plot in the cell above uses index 6 — confirm the
# intended sample.
plt.plot(prediction[60])