In [1]:
import yfinance as yf
import datetime as dt
import pandas_datareader as pdr
import pandas as pd
import pandas_ta as pta
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling1D



## Descarga datos y creación dataset

In [2]:
sample_per = 400
stock = 'EURGBP=X'

end = dt.date.today()- dt.timedelta(days=2)
start = end - dt.timedelta(days=(sample_per))
interval = '1h'

In [3]:
database = yf.download(stock, start=start, end=end, interval=interval)

[*********************100%***********************]  1 of 1 completed


In [4]:
#EMA (200,50,20)
database['200ema'] = database['Adj Close'].ewm(span=200).mean()
database['50ema'] = database['Adj Close'].ewm(span=50, adjust=False).mean()
database['20ema'] = database['Adj Close'].ewm(span=20, adjust=False).mean()

In [5]:
database.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,200ema,50ema,20ema
2020-07-29 23:00:00+01:00,0.9066,0.9076,0.90656,0.9073,0.9073,0,0.9073,0.9073,0.9073
2020-07-30 00:00:00+01:00,0.90746,0.90781,0.9071,0.90751,0.90751,0,0.907406,0.907308,0.90732
2020-07-30 01:00:00+01:00,0.90751,0.90774,0.90664,0.90702,0.90702,0,0.907276,0.907297,0.907291
2020-07-30 02:00:00+01:00,0.90697,0.90717,0.9066,0.90683,0.90683,0,0.907163,0.907279,0.907247
2020-07-30 03:00:00+01:00,0.90688,0.9073,0.9065,0.90689,0.90689,0,0.907107,0.907263,0.907213


In [6]:
data = database.copy()

data['Adj Close shi'] = data['Adj Close'].shift(-1)

data = data[['Adj Close shi','200ema','50ema','20ema']]

data.dropna(inplace=True)


data

Unnamed: 0,Adj Close shi,200ema,50ema,20ema
2020-07-29 23:00:00+01:00,0.90751,0.907300,0.907300,0.907300
2020-07-30 00:00:00+01:00,0.90702,0.907406,0.907308,0.907320
2020-07-30 01:00:00+01:00,0.90683,0.907276,0.907297,0.907291
2020-07-30 02:00:00+01:00,0.90689,0.907163,0.907279,0.907247
2020-07-30 03:00:00+01:00,0.90689,0.907107,0.907263,0.907213
...,...,...,...,...
2021-09-02 17:00:00+01:00,0.85828,0.857259,0.858798,0.858815
2021-09-02 18:00:00+01:00,0.85797,0.857269,0.858778,0.858764
2021-09-02 19:00:00+01:00,0.85812,0.857276,0.858746,0.858689
2021-09-02 20:00:00+01:00,0.85811,0.857285,0.858722,0.858635


## Preparacion datos para el LSTM

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6780 entries, 2020-07-29 23:00:00+01:00 to 2021-09-02 21:00:00+01:00
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Adj Close shi  6780 non-null   float64
 1   200ema         6780 non-null   float64
 2   50ema          6780 non-null   float64
 3   20ema          6780 non-null   float64
dtypes: float64(4)
memory usage: 264.8 KB


In [8]:
#Escalamos los valores con un MinMaxScaler
values = data.values

scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(values)

values = scaled


#Dividimos los datos entre train y test
train, val = train_test_split(values, test_size=0.2, shuffle=False)

#Dividimos datos validation entre validation y test
val, test = train_test_split(val, test_size=0.5, shuffle=False)

#Separamos los inputs de los outputs
train_X, train_y = train[:, 1:], train[:,0]
val_X, val_y = val[:, 1:], val[:,0]
test_X, test_y = test[:, 1:], test[:, 0]


#Generamos el reshape para poder incluir los valores en el modelo LSTM
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
val_X = val_X.reshape((val_X.shape[0], 1, val_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape)

(5424, 1, 3) (5424,) (678, 1, 3) (678,) (678, 1, 3) (678,)


## Creamos el modelo

In [9]:
#Generamos arquitectura del modelo
model = Sequential()

model.add(LSTM(64,  return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(0.2))

model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(256, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(512, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(1024, activation='relu', return_sequences=True))
model.add(Dropout(0.2))

model.add(Dense(1, activation='linear'))

model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 64)             17408     
_________________________________________________________________
dropout (Dropout)            (None, 1, 64)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 128)            98816     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 1, 256)            394240    
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 256)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 1, 512)            1

In [11]:
#Entrenamos el modelo
history = model.fit(train_X, train_y, epochs=15, batch_size = 128, validation_data=[val_X, val_y], verbose=1, shuffle=False)

Epoch 1/15

ValueError: in user code:

    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:1323 test_function  *
        return step_function(self, iterator)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:1314 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:1307 run_step  **
        outputs = model.test_step(data)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:1266 test_step
        y_pred = self(x, training=False)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:1013 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\Usuario\anaconda3\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:200 assert_input_compatibility
        raise ValueError('Layer ' + layer_name + ' expects ' +

    ValueError: Layer sequential expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 1, 3) dtype=float32>, <tf.Tensor 'ExpandDims:0' shape=(None, 1) dtype=float32>]


In [None]:
#Generamos plot con los resultados
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

## Resultados con valores de test

In [None]:
# make a prediction
yhat = model.predict(test_X)

In [None]:
yhat_re = yhat.reshape((yhat.shape[0], yhat.shape[1]))

test_X_re = test_X.reshape((test_X.shape[0], test_X.shape[2]))

inv_yhat = np.concatenate((yhat_re, test_X_re), axis=1)

inv_yhat = scaler.inverse_transform(inv_yhat)

inv_yhat = inv_yhat[:,0]

In [None]:
test_y_re = test_y.reshape((len(test_y), 1))

inv_y = np.concatenate((test_y_re, test_X_re), axis=1)

inv_y = scaler.inverse_transform(inv_y)

inv_y = inv_y[:,0]

In [None]:
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

In [None]:
res = pd.DataFrame({'yhat':inv_yhat, 'y':inv_y})

res['yhat_log'] = res['yhat'].pct_change()
res['y_log'] = res['y'].pct_change()

res['res'] = res['yhat_log']*res['y_log']

lista =[]
for index, row in res.iterrows():
    if row['res'] > 0:
        lista.append(1)
    else:
        lista.append(-1)

serie = pd.Series(lista, index = res.index)

res['Up_Down'] = serie

res['Up_Down'].value_counts()

## Resultados con todos los valores

In [None]:
values_X, values_y = values[:, 1:], values[:,0]

values_X = values_X.reshape((values_X.shape[0], 1, values_X.shape[1]))

In [None]:
# make a prediction
yhat = model.predict(values_X)

In [None]:
yhat_re = yhat.reshape((yhat.shape[0], yhat.shape[1]))

values_X_re = values_X.reshape((values_X.shape[0], values_X.shape[2]))

inv_yhat = np.concatenate((yhat_re, values_X_re), axis=1)

inv_yhat = scaler.inverse_transform(inv_yhat)

inv_yhat = inv_yhat[:,0]

In [None]:
values_y_re = values_y.reshape((len(values_y), 1))

inv_y = np.concatenate((values_y_re, values_X_re), axis=1)

inv_y = scaler.inverse_transform(inv_y)

inv_y = inv_y[:,0]

In [None]:
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

In [None]:
res = pd.DataFrame({'yhat':inv_yhat, 'y':inv_y})

res['yhat_log'] = res['yhat'].pct_change()
res['y_log'] = res['y'].pct_change()

res['res'] = res['yhat_log']*res['y_log']

lista =[]
for index, row in res.iterrows():
    if row['res'] > 0:
        lista.append(1)
    else:
        lista.append(-1)

serie = pd.Series(lista, index = res.index)

res['Up_Down'] = serie

print(res['Up_Down'].value_counts())
print()
per = res['Up_Down'].value_counts()[1]/(res['Up_Down'].value_counts()[1]+res['Up_Down'].value_counts()[-1])
print(f'% acierto: {per}')