In [None]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras
from keras import optimizers
from keras.callbacks import History
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate, Bidirectional, Layer
from tensorflow.keras import optimizers
from keras.models import Sequential
import keras.backend as K
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
tf.random.set_seed(20)
np.random.seed(10)

In [None]:
dataset = pd.read_excel('day2_final.xlsx')

In [None]:
dataset['VOL'][dataset['VOL'] == dataset['VOL'].max()] = np.NaN

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [None]:
dataset = dataset.fillna(method='backfill')

In [None]:
data = dataset[['VOL', 'sma_10', 'roc',
       'stoch_k', 'cci', 'rsi', 'MACD_12_26',  'adi']]

In [None]:
data['OPEN'] = np.log(dataset['OPEN'])
data['CLOSE'] = np.log(dataset['CLOSE'])
data['CLOSE_lag'] = data['CLOSE'].shift(1)
data['CLOSE_lag2'] = data['CLOSE'].shift(2)
data['CLOSE_lag3'] = data['CLOSE'].shift(3)
data['CLOSE_lag4'] = data['CLOSE'].shift(4)
data['HIGH'] = np.log(dataset['HIGH'])
data['LOW'] = np.log(dataset['LOW'])
data['usdrub'] = np.log(dataset['usdrub'])
data['brent'] = np.log(dataset['brent'])
data['cac'] = np.log(dataset['cac'].astype('float64'))
data['ftse'] = np.log(dataset['ftse'])
data['dedow'] = np.log(dataset['dedow'])
data['OPEN_diff'] = data['OPEN'].diff()
data['CLOSE_diff'] = data['CLOSE'].diff()
data['CLOSE_lag_diff'] = data['CLOSE_lag'].diff()
data['CLOSE_lag_diff2'] = data['CLOSE_lag2'].diff()
data['CLOSE_lag_diff3'] = data['CLOSE_lag3'].diff()
data['CLOSE_lag_diff4'] = data['CLOSE_lag4'].diff()
data['HIGH_diff'] = data['HIGH'].diff()
data['LOW_diff'] = data['LOW'].diff()
data['usd_diff'] = data['usdrub'].diff()
data['brent_diff'] = data['brent'].diff()
data['cac_diff'] = data['cac'].diff()
data['ftse_diff'] = data['ftse'].diff()
data['dedow_diff'] = data['dedow'].diff()

In [None]:
data.columns

Index(['VOL', 'sma_10', 'roc', 'stoch_k', 'cci', 'rsi', 'MACD_12_26', 'adi',
       'OPEN', 'CLOSE', 'CLOSE_lag', 'CLOSE_lag2', 'CLOSE_lag3', 'CLOSE_lag4',
       'HIGH', 'LOW', 'usdrub', 'brent', 'cac', 'ftse', 'dedow', 'OPEN_diff',
       'CLOSE_diff', 'CLOSE_lag_diff', 'CLOSE_lag_diff2', 'CLOSE_lag_diff3',
       'CLOSE_lag_diff4', 'HIGH_diff', 'LOW_diff', 'usd_diff', 'brent_diff',
       'cac_diff', 'ftse_diff', 'dedow_diff'],
      dtype='object')

In [None]:
data = data[5:]

In [None]:
X=data[['VOL', 'sma_10', 'roc', 'stoch_k', 'cci', 'rsi', 'MACD_12_26', 'adi',
       'OPEN', 'CLOSE_lag', 'CLOSE_lag2', 'CLOSE_lag3', 'CLOSE_lag4',
       'HIGH', 'LOW', 'usdrub', 'brent', 'cac', 'ftse', 'dedow', 'OPEN_diff',
       'CLOSE_diff', 'CLOSE_lag_diff', 'CLOSE_lag_diff2', 'CLOSE_lag_diff3',
       'CLOSE_lag_diff4', 'HIGH_diff', 'LOW_diff', 'usd_diff', 'brent_diff',
       'cac_diff', 'ftse_diff', 'dedow_diff']]
Y=data['CLOSE']

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, shuffle = False, test_size=0.2)

In [None]:
train_data=X_train
train_data['CLOSE']=Y_train
test_data=X_test
test_data['CLOSE']=Y_test

In [None]:
normaliser = StandardScaler()
train_normalised_data = normaliser.fit_transform(train_data)
test_normalised_data = normaliser.transform(test_data)

In [None]:
history_points = 4
X_train = np.array([train_normalised_data[i : i + history_points].copy() for i in range(len(train_normalised_data) - history_points)])
y_train = np.array([train_normalised_data[:,-1][i + history_points].copy() for i in range(len(train_normalised_data) - history_points)])
X_test = np.array([test_normalised_data[i : i + history_points].copy() for i in range(len(test_normalised_data) - history_points)])
y_test = np.array([test_normalised_data[:,-1][i + history_points].copy() for i in range(len(test_normalised_data) - history_points)])

In [None]:
n_feats=X_train.shape[2]

In [None]:
class attention(Layer):
    def __init__(self,**kwargs):
        super(attention,self).__init__(**kwargs)
 
    def build(self,input_shape):
        self.W=self.add_weight(name='attention_weight', shape=(input_shape[-1],1), 
                               initializer='random_normal', trainable=True)
        self.b=self.add_weight(name='attention_bias', shape=(input_shape[1],1), 
                               initializer='zeros', trainable=True)        
        super(attention, self).build(input_shape)
 
    def call(self,x):
        # Alignment scores. Pass them through tanh function
        e = K.tanh(K.dot(x,self.W)+self.b)
        # Remove dimension of size 1
        e = K.squeeze(e, axis=-1)   
        # Compute the weights
        alpha = K.softmax(e)
        # Reshape to tensorFlow format
        alpha = K.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

In [None]:
def create_RNN_with_attention(hidden_units, dense_units, input_shape, activation):
    x=Input(shape=input_shape)
    RNN_layer1 = Bidirectional(LSTM(hidden_units, return_sequences=np.True_, dropout=0.2))(x)
    outputs=Dense(dense_units, trainable=True, activation=activation)(RNN_layer1)
    model=Model(x,outputs)
    sgd = optimizers.SGD(learning_rate=0.01, nesterov=True)
    model.compile(loss='mse', optimizer=sgd)    
    return model    
 
model_attention = create_RNN_with_attention(hidden_units=4096, dense_units=1, 
                                  input_shape= (4, 34), activation='relu')

In [None]:
model_attention.fit(X_train, y_train, epochs = 1, batch_size = 64, shuffle=True, validation_split = 0.1) #36



<keras.callbacks.History at 0x7fc63ffa1150>

In [None]:
def create_RNN_with_attention(hidden_units, dense_units, input_shape, activation):
    x=Input(shape=input_shape)
    RNN_layer1 = Bidirectional(LSTM(hidden_units, return_sequences=np.True_, dropout=0.2))(x)
    attention_layer = attention()(RNN_layer1)
    outputs=Dense(dense_units, trainable=True, activation=activation)(attention_layer)
    model=Model(x,outputs)
    sgd = optimizers.SGD(learning_rate=0.01, nesterov=True)
    model.compile(loss='mse', optimizer=sgd)    
    return model    
 
model_attention = create_RNN_with_attention(hidden_units=4096, dense_units=1, 
                                  input_shape= (4, 34), activation='relu')

In [None]:
model_attention.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 4, 34)]           0         
                                                                 
 bidirectional_1 (Bidirectio  (None, 4, 8192)          135364608 
 nal)                                                            
                                                                 
 attention (attention)       (None, 8192)              8196      
                                                                 
 dense (Dense)               (None, 1)                 8193      
                                                                 
Total params: 135,380,997
Trainable params: 135,380,997
Non-trainable params: 0
_________________________________________________________________


In [None]:
### Основная
def create_RNN_with_attention(hidden_units, dense_units, input_shape, activation):
    x=Input(shape=input_shape)
    RNN_layer1 = Bidirectional(LSTM(hidden_units, return_sequences=np.True_, dropout=0.2))(x)
    attention_layer = attention()(RNN_layer1)
    outputs=Dense(dense_units, trainable=True, activation=activation)(attention_layer)
    model=Model(x,outputs)
    sgd = optimizers.SGD(learning_rate=0.01, nesterov=True)
    model.compile(loss='mse', optimizer=sgd)    
    return model    
 
model_attention = create_RNN_with_attention(hidden_units=4096, dense_units=1, 
                                  input_shape= (history_points, n_feats), activation='relu')

In [None]:
y_pred = model_attention.predict(X_test, batch_size = 64)

In [None]:
r2_score(y_test, y_pred[:,0])

-18.82887005169089

одна лстм 0.963789498012317

0.9985052184237352 2 слоя, 10 эпох

0.9988358864383952, e=17

Epoch 8/20
1467/1467 [==============================] - 402s 274ms/step - loss: 0.6764 - val_loss: 2.7466e-04
2 слоя


1 слой


[ ]
model_attention.fit(X_train, y_train, epochs = 30, batch_size = 16, shuffle=True, validation_split = 0.1) #36
Epoch 1/30
1467/1467 [==============================] - 119s 79ms/step - loss: 0.6787 - val_loss: 3.1101e-04
Epoch 2/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6765 - val_loss: 2.2937e-04
Epoch 3/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 2.2036e-04
Epoch 4/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6765 - val_loss: 2.9331e-04
Epoch 5/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6765 - val_loss: 2.7359e-04
Epoch 6/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 3.7130e-04
Epoch 7/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6765 - val_loss: 2.4782e-04
Epoch 8/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 2.3836e-04
Epoch 9/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6765 - val_loss: 2.9644e-04
Epoch 10/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 3.4685e-04
Epoch 11/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 3.7837e-04
Epoch 12/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 3.5202e-04
Epoch 13/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 1.9014e-04
Epoch 14/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 6.0173e-04
Epoch 15/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 2.0644e-04
Epoch 16/30
1467/1467 [==============================] - 115s 78ms/step - loss: 0.6764 - val_loss: 2.2221e-04
Epoch 17/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6765 - val_loss: 1.8249e-04
Epoch 18/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 2.7812e-04
Epoch 19/30
1467/1467 [==============================] - 120s 81ms/step - loss: 0.6764 - val_loss: 2.9005e-04
Epoch 20/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 1.9832e-04
Epoch 21/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6765 - val_loss: 1.8457e-04
Epoch 22/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 2.3690e-04
Epoch 23/30
1467/1467 [==============================] - 120s 82ms/step - loss: 0.6764 - val_loss: 2.0100e-04
Epoch 24/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6765 - val_loss: 2.6441e-04
Epoch 25/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6764 - val_loss: 3.7662e-04
Epoch 26/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6765 - val_loss: 1.9328e-04
Epoch 27/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6764 - val_loss: 2.0109e-04
Epoch 28/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6765 - val_loss: 1.9849e-04
Epoch 29/30
1467/1467 [==============================] - 121s 82ms/step - loss: 0.6764 - val_loss: 1.9826e-04
Epoch 30/30
1467/1467 [==============================] - 116s 79ms/step - loss: 0.6764 - val_loss: 3.9444e-04

In [None]:
normaliser_y = StandardScaler()
Y_train_n = normaliser_y.fit_transform(np.array(Y_train).reshape(-1, 1))
y_test_n = normaliser_y.transform(np.array(y_test).reshape(-1, 1))

In [None]:
y_pred_n = normaliser_y.inverse_transform(y_pred.reshape(-1, 1))

In [None]:
r2_score(np.exp(Y_test)[4:], np.exp(y_pred_n))

0.9928676535015659

In [None]:
yyy = pd.DataFrame(np.exp(y_pred_n))

In [None]:
yyy.to_csv('y_pred_lstm_99.csv')

In [None]:
yyy['0']

KeyError: ignored

In [None]:
def mean_absolute_percentage_error(actual, prediction):
    actual = pd.Series(actual)
    prediction = pd.Series(prediction)
    return 100 * np.mean(np.abs((actual - prediction))/actual)

In [None]:
mse = mean_squared_error(dataset['CLOSE'][-686:], yyy)
rmse = mse ** 0.5
mape = mean_absolute_percentage_error(dataset['CLOSE'][-686:], np.exp(y_pred_n).reshape(-1))
r2 = r2_score(dataset['CLOSE'][-686:], yyy)
mae = mean_absolute_error(dataset['CLOSE'][-686:], yyy)

In [None]:
mse

1823.1414886070318

In [None]:
rmse

42.698260955301585

In [None]:
100 * np.mean(np.abs((dataset['CLOSE'][-686:] - np.exp(y_pred_n).reshape(-1)))/dataset['CLOSE'][-686:])

1.0315701055868152

In [None]:
r2

0.9928676535015659

In [None]:
mae

31.91232177022594

In [None]:
mse = mean_squared_error(dataset['CLOSE'].diff()[-685:], np.zeros(685))
rmse = mse ** 0.5
r2 = r2_score(dataset['CLOSE'].diff()[-685:], np.zeros(685))
mae = mean_absolute_error(dataset['CLOSE'].diff()[-685:], np.zeros(685))

In [None]:
mape = mean_absolute_percentage_error(dataset['CLOSE'].diff()[-685:], np.zeros(685))

In [None]:
mse

1195.010621605839

In [None]:
rmse

34.56892566461734

In [None]:
r2

-0.003777465711201522

In [None]:
mae

24.86163503649634

In [None]:
input_layer = Input(shape=(history_points, n_feats))
conv1 = Conv1D(filters=64,
               kernel_size=8,
               strides=1,
               activation='relu',
               padding='same')(input_layer)
lstm1 = LSTM(64, return_sequences=True, dropout=0.05)(conv1)
output_layer = Dense(1, activation='relu')(lstm1)
model = Model(inputs=input_layer, outputs=output_layer)

sgd = optimizers.SGD(lr=0.05, nesterov=True)
model.compile(loss='mse', optimizer='sgd')

NameError: ignored

In [None]:
yyyy=yyy.diff()[1:]

In [None]:
yyyy

Unnamed: 0,0
1,9.517578
2,13.788086
3,3.727783
4,5.615723
5,-18.963623
...,...
681,58.323242
682,-5.973877
683,-122.288086
684,62.102051


In [None]:
Y_test_ = np.exp(Y_test).diff()

In [None]:
Y_test_n = np.exp(Y_test)

In [None]:
Y_test_n

2765    2468.25
2766    2470.61
2767    2457.79
2768    2476.73
2769    2483.70
         ...   
3450    3947.82
3451    3843.05
3452    3923.58
3453    3890.59
3454    3936.34
Name: CLOSE, Length: 690, dtype: float64

In [None]:
np.exp(Y_test[-685:])

2770    2493.28
2771    2504.23
2772    2507.81
2773    2492.70
2774    2497.59
         ...   
3450    3947.82
3451    3843.05
3452    3923.58
3453    3890.59
3454    3936.34
Name: CLOSE, Length: 685, dtype: float64

In [None]:
yyy[0]

0      2470.983887
1      2480.501465
2      2494.289551
3      2498.017334
4      2503.633057
          ...     
681    3947.497314
682    3941.523438
683    3819.235352
684    3881.337402
685    3846.309570
Name: 0, Length: 686, dtype: float32

In [None]:
Y_test_n

2765    2468.25
2766    2470.61
2767    2457.79
2768    2476.73
2769    2483.70
         ...   
3450    3947.82
3451    3843.05
3452    3923.58
3453    3890.59
3454    3936.34
Name: CLOSE, Length: 690, dtype: float64

In [None]:
profit = 0
profit_hist = []
profitability = 0
profitability_hist = []
for i in range(1,685):
  if yyyy[0][i] > 0:
    profit += Y_test_[i+2769]
    profit_hist.append(profit)
    profitability += Y_test_[i+2769]/Y_test_n[i+2768]
    profitability_hist.append(Y_test_[i+2769]/Y_test_n[i+2768])
  if yyyy[0][i] < 0:
    profit -= Y_test_[i+2769]
    profit_hist.append(profit)
    profitability -= Y_test_[i+2769]/Y_test_n[i+2768]
    profitability_hist.append(Y_test_[i+2769]/Y_test_n[i+2768])

In [None]:
profitability

0.3749000579498917

In [None]:
np.mean(profitability_hist)*100

0.07272425534915485

In [None]:
np.std(profitability_hist)*100

1.1871868948995505

In [None]:
profitability_hist = pd.DataFrame(profitability_hist)

In [None]:
profitability_hist

Unnamed: 0,0
0,0.003857
1,0.004392
2,0.001430
3,-0.006025
4,0.001962
...,...
679,0.009571
680,-0.003058
681,-0.026539
682,0.020955


In [None]:
len(profitability_hist[profitability_hist[0]>0])*100/684

56.14035087719298

In [None]:
len(profitability_hist)

684