In [1]:
import pandas as pd
import numpy as np
import requests
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Masking
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.metrics import MAPE, MeanAbsoluteError
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf 

In [2]:
data_gemini = pd.read_csv("../data/BTCUSD_4hours.csv")

In [3]:
data_gemini = data_gemini.drop(columns="Unnamed: 0").set_index("date")

# Functions and Pipeline

In [4]:
def add_ema(data, tspan=[12,26,20,50,34,55]):
    """
    Adds Exponential Moving Averages (EMA) to the dataframe. The default timeframes are 12,26,20,50,34 and 55.
    """
    for t in tspan:
        data[f'ema{t}'] = data.log_close.ewm(span=t).mean()
    return data

In [5]:
add_ema(data_gemini)

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,ema26,ema20,ema50,ema34,ema55
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847,1.494235,1.493617,1.493776,1.493363,1.493493,1.493338
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699,1.504954,1.503707,1.504029,1.503189,1.503453,1.503138
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915,1.527669,1.524173,1.525070,1.522741,1.523470,1.522600
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438,1.549885,1.543946,1.545478,1.541494,1.542744,1.541253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,10.775250,10.781677,10.756861,10.768406,10.753323
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,10.776432,10.782584,10.758207,10.769709,10.754676
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660,10.792838,10.777856,10.783830,10.759676,10.771192,10.756140
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408,10.796311,10.780638,10.786837,10.761862,10.773719,10.758256


In [6]:
def computeRSI(data, window=14):
    """
    Computes the Relative Stregth Index for a given dataset and the window can be defined. Its default value is 14.
    """
    diff = data.diff(1).dropna()        # diff in one field(one day)

    #this preservers dimensions off diff values
    up_chg = 0 * diff
    down_chg = 0 * diff
    
    # up change is equal to the positive difference, otherwise equal to zero
    up_chg[diff > 0] = diff[ diff>0 ]
    # down change is equal to negative deifference, otherwise equal to zero
    down_chg[diff < 0] = diff[ diff < 0 ]
    
    # check pandas documentation for ewm
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html
    # values are related to exponential decay
    # we set com=window-1 so we get decay alpha=1/window
    up_chg_avg   = up_chg.ewm(com=window-1 , min_periods=window).mean()
    down_chg_avg = down_chg.ewm(com=window-1 , min_periods=window).mean()
    
    rs = abs(up_chg_avg/down_chg_avg)
    rsi = 100 - 100/(1+rs)
    return rsi



In [7]:
rsi = computeRSI(data_gemini)

In [8]:
def stoch_rsi(rsi, d_window=3, k_window=3, window=14):
    """
    Computes the stochastic RSI. Default values are d=3, k=3, window=14.
    """
    minrsi = rsi.rolling(window=window, center=False).min()
    maxrsi = rsi.rolling(window=window, center=False).max()
    stoch = ((rsi - minrsi) / (maxrsi - minrsi)) * 100
    K = stoch.rolling(window=k_window, center=False).mean()
    D = K.rolling(window=d_window, center=False).mean() 
    return K, D  

In [9]:
stoch_rsi(rsi)

(                          open       high        low      close     volume  \
 date                                                                         
 2011-12-31 16:00:00        NaN        NaN        NaN        NaN        NaN   
 2012-01-01 04:00:00        NaN        NaN        NaN        NaN        NaN   
 2012-01-01 16:00:00        NaN        NaN        NaN        NaN        NaN   
 2012-01-01 20:00:00        NaN        NaN        NaN        NaN        NaN   
 2012-01-02 20:00:00        NaN        NaN        NaN        NaN        NaN   
 ...                        ...        ...        ...        ...        ...   
 2021-08-22 12:00:00  35.005390  34.479767  35.897418  34.998652  16.325185   
 2021-08-22 16:00:00  18.222981  18.007879  18.629785  18.179354  19.001155   
 2021-08-22 20:00:00   4.737093   4.891657   4.703528   4.725876  26.991820   
 2021-08-23 00:00:00  26.473348  26.634274  26.517292  26.555930  45.622606   
 2021-08-23 04:00:00  55.064877  55.163461  55.17912

In [10]:
def add_stoch_rsi(data, d_window=3, k_window=3, window=14):
    data['rsi'] = computeRSI(data['log_close'], window)
    data['K'], data['D'] = stoch_rsi(data['rsi'], d_window, k_window, window)
    return data

In [11]:
add_stoch_rsi(data_gemini)

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,ema26,ema20,ema50,ema34,ema55,rsi,K,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,1.479329,,,
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847,1.494235,1.493617,1.493776,1.493363,1.493493,1.493338,,,
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699,1.504954,1.503707,1.504029,1.503189,1.503453,1.503138,,,
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915,1.527669,1.524173,1.525070,1.522741,1.523470,1.522600,,,
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438,1.549885,1.543946,1.545478,1.541494,1.542744,1.541253,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,10.775250,10.781677,10.756861,10.768406,10.753323,62.080327,36.210154,50.235845
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,10.776432,10.782584,10.758207,10.769709,10.754676,59.525060,19.004941,36.146357
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660,10.792838,10.777856,10.783830,10.759676,10.771192,10.756140,61.603611,5.196078,20.137058
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408,10.796311,10.780638,10.786837,10.761862,10.773719,10.758256,69.159095,26.741234,16.980751


In [12]:
def get_bollinger_bands(prices, rate=20):
    sma = prices.rolling(rate).mean() # <-- Get SMA for 20 days
    std = prices.rolling(rate).std() # <-- Get rolling standard deviation for 20 days
    bollinger_up = sma + std * 2 # Calculate top band
    bollinger_down = sma - std * 2 # Calculate bottom band
    return sma, bollinger_up, bollinger_down

In [13]:
prices = data_gemini["log_close"]

In [14]:
get_bollinger_bands(prices, rate=20)

(date
 2011-12-31 08:00:00          NaN
 2011-12-31 16:00:00          NaN
 2012-01-01 04:00:00          NaN
 2012-01-01 16:00:00          NaN
 2012-01-01 20:00:00          NaN
                          ...    
 2021-08-22 12:00:00    10.777789
 2021-08-22 16:00:00    10.782060
 2021-08-22 20:00:00    10.786118
 2021-08-23 00:00:00    10.789987
 2021-08-23 04:00:00    10.793803
 Name: log_close, Length: 20840, dtype: float64,
 date
 2011-12-31 08:00:00          NaN
 2011-12-31 16:00:00          NaN
 2012-01-01 04:00:00          NaN
 2012-01-01 16:00:00          NaN
 2012-01-01 20:00:00          NaN
                          ...    
 2021-08-22 12:00:00    10.840219
 2021-08-22 16:00:00    10.834660
 2021-08-22 20:00:00    10.828258
 2021-08-23 00:00:00    10.827486
 2021-08-23 04:00:00    10.829215
 Name: log_close, Length: 20840, dtype: float64,
 date
 2011-12-31 08:00:00          NaN
 2011-12-31 16:00:00          NaN
 2012-01-01 04:00:00          NaN
 2012-01-01 16:00:00          NaN


In [15]:
def add_bollinger(data, prices, rate=20):
    data['sma'], data['bollinger_up'], data['bollinger_down'] = get_bollinger_bands(prices)
    return data

In [16]:
add_bollinger(data_gemini,prices)

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,...,ema20,ema50,ema34,ema55,rsi,K,D,sma,bollinger_up,bollinger_down
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329,1.479329,...,1.479329,1.479329,1.479329,1.479329,,,,,,
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847,1.494235,...,1.493776,1.493363,1.493493,1.493338,,,,,,
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699,1.504954,...,1.504029,1.503189,1.503453,1.503138,,,,,,
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915,1.527669,...,1.525070,1.522741,1.523470,1.522600,,,,,,
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438,1.549885,...,1.545478,1.541494,1.542744,1.541253,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,...,10.781677,10.756861,10.768406,10.753323,62.080327,36.210154,50.235845,10.777789,10.840219,10.715358
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,...,10.782584,10.758207,10.769709,10.754676,59.525060,19.004941,36.146357,10.782060,10.834660,10.729461
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660,10.792838,...,10.783830,10.759676,10.771192,10.756140,61.603611,5.196078,20.137058,10.786118,10.828258,10.743977
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408,10.796311,...,10.786837,10.761862,10.773719,10.758256,69.159095,26.741234,16.980751,10.789987,10.827486,10.752488


In [17]:
def add_vol_roc(data):
    data['vol_roc'] = data.volume.pct_change()
    return data

In [18]:
add_vol_roc(data_gemini)

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,...,ema50,ema34,ema55,rsi,K,D,sma,bollinger_up,bollinger_down,vol_roc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329,1.479329,...,1.479329,1.479329,1.479329,,,,,,,
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847,1.494235,...,1.493363,1.493493,1.493338,,,,,,,68.407580
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699,1.504954,...,1.503189,1.503453,1.503138,,,,,,,-0.952500
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915,1.527669,...,1.522741,1.523470,1.522600,,,,,,,5.657790
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438,1.549885,...,1.541494,1.542744,1.541253,,,,,,,0.010000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,...,10.756861,10.768406,10.753323,62.080327,36.210154,50.235845,10.777789,10.840219,10.715358,2.306553
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,...,10.758207,10.769709,10.754676,59.525060,19.004941,36.146357,10.782060,10.834660,10.729461,-0.727108
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660,10.792838,...,10.759676,10.771192,10.756140,61.603611,5.196078,20.137058,10.786118,10.828258,10.743977,0.307177
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408,10.796311,...,10.761862,10.773719,10.758256,69.159095,26.741234,16.980751,10.789987,10.827486,10.752488,2.205170


In [19]:
data_gemini = data_gemini.dropna()

In [113]:
data_gemini.shape

(20809, 22)

### Temporal Train-Test Split

In [20]:
train_percentage = 80

In [21]:
train_len = int(len(data_gemini) * train_percentage/100)
train_len

16647

In [22]:
data_train = data_gemini[:train_len]
data_test = data_gemini[train_len:]

In [23]:
print(data_train.shape)
print(data_test.shape)

(16647, 22)
(4162, 22)


### scaling

In [24]:
from sklearn.preprocessing import MinMaxScaler

minmax_scaler = MinMaxScaler(feature_range = (0,1))

minmax_scaler.fit(data_train)

data_train_scaled = minmax_scaler.transform(data_train)
data_test_scaled = minmax_scaler.transform(data_test)

In [25]:
min1 = minmax_scaler.data_min_[5:9]
min1

array([1.44133982, 1.44133982, 1.44133982, 1.44133982])

In [26]:
range1 = minmax_scaler.data_range_[5:9]
range1

array([8.43681808, 8.43734217, 8.43565269, 8.43679108])

## Basic Model

### create x and y array

In [27]:
window_size = 72 #because we want to base our prediction on 3 weeks (72*4 hours)
horizon = 1 #number of predictions into the future :) 
data_train_subsequences = []
fake_y_train = []
for k in range(len(data_train_scaled)-(window_size-1)-horizon):
    data_train_subsequences.append(data_train_scaled[k:k+window_size])
    fake_y_train.append(data_train_scaled[k+window_size])

In [28]:
len(data_train_subsequences)

16575

In [29]:
len(fake_y_train)

16575

In [30]:
X_train = np.array(data_train_subsequences)
y_train = np.array(fake_y_train)[:,5:9]

In [31]:
print(X_train.shape)
print(y_train.shape)

(16575, 72, 22)
(16575, 4)


In [32]:
X_train

array([[[1.49391660e-04, 1.49313498e-04, 1.49565734e-04, ...,
         5.24377305e-02, 4.12067187e-02, 2.18896790e-03],
        [1.43920378e-04, 1.43845078e-04, 1.44088076e-04, ...,
         5.18025280e-02, 4.42378071e-02, 5.34070036e-04],
        [1.52548169e-04, 1.52685252e-04, 1.52725921e-04, ...,
         5.26109407e-02, 4.54490028e-02, 1.34748629e-03],
        ...,
        [9.04043952e-05, 9.03570951e-05, 9.05097358e-05, ...,
         3.75846118e-02, 1.90786163e-02, 3.63174507e-03],
        [8.52750678e-05, 8.52304514e-05, 8.53744317e-05, ...,
         3.62594410e-02, 1.96558829e-02, 1.23363373e-04],
        [9.91242518e-05, 9.90723894e-05, 9.92397529e-05, ...,
         3.64121111e-02, 1.95796610e-02, 1.38079786e-02]],

       [[1.43920378e-04, 1.43845078e-04, 1.44088076e-04, ...,
         5.18025280e-02, 4.42378071e-02, 5.34070036e-04],
        [1.52548169e-04, 1.52685252e-04, 1.52725921e-04, ...,
         5.26109407e-02, 4.54490028e-02, 1.34748629e-03],
        [1.39645938e-04, 

In [33]:
y_train

array([[0.04495631, 0.045052  , 0.04488845, 0.04498708],
       [0.04561404, 0.04561121, 0.04562034, 0.04561419],
       [0.04541636, 0.04541354, 0.04542264, 0.04541651],
       ...,
       [0.89664236, 0.8966277 , 0.89669923, 0.89659783],
       [0.89603634, 0.89602784, 0.89608533, 0.89599154],
       [0.89523672, 0.89526182, 0.89526529, 0.89520348]])

### Initialize a model & set layers

In [59]:
# Initialize a model, add a masking layer,LSTM,Dense Layers 
model = Sequential()
#model.add(normalizer)
#model.add(layers.Masking(mask_value=-1))
model.add(layers.LSTM(units=16, activation='tanh',input_shape = X_train[0].shape, return_sequences = True)) # first run 64 units
model.add(layers.LSTM(units=12,return_sequences = False, activation = "tanh"))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(4, activation="relu"))

model.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 72, 16)            2496      
_________________________________________________________________
lstm_3 (LSTM)                (None, 12)                1392      
_________________________________________________________________
dense_4 (Dense)              (None, 10)                130       
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 44        
Total params: 4,062
Trainable params: 4,062
Non-trainable params: 0
_________________________________________________________________


### Compile Model 


In [60]:
# later add: es = EarlyStopping(patience=3, restore_best_weights=True)

model.compile(optimizer='rmsprop', loss="mse", metrics="mae")

### Fit Model

In [61]:
model.fit(X_train,y_train,epochs=10, batch_size=64, validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x171575700>

## create X_test & y_test

In [37]:
#with the same window_size & horizon as the train data
window_size = 72
horizon = 1
data_test_subsequences = []
fake_y_test = []
for k in range(len(data_test_scaled)-(window_size-1)-horizon):
    data_test_subsequences.append(data_test_scaled[k:k+window_size])
    fake_y_test.append(data_test_scaled[k+window_size])

In [38]:
X_test = np.array(data_test_subsequences)
y_test = np.array(fake_y_test)[:,5:9]

In [39]:
print(X_test.shape)
print(y_test.shape)

(4090, 72, 22)
(4090, 4)


In [40]:
y_test

array([[0.89919302, 0.89921484, 0.89926036, 0.89919776],
       [0.89920248, 0.89922101, 0.89928913, 0.89921206],
       [0.89885214, 0.89885762, 0.89893084, 0.89885577],
       ...,
       [1.10874385, 1.10871904, 1.10885782, 1.10875337],
       [1.11108174, 1.11107007, 1.11118873, 1.11109402],
       [1.11226691, 1.11223155, 1.11238067, 1.11226782]])

### Predict 

In [62]:
predictions = model.predict(X_test)

In [63]:
y_pred = np.exp((predictions * range1 + min1))

In [69]:
y_pred

array([[ 8083.37672811,  7903.10004408,  7351.21620967,  6520.69687392],
       [ 7977.66670893,  7867.33118136,  7360.03698582,  6350.3604982 ],
       [ 7889.350915  ,  7861.19306042,  7398.33445856,  6216.5991834 ],
       ...,
       [11960.71033992, 19266.52581609, 19570.66908812,  9683.62410944],
       [11955.52677069, 19210.44881805, 19650.59661993,  9588.85566822],
       [11979.94283178, 19380.71128103, 19893.46337025,  9649.42198621]])

In [64]:
y_test_unscaled = np.exp((y_test * range1 + min1))

In [65]:
len(y_pred)

4090

In [66]:
len(y_test_unscaled)

4090

In [70]:
y_test_unscaled

array([[ 8330.40797327,  8335.86954951,  8326.41193756,  8330.53907185],
       [ 8331.07347052,  8336.30347046,  8328.43317526,  8331.54416983],
       [ 8306.4846931 ,  8310.78284691,  8303.29891908,  8306.53743132],
       ...,
       [48806.07211847, 48824.21556822, 48789.93138288, 48808.53095339],
       [49778.2939647 , 49802.38679926, 49758.77085281, 49781.95970658],
       [50278.52718711, 50292.84080917, 50261.60947133, 50277.40641224]])

In [71]:
y_pred.shape

(4090, 4)

In [68]:
np.mean(np.abs(y_test_unscaled - y_pred))

11811.406791339745

## Try with real prices 

In [102]:
data_gemini.tail()

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,...,ema50,ema34,ema55,rsi,K,D,sma,bollinger_up,bollinger_down,vol_roc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-08-22 12:00:00,48769.308417,48787.711375,48749.4175,48769.73775,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,...,10.756861,10.768406,10.753323,62.080327,36.210154,50.235845,10.777789,10.840219,10.715358,2.306553
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,...,10.758207,10.769709,10.754676,59.52506,19.004941,36.146357,10.78206,10.83466,10.729461,-0.727108
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558,48810.16875,0.605696,10.79561,10.795982,10.795279,10.79566,10.792838,...,10.759676,10.771192,10.75614,61.603611,5.196078,20.137058,10.786118,10.828258,10.743977,0.307177
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.94136,10.815334,10.815818,10.814942,10.815408,10.796311,...,10.761862,10.773719,10.758256,69.159095,26.741234,16.980751,10.789987,10.827486,10.752488,2.20517
2021-08-23 04:00:00,50278.596788,50292.908182,50261.680727,50277.475152,1.908007,10.825333,10.825618,10.824997,10.825311,10.800772,...,10.76435,10.776667,10.760651,72.121754,55.50099,29.146101,10.793803,10.829215,10.75839,-0.01718


In [103]:
X_train_real = np.array(data_train_subsequences)
y_train_real = np.array(fake_y_train)[:,:4]

In [104]:
X_test_real = np.array(data_test_subsequences)
y_test_real = np.array(fake_y_test)[:,:4]

In [105]:
# Initialize a model, add a masking layer,LSTM,Dense Layers 
model_real = Sequential()
#model.add(normalizer)
#model.add(layers.Masking(mask_value=-1))
model_real.add(layers.LSTM(units=16, activation='tanh',input_shape = X_train_real[0].shape, return_sequences = True)) # first run 64 units
model_real.add(layers.LSTM(units=12,return_sequences = False, activation = "tanh"))
model_real.add(layers.Dense(10, activation='relu'))
model_real.add(layers.Dense(4, activation="relu"))

model_real.summary()


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 72, 16)            2496      
_________________________________________________________________
lstm_9 (LSTM)                (None, 12)                1392      
_________________________________________________________________
dense_10 (Dense)             (None, 10)                130       
_________________________________________________________________
dense_11 (Dense)             (None, 4)                 44        
Total params: 4,062
Trainable params: 4,062
Non-trainable params: 0
_________________________________________________________________


In [106]:
model_real.compile(optimizer='rmsprop', loss="mse", metrics="mae")

In [107]:
model_real.fit(X_train_real,y_train_real,epochs=10, batch_size=64, validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x174102940>

In [108]:
predictions_real = model_real.predict(X_test_real)

In [109]:
y_pred_real = np.exp((predictions_real * range1 + min1))

In [110]:
y_test_real_unscaled = np.exp((y_test * range1 + min1))

In [111]:
mae_real = np.mean(np.abs(y_test_real_unscaled - y_pred_real))

In [112]:
mae_real

22036.007745978834

## Simple Test with only open,close,high,low real prices and volume

In [139]:
df_test = pd.read_csv("../data/BTCUSD_4hours.csv")

In [140]:
df_test = df_test.drop(columns="Unnamed: 0").set_index("date")

In [141]:
df_test = df_test[10000:]

In [142]:
df_test.tail()

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-08-22 12:00:00,48769.308417,48787.711375,48749.4175,48769.73775,1.697971,10.794844,10.795222,10.794435,10.794853
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558,48810.16875,0.605696,10.79561,10.795982,10.795279,10.79566
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.94136,10.815334,10.815818,10.814942,10.815408
2021-08-23 04:00:00,50278.596788,50292.908182,50261.680727,50277.475152,1.908007,10.825333,10.825618,10.824997,10.825311


In [143]:
df_test.columns

Index(['open', 'high', 'low', 'close', 'volume', 'log_open', 'log_high',
       'log_low', 'log_close'],
      dtype='object')

In [146]:
Train_s = df_test[:8000].drop(columns=['log_open','log_high','log_low','log_close'])
Test_s = df_test[8000:].drop(columns=['log_open','log_high','log_low','log_close'])

In [147]:
print(Train_s.shape)
print(Test_s.shape)

(8000, 5)
(2840, 5)


## Scaling

In [149]:
minmax_s = MinMaxScaler(feature_range = (0,1))

minmax_s.fit(Train_s)

Train_s_scaled = minmax_s.transform(Train_s)
Test_s_scaled = minmax_s.transform(Test_s)

In [172]:
min_s = minmax_s.data_min_[:4]
min_s

array([594.16482143, 594.25053571, 594.09839286, 594.18785714])

In [173]:
range_s = minmax_s.data_range_[:4]
range_s

array([18905.79622024, 18915.91613095, 18883.17235714, 18905.25764286])

## Create Subsequences X & y

In [152]:
window_size = 72 #because we want to base our prediction on 3 weeks (72*4 hours)
horizon = 1 #number of predictions into the future :) 
subs = []
y_subs = []
for k in range(len(Train_s_scaled)-(window_size-1)-horizon):
    subs.append(Train_s_scaled[k:k+window_size])
    y_subs.append(Train_s_scaled[k+window_size])

In [156]:
X_train_s = np.array(subs)
y_train_s = np.array(y_subs)[:,:4]

In [157]:
print(X_train_s.shape)
print(y_train_s.shape)

(7928, 72, 5)
(7928, 4)


In [158]:
window_size = 72 #because we want to base our prediction on 3 weeks (72*4 hours)
horizon = 1 #number of predictions into the future :) 
test_subs = []
test_y_subs = []
for k in range(len(Test_s_scaled)-(window_size-1)-horizon):
    test_subs.append(Test_s_scaled[k:k+window_size])
    test_y_subs.append(Test_s_scaled[k+window_size])

In [161]:
X_test_s = np.array(test_subs)
y_test_s = np.array(test_y_subs)[:,:4]

In [162]:
print(X_test_s.shape)
print(y_test_s.shape)

(2768, 72, 5)
(2768, 4)


## Simple Model

In [195]:
# Initialize a model, add a masking layer,LSTM,Dense Layers 
model_s = Sequential()
#model.add(normalizer)
#model.add(layers.Masking(mask_value=-1))
model_s.add(layers.LSTM(10, activation='tanh'))
model_s.add(layers.Dense(6, activation='relu'))
model_s.add(layers.Dense(4, activation='linear'))

In [196]:
model_s.compile(optimizer='rmsprop', loss="mse", metrics="mae")

In [197]:
model_s.fit(X_train_s,y_train_s,epochs=10, batch_size=64, validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x17563e880>

In [166]:
predictions_s = model_s.predict(X_test_s)

In [177]:
predictions_s.shape

(2768, 4)

In [178]:
y_test_s.shape

(2768, 4)

## Inverse Scaling

In [179]:
predictions_s_unscaled = predictions_s * range_s + min_s

In [198]:
predictions_s_unscaled

array([[10018.77672221, 10291.04306752, 10193.03631233, 10095.09956886],
       [10020.3498373 , 10295.79538081, 10205.07042328, 10101.16310118],
       [ 9973.86856874, 10220.48108065, 10138.61831471, 10041.51151036],
       ...,
       [25747.90987556, 27033.45630237, 25894.5876005 , 24432.40229745],
       [25757.79030037, 27048.18565486, 25909.02583239, 24437.07192725],
       [25811.81260483, 27117.46231857, 25976.47403039, 24459.67861472]])

In [180]:
y_test_s_unscaled = y_test_s * range_s + min_s

In [199]:
y_test_s_unscaled

array([[ 9678.09476987,  9682.41426778,  9672.39284519,  9677.72401674],
       [ 9565.92390756,  9571.18907563,  9559.5562605 ,  9565.39907563],
       [ 9682.19004255,  9689.28591489,  9676.74761702,  9683.05148936],
       ...,
       [48807.71354167, 48825.87441667, 48791.558     , 48810.16875   ],
       [49779.33220833, 49803.46179167, 49759.78420833, 49783.00370833],
       [50278.59678788, 50292.90818182, 50261.68072727, 50277.47515152]])

In [181]:
np.mean(np.abs(y_test_s_unscaled - predictions_s_unscaled ))

9824.96777289249