In [None]:
!pip install python-binance mplfinance
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-binance
  Downloading python_binance-1.0.16-py2.py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 2.7 MB/s 
[?25hCollecting mplfinance
  Downloading mplfinance-0.12.9b1-py3-none-any.whl (70 kB)
[K     |████████████████████████████████| 70 kB 9.8 MB/s 
[?25hCollecting dateparser
  Downloading dateparser-1.1.1-py2.py3-none-any.whl (288 kB)
[K     |████████████████████████████████| 288 kB 44.9 MB/s 
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 39.2 MB/s 
[?25hCollecting ujson
  Downloading ujson-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (45 kB)
[K     |████████████████████████████████| 45 kB 1.6 MB/s 
Collecting websockets
  Downloading websockets-10.3-cp37-cp37

In [None]:
import pandas as pd
from datetime import datetime as date
import yfinance as yf
import binance as bi
import json


In [None]:
def filter(item: list):
    """Return the first six entries of ``item`` (fewer if it is shorter)."""
    leading_fields = item[:6]
    return leading_fields


class ContextProvider:
    """Fetch OHLCV candles for one symbol, cache them, and append live kline updates.

    The cached frame has the columns ``Date, Open, High, Low, Close, Volume`` plus
    ``PoC`` (percentage change of ``Close`` relative to the previous row).
    """

    def __init__(self) -> None:
        self.df = None          # cached candle frame of the last fetch
        self.stock = None       # symbol of the cached frame
        self.start_date = None  # request parameters of the cached frame
        self.end_date = None
        self.freq = None        # pandas frequency string of the cached frame
        self.interval = 1

    def get_data(self, stock, start_date, end_date):
        """Return ``(df, freq)``, reusing the cache when the request is unchanged."""
        same_request = (self.stock == stock
                        and self.start_date == start_date
                        and self.end_date == end_date)
        if same_request:
            return self.df, self.freq
        return self.fetch_data(stock, start_date, end_date)

    def fetch_data(self, stock, start_date, end_date):
        """Download candles for ``stock``, rebuild the cache and return ``(df, freq)``.

        ``"BTCUSDT"`` is served from Binance as 5-minute klines ending now (the
        date arguments are only stored as the cache key in that branch); every
        other symbol is served from yfinance between ``start_date`` and
        ``end_date`` (ISO date strings).
        """
        if stock == "BTCUSDT":
            client = bi.Client()
            res = client.get_klines(
                symbol=stock,
                interval=client.KLINE_INTERVAL_5MINUTE,
                limit=3000,
                endTime=int(date.now().timestamp()*1000)
            )
            # Only the first six fields of each kline are kept; they are named below.
            fil = [kline[0:6] for kline in res]
            self.df = pd.DataFrame(
                fil, columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])

            self.freq = "5min"
        else:
            yf_ticker_data = yf.Ticker(stock)
            self.df = yf_ticker_data.history(
                period="1d",
                start=date.fromisoformat(start_date).strftime("%Y-%m-%d"),
                end=date.fromisoformat(end_date).strftime("%Y-%m-%d"))
            self.df = pd.DataFrame(self.df)
            # Drop the two trailing columns and turn the date index into a column.
            self.df = self.df.iloc[:, :-2]
            self.df = self.df.reset_index()

            self.freq = "D"

        for col in ('Open', 'High', 'Low', 'Close', 'Volume'):
            self.df[col] = self.df[col].astype('float64')
        self.df["Date"] = pd.to_datetime(self.df["Date"], unit='ms')

        # Percentage change of Close versus the previous row, computed vectorized;
        # the first row has no predecessor and is set to 0.
        close = self.df["Close"]
        poc = 100 * close.diff() / close.shift()
        if len(poc) > 0:
            poc.iloc[0] = 0
        self.df["PoC"] = poc

        self.stock = stock
        self.start_date = start_date
        self.end_date = end_date

        return self.df, self.freq

    def handle_ws_message(self, message):
        """Parse one websocket kline message and append a candle when it is due.

        ``message`` is expected to be a JSON string with an event time ``"E"``
        (epoch milliseconds) and a kline payload ``"k"`` holding ``o/c/h/l/v``.
        Non-string messages are ignored. A new row is appended once the event
        time is more than 60 seconds after the last cached candle.
        """
        if not isinstance(message, str):
            return

        m = json.loads(message)
        if not isinstance(m, dict):
            return

        d = {}
        diff = 0
        at = None

        has_history = self.df is not None and len(self.df) > 0
        if "E" in m and has_history:
            # Convert exactly like the "Date" column (naive UTC). The previous
            # datetime.fromtimestamp() produced *local* time, which skewed the
            # difference by the machine's UTC offset.
            at = pd.to_datetime(m["E"], unit='ms')
            diff = (at - self.df["Date"].iloc[-1]).total_seconds()

        kline = m.get("k")
        if isinstance(kline, dict):
            field_names = (("o", "Open"), ("c", "Close"), ("h", "High"),
                           ("l", "Low"), ("v", "Volume"))
            for src, dst in field_names:
                if src in kline:
                    d[dst] = float(kline[src])

        print("adding new candlestick in: -" + str(60-diff) + "s")
        # Without a close price there is nothing meaningful to append.
        if diff > 60 and "Close" in d:
            d["Date"] = at
            last = self.df["Close"].iloc[-1]
            d["PoC"] = 100*(d["Close"]-last)/last
            new_row = pd.DataFrame(d, index=[0])
            self.df = pd.concat([self.df, new_row], ignore_index=True)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.models import *
from tensorflow.python.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *


In [None]:
# Mini-batch size used when fitting, and the number of time steps per input window.
batch_size, seq_len = 32, 128

# Attention settings: query/key width, value width and number of heads.
d_k = d_v = 256
n_heads = 12
# Number of filters in the first feed-forward Conv1D of each encoder.
ff_dim = 256



In [None]:
class Time2Vector(Layer):
    """Keras layer producing one linear and one periodic (sine) time feature per step.

    Args:
        seq_len: number of time steps; every weight vector has this length.
        **kwargs: standard ``Layer`` arguments (``name``, ``trainable``, ``dtype``...).
    """

    def __init__(self, seq_len, **kwargs):
        # Forward kwargs so that the base-layer settings stored by get_config()
        # are restored when the layer is rebuilt from a saved model.
        super(Time2Vector, self).__init__(**kwargs)
        self.seq_len = seq_len

    def build(self, input_shape):
        """Create the four trainable weight vectors, each of shape ``(seq_len,)``."""
        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=(int(self.seq_len),),
                                              initializer='uniform',
                                              trainable=True)

        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=(int(self.seq_len),),
                                           initializer='uniform',
                                           trainable=True)

        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=(int(self.seq_len),),
                                                initializer='uniform',
                                                trainable=True)

        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=(int(self.seq_len),),
                                             initializer='uniform',
                                             trainable=True)

    def call(self, x):
        """Return the linear and periodic time features, concatenated on the last axis."""
        # Collapse the first four features of every time step into their mean.
        x = tf.math.reduce_mean(x[:, :, :4], axis=-1)
        time_linear = self.weights_linear * x + self.bias_linear  # Linear time feature
        # Add dimension (batch, seq_len, 1)
        time_linear = tf.expand_dims(time_linear, axis=-1)

        time_periodic = tf.math.sin(tf.multiply(
            x, self.weights_periodic) + self.bias_periodic)
        # Add dimension (batch, seq_len, 1)
        time_periodic = tf.expand_dims(time_periodic, axis=-1)
        # shape = (batch, seq_len, 2)
        return tf.concat([time_linear, time_periodic], axis=-1)

    def get_config(self):  # Needed for saving and loading model with custom layer
        """Return the base-layer config extended with ``seq_len``."""
        config = super().get_config().copy()
        config.update({'seq_len': self.seq_len})
        return config


class SingleAttention(Layer):
    """One scaled dot-product attention head built from three Dense projections.

    Args:
        d_k: width of the query and key projections.
        d_v: width of the value projection.
        **kwargs: standard ``Layer`` arguments, forwarded to the base class.
    """

    def __init__(self, d_k, d_v, **kwargs):
        super(SingleAttention, self).__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v

    def build(self, input_shape):
        """Create the query, key and value projection layers."""
        self.query = Dense(self.d_k,
                           input_shape=input_shape,
                           kernel_initializer='glorot_uniform',
                           bias_initializer='glorot_uniform')

        self.key = Dense(self.d_k,
                         input_shape=input_shape,
                         kernel_initializer='glorot_uniform',
                         bias_initializer='glorot_uniform')

        self.value = Dense(self.d_v,
                           input_shape=input_shape,
                           kernel_initializer='glorot_uniform',
                           bias_initializer='glorot_uniform')

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        """Return ``softmax(q @ k^T / sqrt(d_k)) @ v`` for ``inputs = (q_in, k_in, v_in)``."""
        q = self.query(inputs[0])
        k = self.key(inputs[1])

        attn_weights = tf.matmul(q, k, transpose_b=True)
        # Scaling is element-wise, so a plain division replaces the former
        # per-sample tf.map_fn loop without changing the values.
        attn_weights = attn_weights / np.sqrt(self.d_k)
        attn_weights = tf.nn.softmax(attn_weights, axis=-1)

        v = self.value(inputs[2])
        attn_out = tf.matmul(attn_weights, v)
        return attn_out

#############################################################################


class MultiAttention(Layer):
    """Runs ``n_heads`` SingleAttention heads side by side and projects their concatenation."""

    def __init__(self, d_k, d_v, n_heads):
        super(MultiAttention, self).__init__()
        self.d_k, self.d_v = d_k, d_v
        self.n_heads = n_heads
        self.attn_heads = []

    def build(self, input_shape):
        """Create the attention heads and the output projection."""
        for _ in range(self.n_heads):
            head = SingleAttention(self.d_k, self.d_v)
            self.attn_heads.append(head)

        # The projection restores the feature width of the first input:
        # input_shape[0]=(batch, seq_len, 7), input_shape[0][-1]=7
        feature_width = input_shape[0][-1]
        self.linear = Dense(feature_width,
                            input_shape=input_shape,
                            kernel_initializer='glorot_uniform',
                            bias_initializer='glorot_uniform')

    def call(self, inputs):
        """Apply every head to ``inputs`` and project the concatenated outputs."""
        head_outputs = []
        for idx in range(self.n_heads):
            head_outputs.append(self.attn_heads[idx](inputs))
        return self.linear(tf.concat(head_outputs, axis=-1))

#############################################################################


class TransformerEncoder(Layer):
    """Encoder block: multi-head attention followed by a two-layer Conv1D feed-forward part.

    Args:
        d_k: query/key width of every attention head.
        d_v: value width of every attention head.
        n_heads: number of attention heads.
        ff_dim: number of filters of the first feed-forward convolution.
        dropout: dropout rate applied after the attention and feed-forward parts.
        **kwargs: standard ``Layer`` arguments, forwarded to the base class.
    """

    def __init__(self, d_k, d_v, n_heads, ff_dim, dropout=0.1, **kwargs):
        # Configs written by the earlier get_config() carry 'attn_heads' and
        # 'dropout_rate', which are not constructor arguments. Accept them here so
        # such saved models still load, and honour the stored dropout rate instead
        # of silently falling back to the default.
        kwargs.pop('attn_heads', None)
        dropout = kwargs.pop('dropout_rate', dropout)
        super(TransformerEncoder, self).__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.attn_heads = list()
        self.dropout_rate = dropout

    def build(self, input_shape):
        """Create the attention, dropout, normalisation and feed-forward sub-layers."""
        self.attn_multi = MultiAttention(self.d_k, self.d_v, self.n_heads)
        self.attn_dropout = Dropout(self.dropout_rate)
        self.attn_normalize = LayerNormalization(
            input_shape=input_shape, epsilon=1e-6)

        self.ff_conv1D_1 = Conv1D(
            filters=self.ff_dim, kernel_size=1, activation='relu')
        # input_shape[0]=(batch, seq_len, 7), input_shape[0][-1] = 7
        self.ff_conv1D_2 = Conv1D(filters=input_shape[0][-1], kernel_size=1)
        self.ff_dropout = Dropout(self.dropout_rate)
        self.ff_normalize = LayerNormalization(
            input_shape=input_shape, epsilon=1e-6)

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        """Run attention and feed-forward, each with dropout, residual add and layer norm."""
        attn_layer = self.attn_multi(inputs)
        attn_layer = self.attn_dropout(attn_layer)
        attn_layer = self.attn_normalize(inputs[0] + attn_layer)

        ff_layer = self.ff_conv1D_1(attn_layer)
        ff_layer = self.ff_conv1D_2(ff_layer)
        ff_layer = self.ff_dropout(ff_layer)
        # NOTE(review): this residual adds the block input (inputs[0]) rather than
        # the attention output; left unchanged because altering it would change the
        # outputs of already-trained weights - confirm the intent.
        ff_layer = self.ff_normalize(inputs[0] + ff_layer)
        return ff_layer

    def get_config(self):  # Needed for saving and loading model with custom layer
        """Return the base-layer config extended with this layer's constructor arguments."""
        config = super().get_config().copy()
        # Keys mirror the __init__ parameter names so from_config() can rebuild the layer.
        config.update({'d_k': self.d_k,
                       'd_v': self.d_v,
                       'n_heads': self.n_heads,
                       'ff_dim': self.ff_dim,
                       'dropout': self.dropout_rate})
        return config


def create_model(dense=1):
    """Build and compile the time-embedding transformer regressor.

    The input has shape ``(seq_len, 5)``; the output is a linear Dense layer with
    ``dense`` units. The model is compiled with MSE loss, the Adam optimizer and
    MAE/MAPE metrics.
    """
    # Custom layers are instantiated first, in this order, before the graph is wired.
    embed_time = Time2Vector(seq_len)
    encoders = [TransformerEncoder(d_k, d_v, n_heads, ff_dim) for _ in range(3)]

    in_seq = Input(shape=(seq_len, 5))

    # Append the two time features to the raw input features.
    hidden = embed_time(in_seq)
    hidden = Concatenate(axis=-1)([in_seq, hidden])

    # Each encoder receives the same tensor as query, key and value input.
    for encoder in encoders:
        hidden = encoder((hidden, hidden, hidden))

    hidden = GlobalAveragePooling1D(data_format='channels_first')(hidden)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    out = Dense(dense, activation='linear')(hidden)

    model = Model(inputs=in_seq, outputs=out)
    model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mape'])
    return model


def TATE_train_model(X_train, y_train, X_val, y_val, n_forecast, stock):
    """Train the transformer for ``stock`` and return the checkpointed model.

    Args:
        X_train, y_train: training windows and their targets.
        X_val, y_val: validation windows and their targets.
        n_forecast: number of output units of the model.
        stock: symbol used in the checkpoint file name.

    Returns:
        The model reloaded from ``models/{stock}_Transformer_TimeEmbedding.h5``.
    """
    import os

    checkpoint_path = f'models/{stock}_Transformer_TimeEmbedding.h5'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    model = create_model(n_forecast)
    model.summary()

    # `monitor` only takes effect together with save_best_only=True; without it
    # the checkpoint is overwritten every epoch and the last epoch is returned.
    callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  monitor='val_loss',
                                                  save_best_only=True,
                                                  verbose=0)

    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=20,
              callbacks=[callback],
              validation_data=(X_val, y_val))

    # Reload through the shared loader so the custom-layer mapping lives in one place.
    return TATE_load_model(stock)


def TATE_load_model(stock):
    """Load the saved transformer for ``stock`` from the ``models`` directory."""
    # Keras needs the custom layer classes to rebuild the saved architecture.
    custom_layers = {
        'Time2Vector': Time2Vector,
        'SingleAttention': SingleAttention,
        'MultiAttention': MultiAttention,
        'TransformerEncoder': TransformerEncoder,
    }
    model_file = f'models/{stock}_Transformer_TimeEmbedding.h5'
    return tf.keras.models.load_model(model_file, custom_objects=custom_layers)


In [None]:
from time import time
from keras.layers import LSTM, Dropout, Dense
from keras.models import Sequential
from xgboost import XGBRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import pickle


In [None]:
def LSTM_build(units, x_train, y_train, dense, stock, feature):
    """Fit a two-layer LSTM regressor, save it and return it.

    Args:
        units: number of units in each LSTM layer.
        x_train: training windows; ``x_train.shape[1]`` is the window length.
        y_train: training targets.
        dense: number of output units.
        stock, feature: used to build the file name
            ``models/{stock}_{feature}_LSTM.h5``.
    """
    import os

    f = f'models/{stock}_{feature}_LSTM.h5'
    lstm_model = Sequential()
    lstm_model.add(LSTM(units=units, return_sequences=True,
                        input_shape=(x_train.shape[1], 1)))
    lstm_model.add(LSTM(units=units))
    lstm_model.add(Dense(dense))

    lstm_model.compile(loss='mean_squared_error', optimizer='adam')
    lstm_model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Make sure the target directory exists so the fitted model is not lost
    # to a failed save.
    os.makedirs(os.path.dirname(f), exist_ok=True)
    lstm_model.save(f)

    return lstm_model


def RNN_build(units, dropout, x_train, y_train, dense, stock, feature):
    """Fit a four-layer LSTM stack with dropout, save it and return it.

    Args:
        units: number of units in each LSTM layer.
        dropout: dropout rate applied after every LSTM layer.
        x_train: training windows; ``x_train.shape[1]`` is the window length.
        y_train: training targets.
        dense: number of output units.
        stock, feature: used to build the file name
            ``models/{stock}_{feature}_RNN.h5``.
    """
    import os

    f = f'models/{stock}_{feature}_RNN.h5'
    rnn_model = Sequential()
    rnn_model.add(LSTM(units=units, return_sequences=True,
                       input_shape=(x_train.shape[1], 1)))
    rnn_model.add(Dropout(dropout))
    # Three more LSTM layers; only the last one collapses the sequence.
    for return_sequences in (True, True, False):
        rnn_model.add(LSTM(units=units, return_sequences=return_sequences))
        rnn_model.add(Dropout(dropout))

    rnn_model.add(Dense(units=dense))
    rnn_model.compile(optimizer='adam', loss='mean_squared_error')
    rnn_model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Make sure the target directory exists so the fitted model is not lost
    # to a failed save.
    os.makedirs(os.path.dirname(f), exist_ok=True)
    rnn_model.save(f)

    return rnn_model


def XGBoost_build(X_train, y_train, eval_set, stock, feature):
    """Fit an ``XGBRegressor(max_depth=7)``, pickle it and return it.

    Args:
        X_train, y_train: training windows and targets.
        eval_set: list of ``(X, y)`` pairs passed to ``fit`` for evaluation.
        stock, feature: used to build the file name
            ``models/{stock}_{feature}_XGBoost.pkl``.
    """
    import os

    f = f'models/{stock}_{feature}_XGBoost.pkl'
    start_time = time()
    model = XGBRegressor(max_depth=7)
    model.fit(X_train, y_train, eval_set=eval_set, verbose=False)

    os.makedirs(os.path.dirname(f), exist_ok=True)
    # The context manager closes (and flushes) the file even if pickling fails.
    with open(f, "wb") as fh:
        pickle.dump(model, fh)
    print('Fit time : ', time() - start_time)
    return model


def LSTM_train_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Train an LSTM on one column of ``df`` and forecast its next values.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of past values fed to the model per sample.
        n_forecast: number of future values predicted per sample.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used for the saved model's file name.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # Sliding windows: n_lookback inputs -> the n_forecast values that follow them.
    x_train_data, y_train_data = [], []
    for i in range(n_lookback, len(final_dataset) - n_forecast + 1):
        x_train_data.append(scaled_data[i-n_lookback:i, 0])
        y_train_data.append(scaled_data[i:i+n_forecast, 0])

    x_train_data, y_train_data = np.array(x_train_data), np.array(y_train_data)

    x_train_data = np.reshape(
        x_train_data, (x_train_data.shape[0], x_train_data.shape[1]))

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    _model = LSTM_build(
        50, x_train_data, y_train_data, n_forecast, stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector regardless of the shape the model returns.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def RNN_train_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Train the stacked-LSTM model on one column of ``df`` and forecast its next values.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of past values fed to the model per sample.
        n_forecast: number of future values predicted per sample.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used for the saved model's file name.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # Sliding windows: n_lookback inputs -> the n_forecast values that follow them.
    x_train_data, y_train_data = [], []
    for i in range(n_lookback, len(final_dataset) - n_forecast + 1):
        x_train_data.append(scaled_data[i-n_lookback:i, 0])
        y_train_data.append(scaled_data[i:i+n_forecast, 0])

    x_train_data, y_train_data = np.array(x_train_data), np.array(y_train_data)

    x_train_data = np.reshape(
        x_train_data, (x_train_data.shape[0], x_train_data.shape[1]))

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    _model = RNN_build(
        45, 0.2, x_train_data, y_train_data, n_forecast, stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector regardless of the shape the model returns.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def XGBoost_train_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Train an XGBoost regressor on one column of ``df`` and forecast its next values.

    The first 90% of the series provides training windows; the remainder
    provides the validation windows passed to the model as evaluation set.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of past values fed to the model per sample.
        n_forecast: number of future values predicted per sample.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used for the saved model's file name.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    test_size = 0.1
    test_ind = int(len(final_dataset) * (1-test_size))

    x_train, y_train = [], []
    for i in range(n_lookback, test_ind - n_forecast + 1):
        x_train.append(scaled_data[i-n_lookback:i, 0])
        y_train.append(scaled_data[i:i+n_forecast, 0])

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(
        x_train, (x_train.shape[0], x_train.shape[1]))

    # Never start before n_lookback: a smaller start would make the slice begin
    # at a negative index and produce wrong (or empty) windows.
    valid_start = max(n_lookback, test_ind - n_forecast + 1)
    x_valid, y_valid = [], []
    for i in range(valid_start, len(final_dataset) - n_forecast + 1):
        x_valid.append(scaled_data[i-n_lookback:i, 0])
        y_valid.append(scaled_data[i:i+n_forecast, 0])

    x_valid, y_valid = np.array(x_valid), np.array(y_valid)
    x_valid = x_valid.reshape(x_valid.shape[0], x_valid.shape[1])

    _model = XGBoost_build(x_train, y_train, [
        (x_train, y_train), (x_valid, y_valid)], stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector; this also covers a 1-D prediction array.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def TATE_train_forecast_prices(df, n_forecast, dt_freq, feature, stock):
    """Train the time-embedding transformer on OHLCV returns and forecast Close prices.

    Args:
        df: frame with ``Date, Open, High, Low, Close, Volume`` columns.
        n_forecast: number of future steps to predict.
        dt_freq: pandas frequency string for the forecast index.
        feature: accepted for signature parity with the other forecasters; it is
            not read here - the target is always the ``Close`` return.
        stock: symbol, used for the checkpoint file name.

    Returns:
        ``(df_past, df_future)``: a slice-copy of the input frame, and a frame with
        a ``Predictions`` column (prices) indexed by the forecast dates.
    """
    raw_df = df
    price_cols = ['Open', 'High', 'Low', 'Close']
    value_cols = price_cols + ['Volume']
    df = df[['Date'] + value_cols].copy()

    # Arithmetic returns of every value column.
    df[value_cols] = df[value_cols].pct_change()
    # A zero predecessor yields +/-inf, which would poison the min/max scaling
    # below; treat such rows like missing values.
    df[value_cols] = df[value_cols].replace([np.inf, -np.inf], np.nan)
    df = df.dropna(how='any', axis=0)  # Drop all rows with NaN values

    ###############################################################################
    # Index label where the last 20% of the series begins.
    times = sorted(df.index.values)
    last_20pct = times[-int(0.2*len(times))]
    fit_rows = df.index < last_20pct  # rows used to fit the scaling ranges

    ###############################################################################
    # Min-max normalize the price columns with one shared range (0-1).
    min_return = df.loc[fit_rows, price_cols].min(axis=0).min()
    max_return = df.loc[fit_rows, price_cols].max(axis=0).max()
    df[price_cols] = (df[price_cols] - min_return) / (max_return - min_return)

    ###############################################################################
    # Min-max normalize the volume column with its own range (0-1).
    min_volume = df.loc[fit_rows, 'Volume'].min(axis=0)
    max_volume = df.loc[fit_rows, 'Volume'].max(axis=0)
    df['Volume'] = (df['Volume'] - min_volume) / (max_volume - min_volume)

    ###############################################################################
    # Training / validation split (the Date column is not a model input).
    split_label = last_20pct - n_forecast
    df_train = df[df.index < split_label].drop(columns=['Date'])
    df_val = df[df.index >= split_label].drop(columns=['Date'])

    train_data = df_train.values
    val_data = df_val.values
    n_features = train_data.shape[1]

    # Normalized Close returns of the whole series; training rows are its prefix,
    # so training positions index it directly.
    scaled_data = df['Close'].values

    # Training data
    X_train, y_train = [], []
    for i in range(seq_len, len(train_data)):
        # Chunks of training data with a length of seq_len rows
        X_train.append(train_data[i-seq_len:i])
        y_train.append(scaled_data[i:i+n_forecast])
    X_train = np.asarray(X_train, dtype=np.float32).reshape(-1, seq_len, n_features)
    y_train = np.asarray(y_train, dtype=np.float32).reshape(-1, n_forecast)

    ###############################################################################
    # Validation data. Targets must come from the validation rows themselves:
    # `i` is a position inside val_data, so slicing the full-series array with it
    # (as before) paired each window with targets from the start of the series.
    val_close = df_val['Close'].values
    X_val, y_val = [], []
    for i in range(seq_len, len(val_data) - n_forecast + 1):
        X_val.append(val_data[i-seq_len:i])
        y_val.append(val_close[i:i+n_forecast])
    X_val = np.asarray(X_val, dtype=np.float32).reshape(-1, seq_len, n_features)
    y_val = np.asarray(y_val, dtype=np.float32).reshape(-1, n_forecast)

    ###############################################################################
    # Test data: the most recent window.
    X_test = np.asarray(val_data[-seq_len:], dtype=np.float32)
    X_test = X_test.reshape(1, X_test.shape[0], X_test.shape[1])

    _model = TATE_train_model(X_train, y_train, X_val,
                              y_val, n_forecast, stock)

    test_pred = _model.predict(X_test)
    # Undo the normalization to get returns, then compound them onto the last
    # observed Close to obtain prices.
    returns = test_pred.flatten()*(max_return - min_return) + min_return
    pred = pd.Series(returns, name='Predictions')
    pred = pred.add(1, fill_value=0).cumprod()*raw_df['Close'].iloc[-1]

    df_past = raw_df[:]
    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=raw_df['Date'].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame({"Predictions": pred.values},
                             index=t.rename("Date"))

    return df_past, df_future



In [None]:
from os import path
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import pickle
from keras.models import load_model


In [None]:

def LSTM_load(stock, feature):
    """Load the saved LSTM model for ``stock``/``feature``.

    Raises:
        FileNotFoundError: if ``models/{stock}_{feature}_LSTM.h5`` does not exist.
    """
    # LSTM_build saves under the "_LSTM.h5" suffix; the former "_RNN.h5" path
    # loaded the wrong model.
    f = f'models/{stock}_{feature}_LSTM.h5'
    if not path.exists(f):
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise FileNotFoundError(f'No saved LSTM model found at {f}')

    return load_model(f)


def RNN_load(stock, feature):
    """Load the saved stacked-LSTM ("RNN") model for ``stock``/``feature``.

    Raises:
        FileNotFoundError: if ``models/{stock}_{feature}_RNN.h5`` does not exist.
    """
    f = f'models/{stock}_{feature}_RNN.h5'
    if not path.exists(f):
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise FileNotFoundError(f'No saved RNN model found at {f}')

    return load_model(f)


def XGBoost_load(stock, feature):
    """Load the pickled XGBoost model for ``stock``/``feature``.

    Raises:
        FileNotFoundError: if ``models/{stock}_{feature}_XGBoost.pkl`` does not exist.
    """
    f = f'models/{stock}_{feature}_XGBoost.pkl'
    if not path.exists(f):
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise FileNotFoundError(f'No saved XGBoost model found at {f}')

    # SECURITY: unpickling executes arbitrary code - only load files written by
    # this notebook, never files from an untrusted source.
    with open(f, "rb") as fh:
        return pickle.load(fh)


def LSTM_load_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Forecast one column of ``df`` with the previously saved LSTM model.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of most recent values fed to the model.
        n_forecast: number of forecast dates to generate.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used to locate the saved model.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    _model = LSTM_load(stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector regardless of the shape the model returns.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def RNN_load_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Forecast one column of ``df`` with the previously saved stacked-LSTM model.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of most recent values fed to the model.
        n_forecast: number of forecast dates to generate.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used to locate the saved model.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    _model = RNN_load(stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector regardless of the shape the model returns.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def XGBoost_load_forecast_prices(df, n_lookback, n_forecast, feature, dt_freq, stock):
    """Forecast one column of ``df`` with the previously saved XGBoost model.

    Args:
        df: frame with a ``Date`` column and the ``feature`` column.
        n_lookback: number of most recent values fed to the model.
        n_forecast: number of forecast dates to generate.
        feature: name of the column to forecast.
        dt_freq: pandas frequency string for the forecast index.
        stock: symbol, used to locate the saved model.

    Returns:
        ``(df_past, df_future)``: the ``feature`` column indexed by ``Date``, and a
        frame with a ``Predictions`` column indexed by the forecast dates.
    """
    data = df

    # Select the needed columns in one vectorized step. The former per-row
    # ``new_dataset[col][i] = ...`` copy was chained assignment, which is slow
    # and does not write through under pandas copy-on-write.
    new_dataset = data[["Date", feature]].set_index("Date")
    final_dataset = new_dataset.values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(final_dataset)

    # The most recent window is the input for the forecast.
    X_test = np.array(scaled_data[-n_lookback:]).reshape(1, n_lookback)

    _model = XGBoost_load(stock, feature)

    # The scaler was fitted on a single column, so undo the scaling on a column
    # vector; this also covers a 1-D prediction array.
    predicted_closing_price = np.asarray(_model.predict(X_test)).reshape(-1, 1)
    predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

    # NOTE(review): the forecast index starts AT the last observed date, not one
    # period after it - confirm this is the intended labelling.
    t = pd.date_range(
        start=data["Date"].iloc[-1], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(
        {"Predictions": predicted_closing_price.flatten()},
        index=t.rename("Date"))

    return new_dataset, df_future


def TATE_load_forecast_prices(df, n_forecast, dt_freq, feature, stock):
    """Forecast closing prices with the saved TATE model for ``stock``.

    Pipeline, as implemented below:
      1. Replace the OHLCV columns by their period-over-period percentage
         change and drop rows containing NaN.
      2. Min-max normalise prices (one shared min/max across Open/High/Low/
         Close) and volume, using only the rows whose index label is below
         the "last 20%" cut-off.
      3. Feed the final ``seq_len`` normalised rows (``seq_len`` is read from
         the enclosing module scope) to ``TATE_load_model(stock)``.
      4. De-normalise the predicted returns and compound them onto the last
         observed close.

    Args:
        df: DataFrame with 'Date', 'Open', 'High', 'Low', 'Close', 'Volume'.
            NOTE(review): the label arithmetic below assumes a default
            0..n-1 integer index -- confirm with the caller.
        n_forecast: Number of forecast timestamps; the model output must
            flatten to this many values.
        dt_freq: Frequency passed to ``pd.date_range`` for the forecast index.
        feature: Not used by this function.
        stock: Symbol forwarded to ``TATE_load_model``.

    Returns:
        ``(df_past, df_future)``: a full slice of the input frame, and a frame
        with a 'Predictions' column indexed by the forecast dates.
    """
    history = df
    price_cols = ['Open', 'High', 'Low', 'Close']
    returns = history[['Date'] + price_cols + ['Volume']].copy()

    # Arithmetic returns for every numeric column; the first row becomes NaN
    # and is removed by the dropna that follows.
    for col in price_cols + ['Volume']:
        returns[col] = returns[col].pct_change()
    returns = returns.dropna(how='any', axis=0)

    # Index label at which the last 20% of the series begins.
    ordered_labels = sorted(returns.index.values)
    cutoff = ordered_labels[-int(0.2 * len(ordered_labels))]
    in_train = returns.index < cutoff

    # Price columns share one min/max taken from the training rows only.
    train_prices = returns.loc[in_train, price_cols]
    min_return = min(train_prices.min(axis=0))
    max_return = max(train_prices.max(axis=0))
    return_span = max_return - min_return
    for col in price_cols:
        returns[col] = (returns[col] - min_return) / return_span

    # Volume is scaled independently, again from the training rows only.
    train_volume = returns.loc[in_train, 'Volume']
    min_volume = train_volume.min(axis=0)
    max_volume = train_volume.max(axis=0)
    returns['Volume'] = (returns['Volume'] - min_volume) / (max_volume - min_volume)

    # Validation slice (cut-off shifted back by n_forecast labels), without
    # the date column, as a plain array.
    val_data = (
        returns[returns.index >= cutoff - n_forecast]
        .drop(columns=['Date'])
        .values
    )

    # Single model input of shape (1, rows, columns) in float32.
    window = val_data[-seq_len:]
    X_test = window.astype(np.float32).reshape(1, *window.shape)

    model = TATE_load_model(stock)
    scaled_pred = model.predict(X_test)

    # Undo the price normalisation, then turn returns into price levels by
    # compounding from the last observed close.
    pred_returns = scaled_pred.flatten() * return_span + min_return
    last_label = len(history) - 1
    growth = pd.Series(pred_returns, name='Predictions').add(1, fill_value=0).cumprod()
    pred_prices = growth * history['Close'][last_label]

    df_past = history[:]
    horizon = pd.date_range(
        start=history['Date'][last_label], periods=n_forecast, freq=dt_freq)
    df_future = pd.DataFrame(columns=["Date", "Predictions"])
    df_future["Date"] = horizon
    df_future = df_future.set_index("Date")
    df_future["Predictions"] = pred_prices.values

    return df_past, df_future


In [None]:
from datetime import datetime


Training with a GPU on Colab is much faster than on my local PC.

One epoch takes about 44 s on Colab versus about 150 s on my local PC (roughly 3 times faster).

In [None]:
# Symbols to train on, plus the feature/model grid swept for each of them.
stocks = ['AAPL', 'BTCUSDT', 'META', 'GOOG', 'NFLX', 'TSLA']  # quick run: ['AAPL']
features = ["Close"]  # alternative: ["Close", "PoC"]
models = ["TATE"]     # alternative: ["RNN", "LSTM", "TATE"]

ctx = ContextProvider()

for s in stocks:
    # History window: 2010-01-01 up to the moment this stock's fetch starts.
    history_start = datetime(2010, 1, 1).isoformat()
    history_end = datetime.now().isoformat()
    df, freq = ctx.get_data(s, history_start, history_end)
    for f in features:
        for m in models:
            # Only the TATE trainer is wired in. The other trainers were
            # previously invoked as:
            #   XGBoost_train_forecast_prices(df, 60, 15, f, freq, s)
            #   RNN_train_forecast_prices(df, 60, 15, f, freq, s)
            #   LSTM_train_forecast_prices(df, 60, 15, f, freq, s)
            if m != "TATE":
                continue
            TATE_train_forecast_prices(df, 15, freq, f, s)
        


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 128, 5)]     0           []                               
                                                                                                  
 time2_vector_2 (Time2Vector)   (None, 128, 2)       512         ['input_2[0][0]']                
                                                                                                  
 concatenate_1 (Concatenate)    (None, 128, 7)       0           ['input_2[0][0]',                
                                                                  'time2_vector_2[0][0]']         
                                                                                                  
 transformer_encoder_6 (Transfo  (None, 128, 7)      99114       ['concatenate_1[0][0]',    

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): presumably used so trained models/data persist across
# sessions -- confirm; this cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
