In [1]:
from pathlib import Path
import os
import sys

root = Path(os.path.abspath('')).parent

sys.path.insert(0, str(root))

sys.path

['/Users/kael/Codes/ml/tensorflow-2-DA-RNN',
 '/Users/kael/Codes/ml/tensorflow-2-DA-RNN/notebook',
 '/Users/kael/opt/anaconda3/envs/strategy/lib/python38.zip',
 '/Users/kael/opt/anaconda3/envs/strategy/lib/python3.8',
 '/Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/lib-dynload',
 '',
 '/Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages',
 '/Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/IPython/extensions',
 '/Users/kael/.ipython']

In [2]:
import pandas as pd

stock = pd.read_csv(root / 'data' / '15m.csv', index_col=0).drop(columns=['open_time', 'close_time'])

print('length:', len(stock))

print('columns:', stock.columns)

# Remove zero volumn records
stock = stock[
    (stock != 0).all(axis=1)
].dropna()

print('length after cleaned:', len(stock))

stock = stock.iloc[:10000]

stock

length: 39348
columns: Index(['open', 'high', 'low', 'close', 'volume', 'quote_volume',
       'total_trades', 'taker_volume', 'taker_quote_volume'],
      dtype='object')
length after cleaned: 39345


Unnamed: 0,open,high,low,close,volume,quote_volume,total_trades,taker_volume,taker_quote_volume
0,7145.99,7161.38,7117.12,7137.44,552.274946,3.943150e+06,4627,299.994641,2.142624e+06
1,7137.46,7173.47,7137.45,7151.58,472.460817,3.381481e+06,4308,292.893319,2.096191e+06
2,7151.58,7167.00,7138.39,7156.53,302.007414,2.159535e+06,3728,183.059632,1.308919e+06
3,7156.04,7157.07,7133.52,7136.57,320.142548,2.286677e+06,3319,148.957600,1.063991e+06
4,7136.76,7141.99,7114.33,7120.10,378.542200,2.696923e+06,3524,171.678416,1.223234e+06
...,...,...,...,...,...,...,...,...,...
9995,6590.50,6599.79,6581.19,6598.46,410.816584,2.707478e+06,3801,187.754867,1.237408e+06
9996,6598.49,6609.49,6586.51,6606.05,408.520462,2.695473e+06,3864,237.400316,1.566351e+06
9997,6606.05,6638.00,6606.04,6622.23,997.422802,6.607192e+06,8590,576.481696,3.818843e+06
9998,6622.82,6627.66,6600.24,6609.36,492.504520,3.257754e+06,5285,240.048582,1.588112e+06


In [3]:
ranges = []

for column_name in stock.columns:
    column = stock[column_name]
    
    ranges.append((
        column_name,
        column.min(),
        column.max()
    ))
    
ranges

[('open', 3888.65, 10488.69),
 ('high', 4240.75, 10500.0),
 ('low', 3782.13, 10436.18),
 ('close', 3882.22, 10488.78),
 ('volume', 35.066062, 14596.107643),
 ('quote_volume', 308773.05142744, 90312985.11437915),
 ('total_trades', 462, 86789),
 ('taker_volume', 14.853535, 7962.468589),
 ('taker_quote_volume', 130811.55505231, 41910148.36916428)]

In [4]:
from stock_pandas import StockDataFrame

WINDOW_SIZE = 128
PREDICTION_SIZE = 4


def is_hhv(array) -> bool:
    return array[0] == array.max()


def is_llv(array) -> bool:
    return array[0] == array.min()


stock = StockDataFrame(stock)
        
original_n = len(stock.columns)

stock['is_hhv'] = stock.rolling_calc(
    PREDICTION_SIZE + 1,
    'high',
    is_hhv,
    True
)

stock['is_llv'] = stock.rolling_calc(
    PREDICTION_SIZE + 1,
    'high',
    is_hhv,
    True
)

stock

Unnamed: 0,open,high,low,close,volume,quote_volume,total_trades,taker_volume,taker_quote_volume,is_hhv,is_llv
0,7145.99,7161.38,7117.12,7137.44,552.274946,3.943150e+06,4627,299.994641,2.142624e+06,0.0,0.0
1,7137.46,7173.47,7137.45,7151.58,472.460817,3.381481e+06,4308,292.893319,2.096191e+06,1.0,1.0
2,7151.58,7167.00,7138.39,7156.53,302.007414,2.159535e+06,3728,183.059632,1.308919e+06,1.0,1.0
3,7156.04,7157.07,7133.52,7136.57,320.142548,2.286677e+06,3319,148.957600,1.063991e+06,0.0,0.0
4,7136.76,7141.99,7114.33,7120.10,378.542200,2.696923e+06,3524,171.678416,1.223234e+06,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
9995,6590.50,6599.79,6581.19,6598.46,410.816584,2.707478e+06,3801,187.754867,1.237408e+06,0.0,0.0
9996,6598.49,6609.49,6586.51,6606.05,408.520462,2.695473e+06,3864,237.400316,1.566351e+06,,
9997,6606.05,6638.00,6606.04,6622.23,997.422802,6.607192e+06,8590,576.481696,3.818843e+06,,
9998,6622.82,6627.66,6600.24,6609.36,492.504520,3.257754e+06,5285,240.048582,1.588112e+06,,


In [5]:
import numpy as np


features = stock.to_numpy()[:-PREDICTION_SIZE]

original_n = len(ranges)

STYLE_BULLISH = 0
STYLE_UNKNOWN = 1
STYLE_BEARISH = 2

Y_DIM = 1


def normalize(value, index):
    _, min_value, max_value = ranges[index]
    
    return (value - min_value) / (max_value - min_value) 


def apply_normalizer(array: np.ndarray) -> np.ndarray:
    return [
        normalize(value, i)
        for i, value in enumerate(array)
    ]


def apply_softmax(array: np.ndarray) -> np.ndarray:
    origin = array[:original_n]
    is_hhv, is_llv = array[original_n:]
    
    row = np.append(
        apply_normalizer(origin),
        STYLE_BULLISH if is_llv == 1 else (
            STYLE_BEARISH if is_hhv == 1 else STYLE_UNKNOWN
        )
    )
    
    return row


features = np.array([
    apply_softmax(row)
    for row in features
])

print('inputs length', len(features))

features

inputs length 9996


array([[0.49353337, 0.46661022, 0.50119702, ..., 0.03587757, 0.04815328,
        1.        ],
       [0.49224096, 0.46854176, 0.5042523 , ..., 0.03498405, 0.04704189,
        0.        ],
       [0.49438034, 0.46750809, 0.50439356, ..., 0.02116435, 0.02819833,
        0.        ],
       ...,
       [0.41155357, 0.37951991, 0.42089404, ..., 0.02909016, 0.03576403,
        1.        ],
       [0.40893995, 0.37653393, 0.41931906, ..., 0.04190355, 0.05171922,
        1.        ],
       [0.40936873, 0.3768886 , 0.42065509, ..., 0.02175512, 0.02648669,
        1.        ]])

In [6]:
VALIDATION_RATIO = 0.2


def get_labels_from_features(features):
    return features[WINDOW_SIZE - 1:, -Y_DIM:]


def split_by_ratio(features):
    length = len(features)
    validation_length = int(VALIDATION_RATIO * length)
    
    return features[:validation_length], features[-validation_length:]


training_features, validation_features = split_by_ratio(features)

print('training length', len(training_features))
print('validation length', len(validation_features))

training length 1999
validation length 1999


In [7]:
import tensorflow as tf
from get_rolling_window import rolling_window

train_ds = tf.data.Dataset.from_tensor_slices((
    rolling_window(training_features, WINDOW_SIZE, 1), 
    get_labels_from_features(training_features)
))

val_ds = tf.data.Dataset.from_tensor_slices((
    rolling_window(validation_features, WINDOW_SIZE, 1), 
    get_labels_from_features(validation_features)
))

train_ds

<TensorSliceDataset shapes: ((128, 10), (1,)), types: (tf.float64, tf.float64)>

In [9]:
from da_rnn import DARNN

model = DARNN(
    WINDOW_SIZE,
    64,
    64,
    Y_DIM
)

# model.summary()

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

EPOCHS = 10

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)

history

Epoch 1/10


ValueError: in user code:

    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /Users/kael/Codes/ml/tensorflow-2-DA-RNN/da_rnn/model.py:66 call  *
        X = inputs[:, :, :-self.y_dim]
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1036 _slice_helper
        return strided_slice(
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1209 strided_slice
        op = gen_array_ops.strided_slice(
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/ops/gen_array_ops.py:10474 strided_slice
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:748 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:590 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:3528 _create_op_internal
        ret = Operation(
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:2015 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    /Users/kael/opt/anaconda3/envs/strategy/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1856 _create_c_op
        raise ValueError(str(e))

    ValueError: Index out of range using input dim 2; input has only 2 dims for '{{node DARNN/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_FLOAT, begin_mask=7, ellipsis_mask=0, end_mask=3, new_axis_mask=0, shrink_axis_mask=0](Cast, DARNN/strided_slice/stack, DARNN/strided_slice/stack_1, DARNN/strided_slice/stack_2)' with input shapes: [128,10], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.
