In [1]:
import pandas as pd
import numpy as np

from definitions import REPO_ROOT

from src.models.mlstm_fcn import generate_model
from src.models.utils import train_model
from src.data.utils import train_validation_test_split, make_Xy

# Load the processed stock table from <REPO_ROOT>/data/processed/stock_data.csv.
# NOTE(review): the frame displayed at the end of this notebook has an
# 'Unnamed: 0' column, which suggests the CSV was written together with its
# index; consider index_col=0 — confirm make_Xy does not rely on that column.
stock_data = pd.read_csv(REPO_ROOT / 'data' / 'processed' / 'stock_data.csv')

In [2]:
# Columns fed to the model and columns it should predict.
feature_columns = ['open', 'high', 'low', 'close', 'volume']
target_columns = ['high', 'low']

# Build the training inputs/targets from the stock table with the project helper.
X_train, y_train = make_Xy(
    timeseries=stock_data,
    features=feature_columns,
    outputs=target_columns,
)

In [3]:
# Relative increase from close
# Every target is re-expressed as its fractional change from the last close
# value of the matching input sample: (target - close) / close.
# Vectorized so the result is always a float array; the earlier per-row
# in-place write-back would silently truncate if y_train had an integer dtype.
# NOTE: this overwrites y_train, so run the cell exactly once after make_Xy.
CLOSE_IDX = 3  # position of 'close' in the features list passed to make_Xy
last_close = X_train[:, CLOSE_IDX, -1]
# Add singleton axes so the per-sample close broadcasts over all target axes.
last_close = last_close.reshape((-1,) + (1,) * (np.ndim(y_train) - 1))
y_train = (y_train - last_close) / last_close

In [4]:
# Standardize price data
# Each sample/feature series is centred and scaled along its last axis (axis=2).
# keepdims=True keeps the reduced axis as length 1, so the statistics broadcast
# against X_train for any number of samples/features instead of the previously
# hard-coded (888, 5, 1) reshape.
feature_mean = np.mean(X_train, axis=2, keepdims=True)
feature_std = np.std(X_train, axis=2, keepdims=True)
# A perfectly flat series has zero std; divide by 1 there so the result is 0
# rather than NaN/inf.
safe_std = np.where(feature_std == 0, 1.0, feature_std)
X_train_tf = (X_train - feature_mean) / safe_std

In [None]:
# Build the model from src.models.mlstm_fcn for the five input features.
model = generate_model(
    num_features=5
)
# Fit on the standardized inputs. X_train_tf is computed in the cell above but
# the raw X_train was being passed here, leaving the standardization unused.
# NOTE(review): the two None arguments are presumably the validation inputs and
# targets, and the last argument the TensorBoard log directory — confirm
# against train_model's signature.
train_model(
    model,
    X_train_tf,
    y_train,
    None,
    None,
    REPO_ROOT / 'tensorboard_logs'
)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 5, 365)]     0                                            
__________________________________________________________________________________________________
permute (Permute)               (None, 365, 5)       0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 365, 128)     5248        permute[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 365, 128)     512         conv1d[0][0]                     
______________________________________________________________________________________________



Epoch 1/500


In [2]:
from numpy import random

# Synthetic fixture for prototyping the train/validation/test split:
# ten dummy "assets" (index arrays of random length) and one random share per
# asset, with the shares adding up to 1.
SEED = 42
# Local seeded generator: re-running this cell reproduces the same draws
# without touching NumPy's global random state.
rng = np.random.default_rng(SEED)

ratio = (0.7, 0.15, 0.15)  # target (train, validation, test) fractions
assets = [np.arange(int(rng.normal(1000, 200))) for _ in range(10)]

n = len(assets)
distribute = []
whole = 1.0  # portion of the total that has not been handed out yet
for i in range(n):
    if i == n - 1:
        # The last asset takes whatever is left so the shares sum to 1.
        distribute.append(whole)
    else:
        remaining = n - i
        loc = 1 / remaining        # an even split of what is left
        std = 1 / (3 * remaining)  # spread of the draw around the even split
        # Jitter the even split, clipped to [0.5 * loc, 1.5 * loc] so a share
        # is never zero, negative, or larger than what remains.
        share = whole * np.clip(rng.normal(loc, std), 0.5 * loc, 1.5 * loc)
        distribute.append(share)
        whole -= share

In [11]:
# Sanity check: the per-asset shares built above should add up to 1.0,
# because the last asset is given whatever remains of `whole`.
sum(distribute)

1.0

In [13]:
# Manual prototype of the split: the test segment of each asset starts at a
# fraction that depends on the asset's share, and the validation segment is
# the ratio[1]-wide slice directly before it.
train_list, valid_list, test_list = [], [], []
for i, asset in enumerate(assets):
    # Fractional position where this asset's test segment begins.
    test_beginning = 1.0 - distribute[i] * ratio[2] * n
    print(f'test_beginning: {test_beginning}')

    # Turn the fractional boundaries into integer positions within the asset.
    length = len(asset)
    valid_idx = int((test_beginning - ratio[1]) * length)
    test_idx = int(test_beginning * length)

    train_part = asset[:valid_idx]
    valid_part = asset[valid_idx:test_idx]
    test_part = asset[test_idx:]

    train_list.append(train_part)
    valid_list.append(valid_part)
    test_list.append(test_part)

test_beginning: 0.8671766649396547
test_beginning: 0.8840518698771554
test_beginning: 0.8630205074182805
test_beginning: 0.7788230266790587
test_beginning: 0.8572340617869869
test_beginning: 0.7984081523995799
test_beginning: 0.8690972673406228
test_beginning: 0.871820143181516
test_beginning: 0.85822826480092
test_beginning: 0.8521400415762252


In [63]:
# Slicing check: an open-ended slice returns everything from the index onward.
# `a` is defined here so the cell runs on a fresh kernel; previously it relied
# on a variable left over from a cell that is no longer in the notebook.
a = np.arange(10)
a[7:]

array([7, 8, 9])

In [3]:
# Ten dummy assets: index arrays 0..k-1 whose length k is drawn from
# N(1000, 200) and truncated to an int. A locally seeded generator makes the
# lengths identical on every run of this cell.
rng = np.random.default_rng(42)
assets = [np.arange(int(rng.normal(1000, 200))) for _ in range(10)]

In [4]:
# Split the dummy assets with the project helper, using its default arguments.
# NOTE(review): presumably each returned collection holds one entry per asset,
# in the same order — the next cell zips them on that assumption; confirm.
train, valid, test = train_validation_test_split(assets)

In [5]:
# Per-asset fraction of samples that ended up in train / validation / test.
t, v, tt = [], [], []
for a, b, c in zip(train, valid, test):
    total = len(a) + len(b) + len(c)
    for fractions, part in ((t, a), (v, b), (tt, c)):
        fractions.append(len(part) / total)

# Mean fraction over all assets, printed in train, validation, test order.
for fractions in (t, v, tt):
    print(np.mean(fractions))

0.6996802939449093
0.1498429264365997
0.1504767796184909


In [6]:
# Relative increase of each row's high over that row's close, using the same
# (value - close) / close formula applied to the training targets earlier.
# NOTE(review): the original statement had no right-hand side (SyntaxError).
# Same-row high vs. close is an assumption — if the intent was the NEXT row's
# high relative to this row's close, use stock_data['high'].shift(-1) instead.
stock_data['high_relative_increase_close'] = (
    (stock_data['high'] - stock_data['close']) / stock_data['close']
)

Unnamed: 0,close,high,low,open,volume
0,125.430000,128.000000,125.210000,127.820000,79388743.0
1,127.310000,127.720000,125.100000,125.230000,73359943.0
2,124.690000,124.920000,122.880000,123.160000,86428018.0
3,124.850000,126.990000,124.780000,126.560000,60196256.0
4,126.270000,126.920000,125.170000,126.820000,68472910.0
...,...,...,...,...,...
1254,25.087500,25.117500,24.812500,24.860001,145364800.0
1255,25.102501,25.182501,24.660000,24.920000,225324800.0
1256,24.905001,24.934999,24.527500,24.667500,152675200.0
1257,24.475000,24.522499,24.209999,24.305000,140560800.0
