In [1]:
from pickle import load

import joblib
import numpy as np
import pandas as pd
from tensorflow.keras import models

from mlchartist.array_builder import full_dataset_randomised_arrays
from mlchartist.preprocessing import train_test_split_multiple_companies

In [2]:
# Load the previously saved Keras model from the repository's models directory
# (path is relative to this notebook's working directory).
model_n100 = models.load_model('../../models/Nasdaq_100_150k')

In [3]:
# Print the layer-by-layer architecture of the loaded model; the first LSTM's
# output shape (None, 30, 200) shows the model works on 30-step sequences.
model_n100.summary()

Model: "sequential_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_46 (LSTM)               (None, 30, 200)           171200    
_________________________________________________________________
lstm_47 (LSTM)               (None, 200)               320800    
_________________________________________________________________
dropout_69 (Dropout)         (None, 200)               0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 200)               800       
_________________________________________________________________
dense_92 (Dense)             (None, 200)               40200     
_________________________________________________________________
dropout_70 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_93 (Dense)             (None, 100)             

In [7]:
# Load the persisted scaler (it displays as RobustScaler()); it is later passed
# as `fitted_scaler` when the holdout arrays are built.
# NOTE(review): presumably this is the scaler fitted during model training -- confirm.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a
# trusted source.
marcin_scaler = joblib.load('../../models/scaler')
marcin_scaler

RobustScaler()

## Load in Data

In [14]:
# Tickers whose processed CSVs are loaded for the holdout evaluation.
test_tickers = [
    'PAYX', 'CERN', 'MYL', 'ILMN', 'ADBE',
    'DLTR', 'MXIM', 'ALGN', 'ADP', 'KLAC',
    'FAST', 'AMAT', 'NTES', 'NFLX', 'MELI',
]
len(test_tickers)

15

In [9]:
# Number of rows read from the top of each company's processed CSV.
nrows = 3000

# Read every ticker's CSV first and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and, when called in a loop,
# re-copies the accumulated frame on every iteration.
ticker_frames = []
for ticker in test_tickers:
    ticker_path = '../../raw_data/processed/' + ticker.strip().lower() + '.csv'
    ticker_frames.append(pd.read_csv(ticker_path, nrows=nrows))

# Each file's own row index is kept (no ignore_index), matching what the
# append-based loop produced. pd.concat raises on an empty list, so fall back
# to an empty frame when there are no tickers.
joined_df = pd.concat(ticker_frames) if ticker_frames else pd.DataFrame()

In [11]:
# Preview the first rows of the combined frame (indicator columns plus the
# forward and past return columns).
joined_df.head()

Unnamed: 0,ticker,date,RSI,Stochastic,Stochastic_signal,ADI,OBV,ATR,ADX,ADX_pos,ADX_neg,MACD,MACD_diff,MACD_signal,5TD_return,10TD_return,20TD_return,1D_past_return,5D_past_return,10D_past_return
0,PAYX,2020-12-23,56.298581,39.804772,74.043277,1399423000.0,1512280848,1.970887,32.841726,33.964426,12.138839,1.651348,-0.035694,1.687043,-0.012924,-0.025212,-0.078814,-0.023078,-0.000741,0.012658
1,PAYX,2020-12-22,67.402102,91.331269,93.014603,1402695000.0,1515648489,1.689417,31.726424,30.104183,15.250593,1.803493,0.107527,1.695966,-0.046466,-0.052779,-0.101004,0.000414,0.033144,0.035358
2,PAYX,2020-12-21,67.294665,90.993789,95.240169,1403445000.0,1513500258,1.755526,31.647704,31.111397,15.805237,1.744399,0.075314,1.669084,-0.035718,-0.056321,-0.093591,-0.003405,0.057593,0.032496
3,PAYX,2020-12-18,69.037751,96.71875,95.241428,1402419000.0,1515660051,1.720567,31.572592,34.185348,13.745521,1.647672,-0.002584,1.650256,-0.027033,-0.071193,-0.078415,0.013278,0.055543,0.025934
4,PAYX,2020-12-17,65.879405,98.007968,87.434655,1400213000.0,1512467854,1.739072,30.720915,30.319197,14.645354,1.466647,-0.184254,1.650902,-0.009827,-0.055306,-0.076424,0.012491,0.047186,0.025188


In [10]:
# List the column names available in the combined frame.
joined_df.keys()

Index(['ticker', 'date', 'RSI', 'Stochastic', 'Stochastic_signal', 'ADI',
       'OBV', 'ATR', 'ADX', 'ADX_pos', 'ADX_neg', 'MACD', 'MACD_diff',
       'MACD_signal', '5TD_return', '10TD_return', '20TD_return',
       '1D_past_return', '5D_past_return', '10D_past_return'],
      dtype='object')

In [13]:
# Sanity check on the row count: 15 tickers x 3000 rows each = 45000.
len(joined_df)

45000

In [15]:
# Minimum forward return for a row to be labelled positive, per horizon.
FIVE_TR = 0.0006
TEN_TR = 0.0012
TWENTY_TR = 0.0024

# Parse the date strings into datetimes.
joined_df['date'] = pd.to_datetime(joined_df['date'])

# Forward-return column -> (binary label column, threshold).
label_specs = {
    '5TD_return': ('5D_return_bin', FIVE_TR),
    '10TD_return': ('10D_return_bin', TEN_TR),
    '20TD_return': ('20D_return_bin', TWENTY_TR),
}

# A label is True when the forward return is at or above its threshold.
for return_col, (label_col, threshold) in label_specs.items():
    joined_df[label_col] = joined_df[return_col].ge(threshold)

## Split Test Set

In [16]:
# Split the combined frame with the project's multi-company splitter.
# NOTE(review): '2Y' presumably holds out the most recent two years per
# company as the test set -- confirm in mlchartist.preprocessing.
train_set, test_set = train_test_split_multiple_companies(joined_df, '2Y')

# Report the size of each split.
for split_name, split_df in (('train_set', train_set), ('test_set', test_set)):
    print(split_name, len(split_df))

train_set 37546
test_set 7454


## Generate Test Set

#### With Outliers

In [18]:
# Feature columns fed to the model (13 in total, matching the last axis of
# the resulting holdout_x array).
INPUT_COLS = [
    'RSI', 'Stochastic', 'Stochastic_signal', 'ATR', 'ADX', 'ADX_pos', 'ADX_neg',
    'MACD', 'MACD_diff', 'MACD_signal',
    '1D_past_return', '5D_past_return', '10D_past_return',
]

# Binary label to predict.
TARGET_COLS = ['10D_return_bin']

# Bounds handed to the array builder's outlier validation.
# NOTE(review): presumably [lower, upper] limits on the column value -- confirm
# in mlchartist.array_builder.
outlier_validation = {'1D_past_return': [-0.3, 0.3]}

stride = 1

# Arguments for building the holdout arrays from test_set only. Outlier checks
# are disabled here, so windows containing outliers are kept.
holdout_kwargs = dict(
    unsplit_df=test_set,
    stride=stride,
    split_dataframe=False,  # holdout mode: do not split the frame again
    input_cols=INPUT_COLS,
    fitted_scaler=marcin_scaler,  # reuse the loaded scaler rather than fitting a new one
    outlier_threshold=1,
    outlier_validation=outlier_validation,
    check_train_outliers=False,
    check_test_outliers=False,
    target_col=TARGET_COLS,
    time_window=30,
    test_set_size='2Y',
)
holdout_x, holdout_y, scaler = full_dataset_randomised_arrays(**holdout_kwargs)

print('')
print('')
print('### Stats ###')
for array_label, array_value in (('holdout_x', holdout_x), ('holdout_y', holdout_y)):
    print(array_label, array_value.shape)
print('scaler', scaler)

Train/Test Split: Not splitting dataframe. (Holdout Data)
Scaler: Using provided fitted_scaler
15 Companies in Dataset
Starting PAYX: Company 1 of 15
Starting CERN: Company 2 of 15
Starting MYL: Company 3 of 15
Starting ILMN: Company 4 of 15
Starting ADBE: Company 5 of 15
Starting DLTR: Company 6 of 15
Starting MXIM: Company 7 of 15
Starting ALGN: Company 8 of 15
Starting ADP: Company 9 of 15
Starting KLAC: Company 10 of 15
Starting FAST: Company 11 of 15
Starting AMAT: Company 12 of 15
Starting NTE: Company 13 of 15
Starting NFLX: Company 14 of 15
Starting MELI: Company 15 of 15
All Companies Completed



### Stats ###
holdout_x (7019, 30, 13)
holdout_y (7019, 1)
scaler RobustScaler()


In [20]:
# Score the loaded model on the holdout arrays built with outliers kept.
# The result is the loss followed by the model's compiled metrics (which
# metrics they are is not visible in this notebook).
model_n100.evaluate(holdout_x, holdout_y)



[0.33103007078170776, 0.9293519854545593, 0.8818920254707336]

#### Without Outliers

In [21]:
# Same feature columns, target and bounds as the "with outliers" run; they are
# re-declared so this cell does not depend on objects the builder may have touched.
INPUT_COLS = [
    'RSI', 'Stochastic', 'Stochastic_signal', 'ATR', 'ADX', 'ADX_pos', 'ADX_neg',
    'MACD', 'MACD_diff', 'MACD_signal',
    '1D_past_return', '5D_past_return', '10D_past_return',
]

TARGET_COLS = ['10D_return_bin']

# NOTE(review): presumably [lower, upper] limits on the column value -- confirm
# in mlchartist.array_builder.
outlier_validation = {'1D_past_return': [-0.3, 0.3]}

stride = 1

# The only difference from the "with outliers" run: both outlier checks are
# switched on, so the builder applies outlier_validation.
holdout_kwargs_no_outliers = dict(
    unsplit_df=test_set,
    stride=stride,
    split_dataframe=False,  # holdout mode: do not split the frame again
    input_cols=INPUT_COLS,
    fitted_scaler=marcin_scaler,  # reuse the loaded scaler rather than fitting a new one
    outlier_threshold=1,
    outlier_validation=outlier_validation,
    check_train_outliers=True,
    check_test_outliers=True,
    target_col=TARGET_COLS,
    time_window=30,
    test_set_size='2Y',
)
holdout_x_without_outliers, holdout_y_without_outliers, scaler = (
    full_dataset_randomised_arrays(**holdout_kwargs_no_outliers)
)

print('')
print('')
print('### Stats ###')
for array_label, array_value in (
    ('holdout_x', holdout_x_without_outliers),
    ('holdout_y', holdout_y_without_outliers),
):
    print(array_label, array_value.shape)
print('scaler', scaler)

Train/Test Split: Not splitting dataframe. (Holdout Data)
Scaler: Using provided fitted_scaler
15 Companies in Dataset
Starting PAYX: Company 1 of 15
Starting CERN: Company 2 of 15
Starting MYL: Company 3 of 15
Starting ILMN: Company 4 of 15
Starting ADBE: Company 5 of 15
Starting DLTR: Company 6 of 15
Starting MXIM: Company 7 of 15
Starting ALGN: Company 8 of 15
Starting ADP: Company 9 of 15
Starting KLAC: Company 10 of 15
Starting FAST: Company 11 of 15
Starting AMAT: Company 12 of 15
Starting NTE: Company 13 of 15
Starting NFLX: Company 14 of 15
Starting MELI: Company 15 of 15
All Companies Completed



### Stats ###
holdout_x (6989, 30, 13)
holdout_y (6989, 1)
scaler RobustScaler()


In [22]:
# Score the loaded model on the holdout arrays built with outlier checks
# enabled, for comparison with the earlier evaluation that kept outliers.
model_n100.evaluate(holdout_x_without_outliers, holdout_y_without_outliers)



[0.3304578363895416, 0.929009199142456, 0.8821004629135132]