In [None]:
import pandas as pd
import numpy as np
import os
import random

from mlchartist.array_builder import build_arrays, build_randomised_arrays
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

In [None]:
# Load the processed AAPL CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../../raw_data/processed/aapl.csv')

In [None]:
# Parse the 'date' column to datetimes so the `.dt.year` filters used for the split work.
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Thresholds applied to the 5/10/20 'TD_return' columns when building the binary targets.
# Each threshold doubles with the horizon (0.0006 -> 0.0012 -> 0.0024).
FIVE_TR = 0.0006
TEN_TR = 0.0012
TWENTY_TR = 0.0024
# Feature columns fed to the scaler and to build_arrays (15 columns, matching the
# last dimension of the model input shapes used later in this notebook).
INPUT_COLS = ['RSI', 'Stochastic', 'Stochastic_signal', 'ADI','OBV', 'ATR', 'ADX', 'ADX_pos', 'ADX_neg', 'MACD', 'MACD_diff','MACD_signal', '1D_past_return', '5D_past_return', '10D_past_return']
# Alternative, smaller feature list (currently disabled):
#INPUT_COLS = ['RSI', 'Stochastic', 'Stochastic_signal']

In [None]:
# Binary targets: True where the matching '*TD_return' column is at or above its threshold.
# Missing returns compare as False, exactly as with the `>=` operator.
df['5D_return_bin'] = df['5TD_return'].ge(FIVE_TR)
df['10D_return_bin'] = df['10TD_return'].ge(TEN_TR)
df['20D_return_bin'] = df['20TD_return'].ge(TWENTY_TR)

In [None]:
# Chronological split: rows dated 2018 or later are held out for testing,
# rows from 1990 through 2017 are used for training.
# .copy() gives each split its own data, so later column assignments on
# train_df/test_df act on independent frames instead of on selections of `df`
# (which would trigger pandas' SettingWithCopyWarning).
test_df = df[df['date'].dt.year >= 2018].copy()
train_df = df[(df['date'].dt.year >= 1990) & (df['date'].dt.year < 2018)].copy()

In [None]:
# Feature scaler; RobustScaler is used here with its default settings.
scaler = RobustScaler()

In [None]:
# Fit the scaler on the training rows only; the test rows are transformed with
# these same fitted statistics.
scaler.fit(train_df[INPUT_COLS])

In [None]:
# Rebind both splits to explicit copies before writing the scaled features, so the
# assignment never targets a selection of `df` (avoids SettingWithCopyWarning).
train_df = train_df.copy()
test_df = test_df.copy()

# Replace the feature columns with their scaled values. Assigning with
# `frame[cols] = ...` swaps the columns outright, so the result does not depend
# on the columns' previous dtypes.
# NOTE: this cell is not idempotent - re-running it scales already-scaled data;
# re-run the split cell first.
train_df[INPUT_COLS] = scaler.transform(train_df[INPUT_COLS])
test_df[INPUT_COLS] = scaler.transform(test_df[INPUT_COLS])

In [None]:
# Display the training frame to eyeball the scaled feature columns.
train_df

In [None]:
# Windowed inputs and labels for the 20-day target: 40-row windows, stride 1.
apple_train_x_20, apple_train_y_20 = build_arrays(
    train_df,
    input_cols=INPUT_COLS,
    target_col='20D_return_bin',
    time_window=40,
    stride=1,
)
apple_test_x_20, apple_test_y_20 = build_arrays(
    test_df,
    input_cols=INPUT_COLS,
    target_col='20D_return_bin',
    time_window=40,
    stride=1,
)

In [None]:
# Positions 0..N-1 of the training windows, used as the population for sampling.
indx = [*range(len(apple_train_x_20))]

In [None]:
# Number of training windows available for sampling.
len(indx)

In [None]:
# (rows, columns) of the held-out test frame.
test_df.shape

In [None]:
# Draw the training-window sample without replacement.
# - A dedicated, seeded generator makes the draw reproducible across kernel
#   restarts without touching the global `random` state.
# - The sample size is capped at the population size, because random.sample
#   raises ValueError when asked for more items than are available.
sample_indx = random.Random(42).sample(indx, min(7000, len(indx)))

In [None]:
# Pick the sampled windows along the first axis; all remaining axes are kept whole.
X_train_20 = apple_train_x_20[sample_indx]

In [None]:
# Labels for the sampled windows, in the same order as X_train_20.
# Index with the flat list: wrapping it in another list (`[[sample_indx]]`) is an
# array index on NumPy >= 1.23 and adds a leading axis of length 1, so the labels
# would no longer line up with the sampled inputs.
y_train_20 = apple_train_y_20[sample_indx]

In [None]:
from tensorflow.keras.metrics import Precision
from tensorflow.keras import regularizers

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras import layers, models 
from tensorflow.keras.optimizers import RMSprop, Adam

# Module-level instances are kept so any cell that still refers to `optim` or
# `precision` keeps working; init_model() below does not use them.
optim = RMSprop(learning_rate=0.0001)
precision = Precision()


def init_model(input_shape=(40, 15), learning_rate=0.0001):
    """Build and compile the stacked-LSTM binary classifier.

    Architecture: two LSTM layers (200 units each, tanh), then Dense layers of
    200/100/50 units (relu) with dropout of 0.3 after the second LSTM and after
    the first two Dense layers, and a single sigmoid output unit.

    Each call creates its own optimizer and Precision metric. Optimizers and
    metrics carry state tied to the model they were first used with, so sharing
    one instance across several models mixes that state or fails outright on
    recent Keras versions.

    Parameters
    ----------
    input_shape : tuple, default (40, 15)
        Shape of one sample passed to the first LSTM layer.
    learning_rate : float, default 0.0001
        Learning rate for the RMSprop optimizer.

    Returns
    -------
    A compiled Sequential model using binary cross-entropy loss and reporting
    precision and accuracy.
    """
    reg_l1 = regularizers.l1(0.001)
    reg_l2 = regularizers.l2(0.001)
    reg_l1_l2 = regularizers.l1_l2(l1=0.001, l2=0.001)

    model = Sequential()
    model.add(layers.LSTM(200, return_sequences=True, input_shape=input_shape, activation='tanh'))
    model.add(layers.LSTM(200, activation='tanh'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(200, activation='relu', kernel_regularizer=reg_l1))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(100, activation='relu', bias_regularizer=reg_l2))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(50, activation='relu', activity_regularizer=reg_l1_l2))
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # Fresh optimizer per model; see the docstring.
        optimizer=RMSprop(learning_rate=learning_rate),
        # The explicit name keeps the logged metric key stable ('precision')
        # no matter how many models have been built in this session.
        metrics=[Precision(name='precision'), 'accuracy'],
    )

    return model

### Model for 20-day future returns

In [None]:
# Fresh LSTM classifier for the 20-day target.
model_20 = init_model()

from tensorflow.keras.callbacks import EarlyStopping

# Stop after 3 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=3)

# 20% of the sampled windows are held out for validation; the epoch count is an
# upper bound, with early stopping expected to end training sooner.
model_20.fit(
    x=X_train_20,
    y=y_train_20,
    batch_size=16,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Share of True labels in the full 20-day training set (baseline for reading the metrics).
apple_train_y_20.sum()/len(apple_train_y_20)

In [None]:
# Share of True labels in the 20-day test set.
apple_test_y_20.sum()/len(apple_test_y_20)

In [None]:
# Evaluate the 20-day LSTM on the held-out test windows (loss plus the compiled metrics).
model_20.evaluate(apple_test_x_20, apple_test_y_20)

### Model for 10-day future returns

In [None]:
# Windowed inputs and labels for the 10-day target: 40-row windows, stride 1.
apple_train_x_10, apple_train_y_10 = build_arrays(
    train_df,
    input_cols=INPUT_COLS,
    target_col='10D_return_bin',
    time_window=40,
    stride=1,
)
apple_test_x_10, apple_test_y_10 = build_arrays(
    test_df,
    input_cols=INPUT_COLS,
    target_col='10D_return_bin',
    time_window=40,
    stride=1,
)

In [None]:
# Reuse the same sampled window positions for the 10-day target.
# Both arrays are indexed with the flat list so inputs and labels stay aligned;
# the nested form `[[sample_indx]]` is an array index on NumPy >= 1.23 and would
# add a leading axis of length 1 to the labels.
X_train_10 = apple_train_x_10[sample_indx]
y_train_10 = apple_train_y_10[sample_indx]

In [None]:
# Fresh LSTM classifier for the 10-day target.
model_10 = init_model()

from tensorflow.keras.callbacks import EarlyStopping

# Stop after 3 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=3)

# 20% of the sampled windows are held out for validation.
model_10.fit(
    x=X_train_10,
    y=y_train_10,
    batch_size=16,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Share of True labels in the 10-day test set.
apple_test_y_10.sum()/len(apple_test_y_10)

In [None]:
# Evaluate the 10-day LSTM on the held-out test windows.
model_10.evaluate(apple_test_x_10, apple_test_y_10)

### Model for 5-day future returns

In [None]:
# Windowed inputs and labels for the 5-day target: 40-row windows, stride 1.
apple_train_x_5, apple_train_y_5 = build_arrays(
    train_df,
    input_cols=INPUT_COLS,
    target_col='5D_return_bin',
    time_window=40,
    stride=1,
)
apple_test_x_5, apple_test_y_5 = build_arrays(
    test_df,
    input_cols=INPUT_COLS,
    target_col='5D_return_bin',
    time_window=40,
    stride=1,
)

In [None]:
# Reuse the same sampled window positions for the 5-day target.
# Both arrays are indexed with the flat list so inputs and labels stay aligned;
# the nested form `[[sample_indx]]` is an array index on NumPy >= 1.23 and would
# add a leading axis of length 1 to the labels.
X_train_5 = apple_train_x_5[sample_indx]
y_train_5 = apple_train_y_5[sample_indx]

In [None]:
# Fresh LSTM classifier for the 5-day target.
model_5 = init_model()

from tensorflow.keras.callbacks import EarlyStopping

# Stop after 3 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=3)

# 20% of the sampled windows are held out for validation.
model_5.fit(
    x=X_train_5,
    y=y_train_5,
    batch_size=16,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Share of True labels in the 5-day test set.
apple_test_y_5.sum()/len(apple_test_y_5)

In [None]:
# Evaluate the 5-day LSTM on the held-out test windows.
model_5.evaluate(apple_test_x_5, apple_test_y_5)

# CNN test

### CNN for 20-day future returns

In [None]:
# Import left in place; this cell itself no longer uses it.
from tensorflow.keras.backend import expand_dims

# Add a trailing channel axis so each window becomes a one-channel "image" for Conv2D.
# np.expand_dims keeps the data as NumPy arrays, like the labels and the LSTM
# inputs, instead of converting the whole dataset to a backend tensor.
X_train_cnn_20 = np.expand_dims(X_train_20, axis=-1)
X_test_cnn_20 = np.expand_dims(apple_test_x_20, axis=-1)


In [None]:
def initialize_model_cnn(input_shape=(40, 15, 1)):
    """Build the (uncompiled) 2-D CNN binary classifier.

    Architecture: three Conv2D(3x3, 'same' padding) -> ReLU -> MaxPooling2D(2x2)
    stages with 32, 64 and 128 filters, then Flatten, Dense(120) and Dense(60)
    with relu and dropout of 0.5 after each, and a single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple, default (40, 15, 1)
        Shape of one sample passed to the first Conv2D layer. The default
        matches the inputs built in this notebook with a trailing channel axis.

    Returns
    -------
    A Sequential model. The caller is responsible for calling compile().
    """
    model = models.Sequential()

    # Convolutional feature extractor; the filter count doubles at each stage.
    model.add(layers.Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Conv2D(64, (3, 3), padding='same'))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Conv2D(128, (3, 3), padding='same'))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(layers.Flatten())
    model.add(layers.Dense(120, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(60, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

In [None]:
# Fresh CNN for the 20-day target.
model_cnn_20 = initialize_model_cnn()

model_cnn_20.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=5)

# 20% of the sampled windows are held out for validation.
model_cnn_20.fit(
    x=X_train_cnn_20,
    y=y_train_20,
    batch_size=16,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Evaluate the 20-day CNN on the held-out test windows (loss and accuracy).
model_cnn_20.evaluate(X_test_cnn_20, apple_test_y_20)

### CNN for 10-day future returns

In [None]:
# Add a trailing channel axis for Conv2D. np.expand_dims keeps the data as NumPy
# arrays instead of converting the whole dataset to a backend tensor.
X_train_cnn_10 = np.expand_dims(X_train_10, axis=-1)
X_test_cnn_10 = np.expand_dims(apple_test_x_10, axis=-1)

In [None]:
# Fresh CNN for the 10-day target.
model_cnn_10 = initialize_model_cnn()

model_cnn_10.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=5)

# 20% of the sampled windows are held out for validation.
model_cnn_10.fit(
    x=X_train_cnn_10,
    y=y_train_10,
    batch_size=16,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Evaluate the 10-day CNN on the held-out test windows (loss and accuracy).
model_cnn_10.evaluate(X_test_cnn_10, apple_test_y_10)

### CNN for 5-day future returns

In [None]:
# Add a trailing channel axis for Conv2D. np.expand_dims keeps the data as NumPy
# arrays instead of converting the whole dataset to a backend tensor.
X_train_cnn_5 = np.expand_dims(X_train_5, axis=-1)
X_test_cnn_5 = np.expand_dims(apple_test_x_5, axis=-1)

In [None]:
# Fresh CNN for the 5-day target.
model_cnn_5 = initialize_model_cnn()

model_cnn_5.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without improvement and roll back to the best weights seen.
es = EarlyStopping(restore_best_weights=True, patience=5)

# NOTE(review): batch_size is 32 here but 16 for the 20- and 10-day CNN runs -
# confirm this difference is intentional.
model_cnn_5.fit(
    x=X_train_cnn_5,
    y=y_train_5,
    batch_size=32,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Evaluate the 5-day CNN on the held-out test windows (loss and accuracy).
model_cnn_5.evaluate(X_test_cnn_5, apple_test_y_5)