In [1]:
from cnn_method import *
from cnn_preproc_function import *

import optparse
import numpy as np
import pandas as pd

import sklearn
from sklearn import preprocessing

import keras
import tensorflow as tf

from keras_model_configuration import *
from keras_metric import *

import datetime
import time
import os


Using TensorFlow backend.


In [2]:
gpudevice = "0"
os.environ["TF_MIN_GPU_MULTIPROCESSOR_COUNT"] = "4"
os.environ["CUDA_VISIBLE_DEVICES"]=gpudevice
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

In [3]:

raw_data_file =  "./DATA/PRICE_LIQUIDASSET_30_MIN.csv"
target_col = TARGET_TO_PREDICT = "EURUSD"
FUTURE_PERIOD_PREDICT = 1

TARGET_FUNCTION = "cumulative_returns"
TARGET_THRESHOLD = 0.001
BREAKOUT_WINDOW = 60
FLIP = False


In [4]:
def create_target_2(df, target_col, FUTURE_PERIOD_PREDICT, TARGET_FUNCTION = "cumulative_returns", keras_preproc = True):
    if TARGET_FUNCTION == "cumulative_returns":
        TARGET_FUNCTION_R = cumulative_returns
    elif TARGET_FUNCTION == "mod_sharpe":
        TARGET_FUNCTION_R = mod_sharpe

    df.loc[:,'target'] = df[target_col].rolling(window = FUTURE_PERIOD_PREDICT).apply(lambda x: TARGET_FUNCTION_R(x))
    df.loc[:,'target'] = df['target'].shift(-FUTURE_PERIOD_PREDICT+1)
    df = df.dropna()
    return df


def clean_data_breakout_x(df, target_col, breakout_window = 60):
    price_df = df.fillna(method = "ffill").dropna()
    return_df = df.pct_change()

    return_df = filter_off_trading_day(return_df, target_col)
    filtered_index = return_df.index
    price_df = price_df.reindex(filtered_index)
    x_df = price_df.rolling(window = breakout_window).apply(lambda x: breakout(x)*1,raw = False)
    return_df["target"] = return_df[target_col]
    fdf = pd.merge(x_df, return_df[["target"]], left_index = True, right_index = True)
    fdf.dropna()

    return fdf

def clean_data_breakout_x_2(df, target_col, breakout_window = 60):
    price_df = df.fillna(method = "ffill").dropna()
    return_df = df.pct_change()
    return_df = filter_off_trading_day(return_df, target_col)
    return_df["target"] = return_df[target_col]
    fdf = return_df.dropna()

    return fdf

In [5]:
df = load_data(raw_data_file)


Load Data: Done!


In [6]:
df = clean_data_breakout_x_2(df, target_col, 60)

In [7]:
df = create_target_2(df, "target", FUTURE_PERIOD_PREDICT, TARGET_FUNCTION)


In [8]:

df = classify_target(df, "target", TARGET_THRESHOLD, FLIP)


In [9]:

def split_df(df, end_split):
    #Split df and get index
    start_index, end_index = get_index_from_date(df, end_split)
    target_col= "target"
    x_columns = [j for j in df.columns if j != target_col]

    #Scaling
    # scaler = sklearn.preprocessing.MinMaxScaler(feature_range = (0,1))
    scaler = sklearn.preprocessing.StandardScaler()
    #Fit train_x
    train_x_data = df[x_columns].iloc[start_index[0]:(end_index[0]+1)].values
    scaler.fit(train_x_data)
    #Scale all
    df.loc[:,x_columns] = scaler.transform(df[x_columns])
    X = df[x_columns].values
    Y = df[target_col].values

    return df, X, Y, start_index, end_index, scaler

import datetime
end_split = [datetime.datetime(2011,1,1), datetime.datetime(2013,1,1), datetime.datetime(2017,1,1)]




In [10]:
df, X, Y, start_index, end_index, scaler = split_df(df, end_split)


In [11]:
SEQ_LEN = 10
BATCH_SIZE = 256
train_data_gen, val_data_gen, test_1_data_gen, test_2_data_gen, shape_x = TSGenerator(X, Y, SEQ_LEN, BATCH_SIZE, start_index, end_index)


(256, 10, 56)
Number of batches per epoch: 128
TSGenerator: Done!


In [12]:
class_weights = get_class_weights(df, start_index, end_index)



[0.57924155 3.65491071]


In [13]:
df.columns


Index(['EURUSD', 'EEM', 'EFA', 'EWZ', 'FXI', 'HYG', 'IAU', 'IWM', 'SPY', 'VWO',
       'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XOP', 'JNK', 'IYR', 'VEA',
       'SLV', 'XLV', 'RSX', 'TLT', 'EWJ', 'OIH', 'GLD', 'EZU', 'KRE', 'SMH',
       'XLB', 'XRT', 'LQD', 'EWT', 'XLY', 'VNQ', 'EWH', 'EWW', 'XBI', 'DIA',
       'EWG', 'VGK', 'IEF', 'EMB', 'FEZ', 'AGG', 'EWC', 'USDJPY', 'AUDUSD',
       'USDCAD', 'EURJPY', 'NZDUSD', 'hh', 'mm', 'ss', 'wkday', 'target'],
      dtype='object')

In [14]:
np.mean(df.iloc[start_index[1]:end_index[1]]["target"])


0.09250290197334188

In [None]:
logs_folder = "./output/logs"
models_folder = "./output/models"
model = rnn_model_conf_1_best(shape_x)
adm = keras.optimizers.Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, amsgrad=False, decay = 1e-6)
model.compile(optimizer=adm, loss='binary_crossentropy', metrics=['accuracy', precision, f1])
tensorboard = keras.callbacks.TensorBoard(log_dir=logs_folder)
filepath = "DL-{epoch:04d}-{val_loss:.4f}-{val_acc:.4f}-{val_precision:.4f}-{val_f1:.4f}"
checkpoint = keras.callbacks.ModelCheckpoint("{}/{}.model".format(models_folder, filepath),
                                                       monitor="val_loss",
                                                       verbose=1,
                                                       save_best_only=False,
                                                       save_weights_only=False,
                                                       mode="auto",
                                                       period=3)

In [None]:
history = model.fit_generator(generator=train_data_gen,
                                  validation_data=val_data_gen,
                                  epochs=128,
                                  class_weight=class_weights,
                                  callbacks=[tensorboard, checkpoint],
                                  verbose=0)


Epoch 00003: saving model to ./output/models/DL-0003-0.2898-0.9075-0.0000-0.0000.model

Epoch 00006: saving model to ./output/models/DL-0006-0.2995-0.9075-0.0000-0.0000.model

Epoch 00009: saving model to ./output/models/DL-0009-0.3164-0.9075-0.0000-0.0000.model

Epoch 00012: saving model to ./output/models/DL-0012-0.3468-0.8765-0.1239-0.0769.model

Epoch 00015: saving model to ./output/models/DL-0015-0.4317-0.8653-0.1482-0.1154.model

Epoch 00018: saving model to ./output/models/DL-0018-0.5248-0.8726-0.1539-0.1064.model


KeyboardInterrupt: 

In [None]:
import pandas as pd

z = dict(a =['EURUSD', 'EEM', 'EFA', 'EWZ', 'FXI', 'GDX', 'HYG', 'IAU', 'IWM', 'SPY',
                   'USO', 'VWO', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XOP', 'JNK',
                   'IYR', 'VEA', 'SLV', 'XLV', 'RSX', 'TLT', 'EWJ', 'OIH', 'GLD', 'EZU',
                   'KRE', 'SMH', 'XLB', 'XRT', 'LQD', 'EWT', 'XLY', 'VNQ', 'EWH', 'EWW',
                   'XBI', 'DIA', 'EWG', 'VGK', 'IEF', 'EMB', 'FEZ', 'AGG', 'ITB', 'EWC',
                   'UNG', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD',
                   'EURJPY', 'NZDUSD', 'XAGUSD'])
#pd.DataFrame(z).sort_values("a")

In [None]:
pd.__version__