In [1]:
# List all devices
from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

In [2]:
# Check which GPUs the Keras TensorFlow backend reports as available.
from keras import backend as K
# NOTE(review): `_get_available_gpus` is underscore-prefixed (private by Python
# convention), so it may not be present in other Keras versions — confirm.
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

In [3]:
import os

# Select the GPU through CUDA environment variables.
# NOTE(review): an earlier cell already asks the backend for its GPUs before
# these variables are set; presumably they must be set before the first GPU
# query to have any effect — confirm the cell order.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    # The GPU id to use, usually either "0" or "1"
    "CUDA_VISIBLE_DEVICES": "0",
})

In [4]:
# Importing the libraries
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.activations import softmax
from keras.optimizers import SGD
from keras.models import load_model
import math
import pickle


In [5]:
# Load the pre-computed `dataset` object. Despite the .txt extension the file
# is opened in binary mode and decoded with pickle.
# Later cells iterate over range(len(dataset[0])) and read
# dataset[1][...][0] and dataset[1][...][1] as the test inputs and labels.
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# it on files from a trusted source.
with open("../data/dowjones_calculated/periods1.txt", "rb") as fp:   # Unpickling
    dataset = pickle.load(fp)

In [8]:
def normalize_data(df):
    """Standardise each row of a DataFrame and return the raw array.

    Every row has its own mean subtracted and is then divided by its own
    standard deviation (as computed by ``DataFrame.std`` with its defaults).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame; it is not modified.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``df`` holding the row-wise z-scores.
    """
    row_mean = df.mean(axis=1)
    row_std = df.std(axis=1)
    # axis=0 aligns the per-row statistics with the frame's row index.
    return df.sub(row_mean, axis=0).div(row_std, axis=0).values

In [6]:
def get_one_hot(targets, nb_classes):
    """One-hot encode an array of integer class indices.

    Parameters
    ----------
    targets : array-like of int
        Class indices in ``[0, nb_classes)``, of any shape. Plain lists and
        tuples are accepted as well as NumPy arrays.
    nb_classes : int
        Number of classes, i.e. the length of the added last axis.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``targets.shape + (nb_classes,)`` with a single
        1.0 per entry along the last axis.
    """
    # Convert once and take the shape from the converted array, so inputs
    # without a ``.shape`` attribute (lists, tuples) work too.
    labels = np.asarray(targets)
    # Rows of the identity matrix are the one-hot vectors; pick one per label.
    one_hot = np.eye(nb_classes)[labels.reshape(-1)]
    return one_hot.reshape(labels.shape + (nb_classes,))

In [44]:
def long_short_postion(returns, k):
    """Turn one row of scores into long/short/neutral signals.

    The ``k`` lowest scores are marked -1 (short), the ``k`` highest are
    marked 1 (long) and everything else is 0 (neutral). If the two groups
    overlap (``2 * k > len(returns)``) the short mark wins.

    Unlike the previous version, the input is NOT modified; a new object is
    returned.

    Parameters
    ----------
    returns : pandas.Series or 1-D array-like
        Scores to rank (e.g. predicted probabilities for one date).
    k : int
        Number of long and of short positions; must satisfy
        ``0 < k < len(returns)``.

    Returns
    -------
    pandas.Series or numpy.ndarray
        Signals with the same length and dtype as the input scores. A Series
        input yields a Series with the same index and name.

    Raises
    ------
    ValueError
        If ``k`` is outside ``(0, len(returns))``.
    """
    scores = np.asarray(returns)
    n = scores.shape[0]
    if not 0 < k < n:
        raise ValueError(f"k must satisfy 0 < k < {n}, got {k}")

    # argpartition puts the k smallest first / the k largest last without
    # paying for a full sort.
    short_idx = np.argpartition(scores, k)[:k]
    long_idx = np.argpartition(scores, n - k)[n - k:]

    # Write into a fresh array so the caller's data is left untouched.
    signals = np.zeros_like(scores)
    signals[long_idx] = 1
    signals[short_idx] = -1  # assigned last so shorts win on overlap

    if isinstance(returns, pd.Series):
        return pd.Series(signals, index=returns.index, name=returns.name)
    return signals

In [12]:
# Score every test period with the model saved for that period and collect,
# per period, the last-axis component 1 of the prediction (class-1 output).
positions = []
for i in range(len(dataset[0])):
    model_period = f"../model/LSTM/my_model2_period{i}.h5"
    regressor = load_model(model_period, custom_objects={"softmax": softmax})
    x_test = normalize_data(dataset[1][i][0])
    # Fix: labels come from the same period `i` as the features (this index
    # was hard-coded to period 5 for every iteration).
    y_test = get_one_hot(dataset[1][i][1].values, 2) * 1.0
    # Sliding windows of 240 consecutive rows, 64 windows per batch.
    test_gen = TimeseriesGenerator(x_test, y_test,
                                   length=240, sampling_rate=1,
                                   batch_size=64)

    predicted = regressor.predict_generator(test_gen)
    # Hard 0/1 labels; only needed by the disabled accuracy check below.
    label = (predicted > 0.5) * 1
#     print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())
    positions.append(predicted[:, :, 1])

# Later cells build DataFrames from `all_positions`.
# NOTE(review): presumably it is the per-period predictions stacked along the
# time axis (rows) — confirm that the row count matches the date window used
# when the frame is built.
all_positions = np.concatenate(positions, axis=0)

In [42]:
# Read the price file and reshape it into one row per date and one column per
# ticker, holding the first 'Close' value found for each (date, ticker) pair.
stocks = (
    pd.read_csv(
        "../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv",
        index_col='Date',
        parse_dates=['Date'],
    )
    .loc[:, ["Close", "Name"]]
    .pipe(
        lambda prices: prices.pivot_table(
            values='Close', index=prices.index, columns='Name', aggfunc='first'
        )
    )
)
stocks.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-03,40.91,10.68,47.58,52.58,70.44,57.8,17.45,59.08,24.4,35.37,...,26.84,10.74,23.78,58.78,45.99,61.73,56.53,30.38,46.23,58.47
2006-01-04,40.97,10.71,47.25,51.95,71.17,59.27,17.85,58.91,23.99,35.32,...,26.97,10.69,24.55,58.89,46.5,61.88,56.19,31.27,46.32,58.57
2006-01-05,41.53,10.63,47.65,52.5,70.33,59.27,18.35,58.19,24.41,35.23,...,26.99,10.76,24.58,58.7,46.95,61.69,55.98,31.63,45.69,58.28
2006-01-06,43.21,10.9,47.87,52.68,69.35,60.45,18.77,59.25,24.74,35.47,...,26.91,10.72,24.85,58.64,47.21,62.9,56.16,31.35,45.88,59.43
2006-01-09,43.42,10.86,47.08,53.99,68.77,61.55,19.06,58.95,25.0,35.38,...,26.86,10.88,24.85,59.08,47.23,61.4,56.8,31.48,45.71,59.4


In [43]:
# Simple one-row returns: (price_t - price_{t-1}) / price_{t-1}.
# Rows containing any NaN (including the first row, which has no predecessor)
# are dropped.
real_returns = (
    stocks
    .sub(stocks.shift(1))
    .div(stocks.shift(1))
    .dropna()
)
real_returns.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-04,0.001467,0.002809,-0.006936,-0.011982,0.010363,0.025433,0.022923,-0.002877,-0.016803,-0.001414,...,0.004844,-0.004655,0.03238,0.001871,0.011089,0.00243,-0.006015,0.029296,0.001947,0.00171
2006-01-05,0.013669,-0.00747,0.008466,0.010587,-0.011803,0.0,0.028011,-0.012222,0.017507,-0.002548,...,0.000742,0.006548,0.001222,-0.003226,0.009677,-0.00307,-0.003737,0.011513,-0.013601,-0.004951
2006-01-06,0.040453,0.0254,0.004617,0.003429,-0.013934,0.019909,0.022888,0.018216,0.013519,0.006812,...,-0.002964,-0.003717,0.010985,-0.001022,0.005538,0.019614,0.003215,-0.008852,0.004158,0.019732
2006-01-09,0.00486,-0.00367,-0.016503,0.024867,-0.008363,0.018197,0.01545,-0.005063,0.010509,-0.002537,...,-0.001858,0.014925,0.0,0.007503,0.000424,-0.023847,0.011396,0.004147,-0.003705,-0.000505
2006-01-10,-0.010134,0.063536,-0.030374,-0.002964,0.004799,-0.004062,-0.004722,0.004919,0.0128,-0.00537,...,0.005212,0.001838,-0.016499,-0.003893,-0.007199,0.019707,0.000704,0.00413,0.003282,0.007744


In [59]:
# Label the model outputs with the dates at positions 750..2999 of
# `real_returns` and with its ticker columns. `all_positions` must therefore
# have one row per date in that window and one column per ticker.
probabilities = pd.DataFrame(
    data=all_positions,
    index=real_returns.index[750:3000],
    columns=real_returns.columns,
)
probabilities.head()

In [76]:
# Back-test the long/short rule for k = 1..15 positions per side.
# The returns window and the probability frame do not depend on k, so they
# are built once instead of on every iteration.
test_returns = real_returns[750:3000]
probabilities = pd.DataFrame(data=all_positions, index=test_returns.index, columns=real_returns.columns)
for k in range(1, 16):
    # Hand the helper a copy of each row so that any in-place writes it makes
    # cannot leak back into `probabilities`.
    positions = probabilities.apply(lambda x: long_short_postion(x.copy(), k), axis=1, result_type='broadcast')
    # Signed return per date and ticker: +r when long, -r when short, 0 when neutral.
    final_returns = test_returns.mul(positions)
    # NOTE(review): this sums the compounded growth factor of every column and
    # divides by 2k. Columns that are mostly neutral contribute a factor close
    # to 1, and the printed k=1 value (~15) is about half the column count —
    # confirm this is the intended "no rebalance" metric.
    no_rebalance = (final_returns + 1).product().sum()/(2 * k)
    # NOTE(review): the daily return is divided by k although 2k positions are
    # held; an earlier draft used (2 * k) — confirm the intended capital base.
    rebalance = (1 + final_returns.sum(axis = 1)/k).product()
    print(f"no rebalance = {no_rebalance}")
    print(f"rebalance = {rebalance}")

no rebalance = 15.07234186625566
rebalance = 0.09473557044327233
no rebalance = 7.324241807788031
rebalance = 0.08780103197988086
no rebalance = 5.095163314847756
rebalance = 0.18866981676973454
no rebalance = 3.827341434690764
rebalance = 0.23984174391655125
no rebalance = 3.571774054173864
rebalance = 0.3980594231521633
no rebalance = 3.469743799542099
rebalance = 0.5879714086177179
no rebalance = 2.9603085355826164
rebalance = 0.5387741742184827
no rebalance = 2.685408499158556
rebalance = 0.6485249002451118
no rebalance = 2.474120543178399
rebalance = 0.7251084671251828
no rebalance = 2.3909160122114255
rebalance = 0.8494884105822601
no rebalance = 2.348005358325317
rebalance = 0.978946420576781
no rebalance = 2.1783217814254145
rebalance = 0.9962210148410209
no rebalance = 2.8158341193782856
rebalance = 1.0924777477887098
no rebalance = 2.8005594308975565
rebalance = 1.1547149904948832
no rebalance = 2.6764255288995984
rebalance = 1.2102539346650234
