In [1]:
# List all device
from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

In [2]:
# Check available GPU
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

In [3]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
# The GPU id to use, usually either "0" or "1";
os.environ["CUDA_VISIBLE_DEVICES"]="0";  

In [4]:
# Importing the libraries
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.activations import softmax
from keras.optimizers import SGD
from keras.models import load_model
import math
import pickle


In [5]:
with open("../data/dowjones_calculated/periods1.txt", "rb") as fp:   # Unpickling
    dataset = pickle.load(fp)

In [6]:
def get_one_hot(targets, nb_classes):
    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
    return res.reshape(list(targets.shape)+[nb_classes])

In [7]:
positions = []
for i in range(len(dataset[0])):
    model_period = f"../model/LSTM/my_model1_period{i}.h5"
    regressor = load_model(model_period, custom_objects={"softmax": softmax})
    x_test = dataset[1][i][0].values
    y_test = dataset[1][i][1].values
    y_test = get_one_hot(y_test, 2)
    test_gen = TimeseriesGenerator(x_test, y_test,
                                   length=240, sampling_rate=1,
                                   batch_size=64)


    predicted = regressor.predict(x_test)
    label = predicted > 0.5
    label = label * 1 # Convert boolean to int
    print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())
    positions.append(predicted[:, :, 1])

0.500258064516129
0.5028387096774194
0.500774193548387
0.48916129032258066
0.4904516129032258
0.5061935483870968
0.5003870967741935
0.4990967741935484
0.4931612903225806


In [8]:
all_positions = np.concatenate(positions,axis=0)

In [9]:
all_positions.shape

(2250, 31)

In [10]:
all_positions

array([[0.57124615, 0.573437  , 0.54652536, ..., 0.5       , 0.5       ,
        0.57784575],
       [0.5085084 , 0.59007627, 0.5350364 , ..., 0.5       , 0.5       ,
        0.56268674],
       [0.5193662 , 0.5351095 , 0.5723224 , ..., 0.5       , 0.5       ,
        0.6100473 ],
       ...,
       [0.5       , 0.48665977, 0.548019  , ..., 0.4910401 , 0.5       ,
        0.34335482],
       [0.5127578 , 0.46745497, 0.5642515 , ..., 0.4908291 , 0.47332513,
        0.5098946 ],
       [0.55694616, 0.46311957, 0.64376354, ..., 0.45142856, 0.47786263,
        0.53486437]], dtype=float32)

In [11]:
stocks = pd.read_csv("../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv", index_col='Date',parse_dates=['Date'])
stocks = stocks[["Close", "Name"]]
stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')
stocks.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-03,40.91,10.68,47.58,52.58,70.44,57.8,17.45,59.08,24.4,35.37,...,26.84,10.74,23.78,58.78,45.99,61.73,56.53,30.38,46.23,58.47
2006-01-04,40.97,10.71,47.25,51.95,71.17,59.27,17.85,58.91,23.99,35.32,...,26.97,10.69,24.55,58.89,46.5,61.88,56.19,31.27,46.32,58.57
2006-01-05,41.53,10.63,47.65,52.5,70.33,59.27,18.35,58.19,24.41,35.23,...,26.99,10.76,24.58,58.7,46.95,61.69,55.98,31.63,45.69,58.28
2006-01-06,43.21,10.9,47.87,52.68,69.35,60.45,18.77,59.25,24.74,35.47,...,26.91,10.72,24.85,58.64,47.21,62.9,56.16,31.35,45.88,59.43
2006-01-09,43.42,10.86,47.08,53.99,68.77,61.55,19.06,58.95,25.0,35.38,...,26.86,10.88,24.85,59.08,47.23,61.4,56.8,31.48,45.71,59.4


In [12]:
real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)
real_returns = real_returns.dropna()
real_returns.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-04,0.001467,0.002809,-0.006936,-0.011982,0.010363,0.025433,0.022923,-0.002877,-0.016803,-0.001414,...,0.004844,-0.004655,0.03238,0.001871,0.011089,0.00243,-0.006015,0.029296,0.001947,0.00171
2006-01-05,0.013669,-0.00747,0.008466,0.010587,-0.011803,0.0,0.028011,-0.012222,0.017507,-0.002548,...,0.000742,0.006548,0.001222,-0.003226,0.009677,-0.00307,-0.003737,0.011513,-0.013601,-0.004951
2006-01-06,0.040453,0.0254,0.004617,0.003429,-0.013934,0.019909,0.022888,0.018216,0.013519,0.006812,...,-0.002964,-0.003717,0.010985,-0.001022,0.005538,0.019614,0.003215,-0.008852,0.004158,0.019732
2006-01-09,0.00486,-0.00367,-0.016503,0.024867,-0.008363,0.018197,0.01545,-0.005063,0.010509,-0.002537,...,-0.001858,0.014925,0.0,0.007503,0.000424,-0.023847,0.011396,0.004147,-0.003705,-0.000505
2006-01-10,-0.010134,0.063536,-0.030374,-0.002964,0.004799,-0.004062,-0.004722,0.004919,0.0128,-0.00537,...,0.005212,0.001838,-0.016499,-0.003893,-0.007199,0.019707,0.000704,0.00413,0.003282,0.007744


In [13]:
def long_short_postion(returns, k):
    positions = returns
    short = np.argpartition(positions, k)[:k]
    neutral = np.argpartition(positions, len(positions) - k)[:(len(positions) - k)]
    positions[:] = 1
    positions[neutral] = 0
    positions[short] = -1
    return positions

In [14]:
probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)

In [15]:
probabilities.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-12-26,0.571246,0.573437,0.546525,0.5,0.513644,0.430423,0.565427,0.530054,0.593216,0.601934,...,0.618371,0.5,0.482071,0.53338,0.334313,0.520684,0.551699,0.5,0.5,0.577846
2008-12-29,0.508508,0.590076,0.535036,0.478893,0.57836,0.473192,0.5,0.516386,0.484764,0.594085,...,0.56643,0.5,0.588645,0.512069,0.36993,0.448992,0.586964,0.5,0.5,0.562687
2008-12-30,0.519366,0.53511,0.572322,0.5,0.502617,0.397563,0.523143,0.480695,0.490447,0.59569,...,0.650183,0.5,0.527716,0.5,0.361797,0.421763,0.508838,0.5,0.5,0.610047
2008-12-31,0.5,0.645541,0.5,0.389144,0.594974,0.653959,0.480825,0.634148,0.541854,0.6116,...,0.462952,0.590617,0.39662,0.5,0.315047,0.601864,0.581181,0.552163,0.5,0.5
2009-01-02,0.5,0.595534,0.5,0.363204,0.634352,0.725393,0.404087,0.631303,0.400676,0.580626,...,0.359394,0.441606,0.472419,0.5,0.390882,0.581221,0.672462,0.549469,0.5,0.5


In [16]:
k = 10
probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')
probabilities.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-12-26,1.0,1.0,1.0,-1.0,0.0,-1.0,1.0,0.0,1.0,1.0,...,1.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,-1.0,1.0
2008-12-29,0.0,1.0,1.0,-1.0,1.0,-1.0,0.0,0.0,-1.0,1.0,...,1.0,-1.0,1.0,0.0,-1.0,-1.0,1.0,0.0,0.0,1.0
2008-12-30,0.0,1.0,1.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.0,...,1.0,-1.0,1.0,0.0,-1.0,-1.0,0.0,0.0,-1.0,1.0
2008-12-31,0.0,1.0,0.0,-1.0,1.0,1.0,0.0,1.0,0.0,1.0,...,-1.0,1.0,-1.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0
2009-01-02,0.0,1.0,0.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,...,-1.0,-1.0,-1.0,0.0,-1.0,1.0,1.0,0.0,0.0,0.0


In [17]:
probabilities1 = probabilities * -1

In [18]:
final_returns = real_returns[750:3000].mul(probabilities1)

In [19]:
(final_returns + 1).product().sum()/(2 * k)

1.789426372451126

In [20]:
(1 + final_returns.sum(axis = 1)/(2 * k)).product()

1.042597603786152