In [107]:
#Setup
%matplotlib inline
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt


# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [108]:
def readData(filename):
    """Load a comma-separated OHLCV file into a DataFrame.

    Column names are supplied explicitly as datetime, open, high, low,
    close and volume. Every value of the first column is parsed as a UTC
    timestamp and converted to the Asia/Kolkata time zone.

    filename -- path (or file-like object) handed straight to pd.read_csv.
    Returns the resulting DataFrame with a default integer index.
    """
    def toKolkata(raw):
        # Parse as UTC first, then shift to Indian local time.
        stamp = pd.to_datetime(raw, utc=True)
        return stamp.tz_convert('Asia/Kolkata')

    columnNames = ["datetime","open","high","low","close","volume"]
    readOptions = dict(
        names=columnNames,
        dtype=None,
        delimiter=',',
        converters={0: toKolkata},  # key 0 = first column by position
    )
    return pd.read_csv(filename, **readOptions)

# Making sure that 2 timeseries are synced to the smaller time series
def sanitizeTimeSeries(ts1,ts2,column="DateTime"):
    """Trim the longer of two frames to the timestamps present in the shorter.

    ts1, ts2 -- DataFrames that both contain `column`.
    column   -- name of the timestamp column used for matching
                (defaults to "DateTime"; pass "datetime" for frames
                produced by readData).

    Returns a (ts1, ts2) tuple in the same order as the arguments. The
    longer frame is returned as a filtered copy with a fresh 0..n-1 index;
    the shorter frame is returned untouched. When both have equal length,
    ts1 is the one that gets filtered. Neither input is modified.

    Only the longer frame is filtered: timestamps that exist solely in the
    shorter frame are left in place.
    """
    flipped = len(ts2) > len(ts1)
    bigger, smaller = (ts2, ts1) if flipped else (ts1, ts2)

    # Vectorised membership test instead of a per-timestamp Python loop
    # that dropped rows from the caller's frame in place.
    keep = bigger[column].isin(smaller[column])
    trimmed = bigger[keep].reset_index(drop=True)

    if flipped:
        return smaller, trimmed
    return trimmed, smaller
    

# Builds sliding windows of lag+1 consecutive values (lag inputs + 1 target).
def timeseriesLagged(data, lag=60):
    """Turn a 1-D series into a frame of overlapping windows.

    data -- pandas Series of observations.
    lag  -- number of input steps per window; each row holds lag+1 values.

    Returns a DataFrame whose columns are named "1" .. str(lag+1), oldest
    value on the left and newest on the right. Output row r contains the
    values at positions r .. r+lag of `data`. The index is reset to 0..n-1.
    """
    width = lag + 1

    # Largest shift first so the oldest value ends up in the leftmost column.
    shifted = [data.shift(step) for step in range(width, 0, -1)]
    frame = pd.concat(shifted, axis=1)
    frame.columns = [str(position) for position in range(1, width + 1)]

    # Shifting leaves NaNs at the top; they are zero-filled and then the
    # first lag+1 rows (the incomplete windows) are discarded.
    frame = frame.fillna(0)
    frame = frame.iloc[width:]
    return frame.reset_index(drop=True)


# Binarizes the last column into 1 or 0.
def binarizeTime(series,lag,dif=0,rate=0.01,maxPer=[20]):
    """Replace the target column with an up/down flag, in place.

    series -- DataFrame with string-named columns str(lag) and str(lag+1).
    lag    -- index of the last input column; column str(lag+1) is the target.
    dif    -- margin added to the previous value before comparing; only 0
              is supported at the moment, anything else raises Exception.
    rate, maxPer -- accepted but not used by the current implementation.

    The target column becomes 1 where it is strictly greater than the
    previous column plus `dif`, otherwise 0. The same (mutated) frame is
    returned.
    """
    if dif != 0:
        raise Exception("dif not yet baked in! ")

    previousCol = str(lag)
    targetCol = str(lag+1)
    wentUp = series[targetCol] > series[previousCol] + dif
    series[targetCol] = np.where(wentUp, 1, 0)
    return series

In [109]:
# Setup Parameters
lag = 4 # number of lagged differences fed to the model for each sample
dif = 0 # difference between prices to trigger purchase for binarize

dataInit = readData("data/TRIL.csv")
# Work on first differences of the close price; the leading NaN row is dropped.
data = dataInit['close'].diff().dropna()
# Alternative scalings tried earlier and currently disabled:
#   min-max:     (data-data.min())/(data.max()-data.min())
#   vector norm: data/np.linalg.norm(data)
#   log:         np.log(data)

# Each row: `lag` inputs followed by one target value.
series = timeseriesLagged(data,lag)
# binarizeTime(series,lag,dif) would turn the target column into 1/0 (disabled).
seriesVals = series.values

# Chronological 80/20 split; trainBegin is the index where the test rows start.
trainBegin = int(0.8*len(seriesVals))
train, test = seriesVals[:trainBegin], seriesVals[trainBegin:]

# Inputs are all but the last column, the target is the last column.
xTrain, yTrain = train[:, :-1], train[:, -1]
xTest, yTest = test[:, :-1], test[:, -1]

# Keras recurrent layers expect (samples, timesteps, features).
xTrain = xTrain[..., np.newaxis]
xTest = xTest[..., np.newaxis]

In [110]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
import time

# Layer widths; only indices 1-4 (LSTM units) and 7 (output units) are used below.
layers = [1, 50, 100,200,400,800,1600, 1]

model = Sequential()

# First recurrent layer declares the input: variable-length sequences of 1 feature.
model.add(LSTM(layers[1], input_shape=(None, 1), return_sequences=True))
model.add(Dropout(0.2))

# Two intermediate recurrent layers that pass the full sequence onwards.
for units in layers[2:4]:
    model.add(LSTM(units, return_sequences=True))

# Last recurrent layer collapses the sequence to a single vector.
model.add(LSTM(layers[4], return_sequences=False))
model.add(Dropout(0.2))

# Linear regression head.
model.add(Dense(layers[7]))
model.add(Activation("linear"))

start = time.time()
model.compile(loss="mse", optimizer="rmsprop")
print ("Compilation Time : " + str(time.time() - start))
model.summary()

Compilation Time : 0.009473562240600586
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_17 (LSTM)               (None, None, 50)          10400     
_________________________________________________________________
dropout_13 (Dropout)         (None, None, 50)          0         
_________________________________________________________________
lstm_18 (LSTM)               (None, None, 100)         60400     
_________________________________________________________________
lstm_19 (LSTM)               (None, None, 200)         240800    
_________________________________________________________________
lstm_20 (LSTM)               (None, 400)               961600    
_________________________________________________________________
dropout_14 (Dropout)         (None, 400)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 

In [111]:
# Number of passes over the training data.
epoch = 20

# Recorded before fitting so the interrupt handler below can report the
# elapsed time; previously this name was never defined and an interrupt
# ended in a NameError instead of the duration message.
global_start_time = time.time()

try:
    # The last 20% of the training rows are held out for validation.
    model.fit(
        xTrain, yTrain,
        batch_size=1024, epochs=epoch, validation_split=0.20)

except KeyboardInterrupt:
    # Allow stopping a long run from the notebook without losing the kernel state.
    print ('Training duration (s) : ', time.time() - global_start_time)


Train on 4976 samples, validate on 1244 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [113]:
# Run the trained model over every test window.
# NOTE(review): presumably returns shape (n_samples, 1) since the final Dense layer has 1 unit — confirm.
predicted = model.predict(xTest)
# Optional flattening to a 1-D array, currently disabled:
#predicted = np.reshape(predicted, (predicted.size,))

In [None]:
# Debug check: show the container type of the test inputs.
type(xTest)

In [None]:
# Directional accuracy: a prediction counts as correct when it has the same
# strict sign as the actual value. A zero on either side counts as wrong.
totalPreds = len(yTest)
corr = sum(
    1
    for guess, actual in zip(predicted[:totalPreds], yTest)
    if (guess > 0 and actual > 0) or (guess < 0 and actual < 0)
)
total = totalPreds
wrong = total - corr

# Visual comparison on the first 100 test samples; predictions are scaled
# by 10 so they are visible next to the actual values.
plt.plot(predicted[:100]*10)
plt.plot(yTest[:100])
plt.show()

print("Correct: %d" % corr)
print("Wrong:   %d" % wrong)
print("Total: %d" % total)

# Backtesting Tests

The rest of this section is devoted to backtesting the trained model on the held-out portion of the price data.

In [None]:
import backtrader as bt
import backtrader.feeds as btfeeds
def convertfunc(x):
    # Parse as UTC, then express in Indian local time.
    return pd.to_datetime(x, utc=True).tz_convert('Asia/Kolkata')

csvColumns = ["datetime","open","high","low","close","volume"]

# Same file as the training data, but indexed by its first (datetime) column
# and restricted to the rows from trainBegin onwards.
# NOTE(review): row trainBegin appears to be the first close that feeds the
# first test window — confirm the offset if lag handling changes.
curData = pd.read_csv(
    "data/TRIL.csv",
    names=csvColumns,
    dtype=None,
    delimiter=',',
    converters={0: convertfunc},
    index_col=0,
).iloc[trainBegin:]

# openinterest=-1: no open-interest column is mapped for this feed.
testData = btfeeds.PandasData(
    dataname=curData,
    openinterest=-1,
    timeframe=bt.TimeFrame.Minutes,
)

In [122]:
class neuralInput(bt.Indicator):
    """Backtrader indicator exposing a model's next-step prediction.

    Lines:
        change -- the model output for the current bar.

    Params:
        period      -- number of most recent close prices collected per bar.
                       They are differenced before being fed to the model,
                       so the model sees period-1 values; must be >= 2.
        neuralModel -- object with a Keras-style predict() accepting an
                       array shaped (1, timesteps, 1).
    """
    lines = ('change',)
    params = (('period', 20),('neuralModel',None))

    def __init__(self):
        # No output until `period` bars are available.
        self.addminperiod(self.params.period)

    def next(self):
        closes = np.array(self.data.close.get(size=self.p.period))

        # The model in this notebook is fitted on first differences of the
        # close price, so raw price levels must be differenced here too;
        # feeding levels would put the inputs on a different scale from
        # the training data.
        deltas = np.diff(closes)

        window = deltas.reshape(1, deltas.shape[0], 1)
        ch = self.params.neuralModel.predict(window)
        # predict() returns an array; store a plain scalar in the line buffer.
        self.lines.change[0] = float(np.ravel(ch)[0])

In [128]:

class SmaCross(bt.SignalStrategy):
    """Strategy shell that attaches the neural-network indicator.

    NOTE(review): no signal is registered here (there is no signal_add
    call), so this strategy places no orders; the recorded run shows the
    portfolio value unchanged. The class name is a leftover and does not
    describe a moving-average crossover.
    """

    def __init__(self):
        # lag+1 closes per bar, matching the window the model was fitted on.
        indicator = neuralInput(period=lag+1,neuralModel=model)
        print(type(indicator))


# Backtesting engine that the data feed and strategy get attached to.
cerebro = bt.Cerebro()

In [129]:

# Wire the held-out price feed and the strategy into the engine.
cerebro.adddata(testData)
cerebro.addstrategy(SmaCross)

startValue = cerebro.broker.getvalue()
print('Starting Portfolio Value: %.2f' % startValue)

# Replay the feed bar by bar through the strategy.
cerebro.run()

endValue = cerebro.broker.getvalue()
print('Final Portfolio Value: %.2f' % endValue)

Starting Portfolio Value: 10000.00
<class '__main__.neuralInput'>
Final Portfolio Value: 10000.00


In [None]:
# Debug cell: display the test input array.
xTest

In [None]:
# Debug cell: display the price frame used for backtesting.
curData