In [22]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import talib.abstract as ta
import tensorflow
import copy

In [23]:
def readData(filename):
    """Load a headerless, comma-separated OHLCV file into a DataFrame.

    The columns are named DateTime, open, high, low, close and volume. Every
    value of the first column is parsed as a UTC timestamp and then converted
    to the Asia/Kolkata timezone.

    Parameters
    ----------
    filename : anything ``pd.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        Uses the default integer index; DateTime remains an ordinary column.
    """
    def to_kolkata(raw):
        # tz_convert needs a tz-aware value, hence the utc=True parse first.
        stamp = pd.to_datetime(raw, utc=True)
        return stamp.tz_convert('Asia/Kolkata')

    column_names = ["DateTime", "open", "high", "low", "close", "volume"]
    return pd.read_csv(
        filename,
        names=column_names,
        dtype=None,
        delimiter=',',
        converters={0: to_kolkata},
    )

In [24]:
# Load both raw price files (paths are relative to the working directory).
A2Z, Nifty50 = (
    readData("data/A2Z.csv"),
    readData("data/Nifty50.csv"),
)

In [25]:
# Make sure the two time series are synced to the shorter one.
def sanitizeTimeSeries(ts1,ts2):
    """Drop rows of the longer series whose DateTime is missing from the shorter.

    Parameters
    ----------
    ts1, ts2 : pandas.DataFrame
        Frames that each have a "DateTime" column.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(ts1_out, ts2_out)``, in the same order as the arguments. The longer
        input (``ts1`` when both have the same length) comes back as a
        filtered copy with a fresh 0..n-1 index. The shorter input is returned
        unchanged, as the very same object.

    Notes
    -----
    Only the longer frame is filtered. Timestamps that exist solely in the
    shorter frame are kept there, so the two outputs can still differ in
    length. Neither input is modified, which keeps the calling cell safe to
    re-run.
    """
    flipped = len(ts2) > len(ts1)
    longer, shorter = (ts2, ts1) if flipped else (ts1, ts2)

    # One vectorised membership test instead of scanning `shorter` once per row.
    shared = longer["DateTime"].isin(shorter["DateTime"])
    longer_synced = longer.loc[shared].reset_index(drop = True)

    if flipped:
        return shorter, longer_synced
    return longer_synced, shorter

In [26]:
# Set up the time series used for prediction: keep only the timestamps the two
# series share. A2ZData's close column feeds the model further down.
Nifty50Data,A2ZData = sanitizeTimeSeries(Nifty50,A2Z)

In [15]:
import numpy
import pandas
from keras.layers import Activation, Dropout, LSTM
from keras.layers import Dense
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Frame a sequence as a supervised learning problem.
def timeseries_to_supervised(data, lag=60):
    """Build a table of lagged copies of `data` next to the current value.

    Column "1" holds the value one step earlier, column "2" two steps earlier,
    and so on up to column str(lag). The final column, str(lag + 1), is the
    unshifted input. Every missing entry, including the gaps that shifting
    opens at the top, is replaced by 0.

    Parameters
    ----------
    data : pandas.Series
        The sequence to frame.
    lag : int
        Number of lagged columns to generate.

    Returns
    -------
    pandas.DataFrame with lag + 1 columns; `data` itself is left untouched.
    """
    lagged = [data.shift(step) for step in range(1, lag + 1)]
    framed = pd.concat(lagged + [data], axis=1).fillna(0)
    framed.columns = [str(label) for label in range(1, lag + 2)]
    return framed

def difference(dataset, interval=1):
    """Return the `interval`-step differences of an indexable sequence.

    Element k of the result is ``dataset[k + interval] - dataset[k]``, so the
    output is `interval` items shorter than the input.

    Returns
    -------
    pandas.Series with a default integer index.
    """
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return pd.Series(deltas)

def inverse_difference(history, yhat, interval=1):
    """Turn a differenced forecast back into a level.

    Adds `yhat` to the value found `interval` positions from the end of
    `history`.
    """
    baseline = history[-interval]
    return yhat + baseline

# Scale train and test data to [-1, 1].
def scale(train, test):
    """Fit a MinMaxScaler on `train` and apply it to both splits.

    Parameters
    ----------
    train, test : numpy.ndarray
        Arrays that can be reshaped to ``(shape[0], shape[1])``, i.e. 2-D
        arrays or arrays whose remaining axes have total size 1.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. The scaler is fitted on the
        training data only, so scaled test values may fall outside [-1, 1].
    """
    # Flatten to 2-D before fitting so the scaler is fitted on exactly the
    # layout it is later asked to transform.
    train = train.reshape(train.shape[0], train.shape[1])
    test = test.reshape(test.shape[0], test.shape[1])

    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)

    train_scaled = scaler.transform(train)
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
 
# Inverse scaling for a forecasted value.
def invert_scale(scaler, X, value):
    """Map one scaled forecast back to the original units.

    `value` is appended to the items of `X` to form a single row, the row is
    passed through ``scaler.inverse_transform``, and the last entry of the
    result is returned.
    """
    features = list(X)
    features.append(value)
    row = numpy.array(features)
    row = row.reshape(1, len(row))
    return scaler.inverse_transform(row)[0, -1]

def upOrDown(x):
    """Binary label: 1 when `x` is strictly greater than 0.5, otherwise 0."""
    return 1 if x > 0.5 else 0
        

In [16]:
# Build the supervised dataset from first differences of the A2Z close price.
series = A2ZData['close']
rawValues = series.values
diffValues = difference(rawValues,1)
lag = 60
supervised = timeseries_to_supervised(diffValues,lag)
# The first `lag` rows contain zero-filled lags instead of real history, so
# drop them. Chaining reset_index avoids mutating a slice in place.
supervised = supervised.iloc[lag:].reset_index(drop=True)
supervisedValues = supervised.values
# The classification-target variant (see upOrDown) and MinMax scaling (see
# scale) are currently not applied; the model regresses the raw difference.

# Chronological split into train and test: first 80% of rows, then the rest.
trainBegin = int(0.8*len(supervisedValues))
train = supervisedValues[0:trainBegin]
test = supervisedValues[trainBegin:]
# X is the 60 lagged differences (column 0 is the most recent lag);
# y is the 61st column, i.e. the current difference.
X_train,y_train = train[:,0:-1],train[:,-1]
X_test,y_test = test[:,0:-1],test[:,-1]
# The LSTM below is declared with input_shape=(None, 1), and evaluate() indexes
# a third axis, so the inputs must be (samples, timesteps, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Remove each window's own mean from its inputs. The same transform is applied
# to the test windows so the model is evaluated on inputs preprocessed exactly
# like the ones it was trained on. Targets (y_train / y_test) are left as-is.
for windows in (X_train, X_test):
    for count,row in enumerate(windows):
        windows[count] = row-np.mean(row)
    

(31125, 60)

In [None]:
# Model building: two stacked LSTM layers, each followed by dropout, then a
# single dense unit with a linear activation, trained with MSE loss and Adam.
model = Sequential()
# [input features per timestep, units in LSTM 1, units in LSTM 2, output units]
layers = [1, 50, 100, 1]
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(layers[1], input_shape=(None, layers[0]), return_sequences=True))
model.add(Dropout(0.2))
# The last recurrent layer emits only its final output.
model.add(LSTM(layers[2], return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(layers[3]))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="adam")

In [None]:
# Train for a small number of epochs; 5% of the training data is set aside
# for validation via validation_split.
epoch = 2
model.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    batch_size=512,
    validation_split=0.05,
)

In [None]:
# Predict the next difference for every test window.
predicted = model.predict(X_test)


In [None]:
# Plot the held-out targets with a title and axis labels so the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.plot(y_test)
ax.set(
    title="Test-set targets",
    xlabel="Test sample index",
    ylabel="Close-price difference",
)
plt.show()

In [None]:
def evaluate(data,pred_model,capital=100000,max_windows=8):
    """Walk through input windows and "buy" whenever the model predicts > 0.

    For each window the model is asked for one prediction. When it is
    positive, the value in the window's final row (first column) is subtracted
    from the running balance and the buy counter is incremented.

    Parameters
    ----------
    data : iterable of 2-D numpy arrays
        Each item is reshaped to ``(1, rows, cols)`` before prediction.
    pred_model : object with a ``predict`` method
        ``predict(batch)[0][0]`` must yield the scalar forecast.
    capital : number, default 100000
        Starting balance.
    max_windows : int or None, default 8
        Number of leading windows to process; ``None`` processes all of them.

    Returns
    -------
    tuple
        ``(balance, bought, last)`` where `last` is the final-row value of the
        most recently processed window (0 if none was processed).

    NOTE(review): when `data` comes from timeseries_to_supervised, the final
    row is the oldest lag and the values are price differences, not prices.
    Confirm this is the intended "price paid".
    """
    balance = capital
    bought = 0
    last = 0
    for count,window in enumerate(data):
        if max_windows is not None and count >= max_windows:
            break
        # Add the leading batch axis the model expects.
        batch = window.reshape(1,window.shape[0],window.shape[1])
        forecast = pred_model.predict(batch)[0][0]
        last = batch[0][-1][0]
        if forecast > 0:
            balance -= last
            bought += 1
    return balance,bought,last
            
    

In [None]:
# Run evaluate on the test windows with the fitted model; keep the remaining
# balance, the number of buys and the last window's final value.
start,bought,last = evaluate(X_test,model)

In [None]:
# Display the dimensions of the test inputs.
X_test.shape

In [None]:
# Scratch check: run the model on the first test window only.
temp = X_test[0]
temp.shape  # not displayed: only a cell's last expression is shown
temp = temp.reshape(1,temp.shape[0],temp.shape[1])  # add a leading batch axis; needs temp to be 2-D
temp[0][-1][0]  # not displayed either; same element evaluate() reads as `last`
val = model.predict(temp)[0][0]

In [None]:
# Display the balance returned by evaluate.
start

In [None]:
# Display the single-window prediction.
val