In [12]:
import keras
import tensorflow as tf
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
from keras import optimizers
import numpy as np
np.random.seed(4)
from sklearn import preprocessing


In [90]:
def csv_to_dataset(csv_path):
    data = pd.read_csv(csv_path)
    data = data.drop('Date', axis=1)
    data = data.drop('Adj Close', axis=1)
    data = data.drop(0, axis=0)
    data=data.dropna(axis=0)
    print(data)
    data = data.values

    data_normaliser = preprocessing.MinMaxScaler()
    data_normalised = data_normaliser.fit_transform(data)

    # using the last {history_points} open close high low volume data points, predict the next open value
    ohlcv_histories_normalised = np.array([data_normalised[i:i + history_points].copy() for i in range(len(data_normalised) - history_points)])
    next_day_open_values_normalised = np.array([data_normalised[:, 0][i + history_points].copy() for i in range(len(data_normalised) - history_points)])
    
    next_day_open_values_normalised = np.expand_dims(next_day_open_values_normalised, -1)

    next_day_open_values = np.array([data[:, 0][i + history_points].copy() for i in range(len(data) - history_points)])
    next_day_open_values = np.expand_dims(next_day_open_values, -1)

    y_normaliser = preprocessing.MinMaxScaler()
    y_normaliser.fit(next_day_open_values)

    def calc_ema(values, time_period):
        # https://www.investopedia.com/ask/answers/122314/what-exponential-moving-average-ema-formula-and-how-ema-calculated.asp
        sma = np.mean(values[:, 3])
        ema_values = [sma]
        k = 2 / (1 + time_period)
        for i in range(len(his) - time_period, len(his)):
            close = his[i][3]
            ema_values.append(close * k + ema_values[-1] * (1 - k))
        return ema_values[-1]

    technical_indicators = []
    for his in ohlcv_histories_normalised:
        # note since we are using his[3] we are taking the SMA of the closing price
        sma = np.mean(his[:, 3])
        macd = calc_ema(his, 12) - calc_ema(his, 26)
        technical_indicators.append(np.array([sma]))
        # technical_indicators.append(np.array([sma,macd,]))

    technical_indicators = np.array(technical_indicators)

    tech_ind_scaler = preprocessing.MinMaxScaler()
    technical_indicators_normalised = tech_ind_scaler.fit_transform(technical_indicators)

    assert ohlcv_histories_normalised.shape[0] == next_day_open_values_normalised.shape[0] == technical_indicators_normalised.shape[0]
    return ohlcv_histories_normalised, technical_indicators_normalised, next_day_open_values_normalised, next_day_open_values, y_normaliser

In [91]:
history_points=50
ohlcv_histories, technical_indicators, next_day_open_values, unscaled_y, y_normaliser = csv_to_dataset('INFY.NS.csv')
print(ohlcv_histories.shape)
print(technical_indicators.shape)
print(next_day_open_values.shape)
print(unscaled_y.shape)


            Open         High          Low        Close      Volume
1     651.400024   653.299988   635.599976   639.049988  11963447.0
2     619.950012   634.900024   603.500000   623.849976  17783287.0
3     644.000000   646.000000   626.000000   628.750000   8878320.0
4     640.000000   661.000000   639.000000   653.299988  13322913.0
5     627.799988   646.500000   627.799988   633.200012  11660575.0
..           ...          ...          ...          ...         ...
246  1423.000000  1426.000000  1406.449951  1411.050049   5793571.0
247  1410.250000  1434.949951  1406.599976  1430.199951   5879234.0
248  1430.000000  1451.699951  1421.000000  1439.849976   5654434.0
249  1455.000000  1455.000000  1430.050049  1441.050049   6052712.0
250  1474.000000  1477.550049  1452.250000  1459.550049   3520200.0

[249 rows x 5 columns]
lol (199,)
(199, 1)
(199, 50, 5)
(199, 1)
(199, 1)
(199, 1)


In [86]:
test_split = 0.9
n = int(ohlcv_histories.shape[0] * test_split)

ohlcv_train = ohlcv_histories[:n]
tech_ind_train = technical_indicators[:n]
y_train = next_day_open_values[:n]

ohlcv_test = ohlcv_histories[n:]
tech_ind_test = technical_indicators[n:]
y_test = next_day_open_values[n:]

unscaled_y_test = unscaled_y[n:]

In [92]:
lstm_input = Input(shape=(history_points, 5), name='lstm_input')
x = LSTM(50, name='lstm_0')(lstm_input)
x = Dropout(0.2, name='lstm_dropout_0')(x)
x = Dense(64, name='dense_0')(x)
x = Activation('sigmoid', name='sigmoid_0')(x)
x = Dense(1, name='dense_1')(x)
output = Activation('linear', name='linear_output')(x)

model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam(lr=0.0005)
model.compile(optimizer=adam, loss='mean_squared_error')
model.fit(x=ohlcv_train, y=y_train, batch_size=32, epochs=10, shuffle=True, validation_split=0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2a0d4891400>

In [93]:
print(model.summary())

Model: "functional_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_input (InputLayer)      [(None, 50, 5)]           0         
_________________________________________________________________
lstm_0 (LSTM)                (None, 50)                11200     
_________________________________________________________________
lstm_dropout_0 (Dropout)     (None, 50)                0         
_________________________________________________________________
dense_0 (Dense)              (None, 64)                3264      
_________________________________________________________________
sigmoid_0 (Activation)       (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
_________________________________________________________________
linear_output (Activation)   (None, 1)               

In [99]:
y_test_predicted = model.predict(ohlcv_test)
print(y_test_predicted[start:end])
y_test_predicted = y_normaliser.inverse_transform(y_test_predicted)
y_predicted = model.predict(ohlcv_histories)
y_predicted = y_normaliser.inverse_transform(y_predicted)

assert unscaled_y_test.shape == y_test_predicted.shape
real_mse = np.mean(np.square(unscaled_y_test - y_test_predicted))
scaled_mse = real_mse / (np.max(unscaled_y_test) - np.min(unscaled_y_test)) * 100


import matplotlib.pyplot as plt

#plt.gcf().set_size_inches(22, 15, forward=True)

start = 0
end = -1
print(y_test_predicted.shape)
real = plt.plot(unscaled_y_test[start:end], label='real')
pred = plt.plot(y_test_predicted[start:end], label='predicted')

# real = plt.plot(unscaled_y[start:end], label='real')
# pred = plt.plot(y_predicted[start:end], label='predicted')

plt.legend(['Real', 'Predicted'])

plt.savefig("mygraph.png")


[[0.6668958 ]
 [0.6710758 ]
 [0.67391634]
 [0.67788315]
 [0.68110067]
 [0.6827162 ]
 [0.6832824 ]
 [0.6829225 ]
 [0.6843975 ]
 [0.68449146]
 [0.68409705]
 [0.6827892 ]
 [0.68456006]
 [0.68580973]
 [0.68693876]
 [0.68994063]
 [0.6925038 ]
 [0.6953338 ]
 [0.69854957]]
(20, 1)


nan
