In [1]:
# Importing modules
import numpy as np 
import pandas as pd

In [2]:
# Reading in the data
data = pd.read_csv('../data/processed/stocks/nse_scraped/TCS.csv', thousands=',')

In [3]:
# Isolating the date and close price
data = data[['Date', 'Close']]

In [4]:
new_data = data.loc[:,:]

In [5]:
new_data.shape

(4369, 2)

In [6]:
# Feature preprocessing
new_data = new_data.drop(['Date'], axis = 1)
new_data = new_data.reset_index(drop = True)
T = new_data.values
T = T.astype('float32')
T = np.reshape(T, (-1, 1))

In [7]:
# Min-max scaling to get the values in the range [0,1] to optimize convergence speed
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (0, 1))
T = scaler.fit_transform(T)



In [8]:
# 80-20 split
train_size = int(len(T) * 0.80)
test_size = int(len(T) - train_size)
train, test = T[0:train_size,:], T[train_size:len(T),:]


In [9]:
train.shape,test.shape

((3495, 1), (874, 1))

In [10]:
# Method for create features from the time series data
def create_features(data, window_size):
    X, Y = [], []
    for i in range(len(data) - window_size - 1):
        window = data[i:(i + window_size), 0]
        X.append(window)
        Y.append(data[i + window_size, 0])
    return np.array(X), np.array(Y)


In [11]:
# Roughly one month of trading 
window_size = 7
X_train, Y_train = create_features(train, window_size)

X_test, Y_test = create_features(test, window_size)


In [12]:
X_train.shape,Y_train.shape,X_test.shape,Y_test.shape

((3487, 7), (3487,), (866, 7), (866,))

In [13]:
# Reshape to the format of [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))

X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

T_shape = T.shape
train_shape = train.shape
test_shape = test.shape

# Make sure that the number of rows in the dataset = train rows + test rows
def isLeak(T_shape, train_shape, test_shape):
    return not(T_shape[0] == (train_shape[0] + test_shape[0]))

print(isLeak(T_shape, train_shape, test_shape))

# Model imports
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
""" from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout """

# Setting seed for reproducibility 
tf.random.set_seed(11)
np.random.seed(11)

# Building model
model = Sequential()

model.add(LSTM(units = 50, activation = 'relu', #return_sequences = True, 
               input_shape = (X_train.shape[1], window_size)))
model.add(Dropout(0.2))

# Optional additional model layer to make a deep network. If you want to use this, uncomment #return_sequences param in previous add
"""
model.add(LSTM(units = 25, activation = 'relu'))
model.add(Dropout(0.2))
"""

# Output layer
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')



history = model.fit(X_train, Y_train, epochs = 30, batch_size = 20, validation_data = (X_test, Y_test))

"""
Loading the best model and predicting
"""


# Predicting and inverse transforming the predictions

train_predict = model.predict(X_train)

Y_hat_train = scaler.inverse_transform(train_predict)

test_predict = model.predict(X_test)

Y_hat_test = scaler.inverse_transform(test_predict)

# Inverse transforming the actual values, to return them to their original values
Y_test = scaler.inverse_transform([Y_test])
Y_train = scaler.inverse_transform([Y_train])

# Reshaping 
Y_hat_train = np.reshape(Y_hat_train, newshape =X_train.shape[0])
Y_hat_test = np.reshape(Y_hat_test, newshape = X_test.shape[0])

Y_train = np.reshape(Y_train, newshape = X_train.shape[0])
Y_test = np.reshape(Y_test, newshape = X_test.shape[0])

# Model performance evaluation
from sklearn.metrics import mean_squared_error

train_RMSE = np.sqrt(mean_squared_error(Y_train, Y_hat_train))

test_RMSE = np.sqrt(mean_squared_error(Y_test, Y_hat_test))

print('Train RMSE is: ')
print(train_RMSE, '\n')
print('Test RMSE is: ')
print(test_RMSE)

False


2022-04-16 10:22:56.060413: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


NotImplementedError: Cannot convert a symbolic Tensor (lstm/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

In [17]:
import tensorflow as tf

print(tf.__version__)

2.4.1
