In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated '<directory>:<url-quoted download URL>' entries consumed by the download loop.
# NOTE(review): this is a hard-coded signed URL (X-Goog-Date=20240401T160124Z, X-Goog-Expires=259200),
# so it has presumably expired — regenerate it from Kaggle before relying on this cell.
DATA_SOURCE_MAPPING = 'tesla-stock-price:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F3161600%2F5475006%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240401%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240401T160124Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dba1c1299f5c7896f5f88048e5d463fccb1c8df3cf83b7913ea715f98aec5725efe059e5eb374b9e2fa0db5f9f3e3352809f63c4fe147604da9ae7bf4902191c2fbffb64bfbfccab9e069be245b73f60acda9ac0a4b11295689fd9878219fcf5415081fe1056ec32e651c80ee12143eca4223bf8fa7eed8dd1358043638a5b345407bdeebed0e7fa7953c6683ff70f661617b41be468d8e933b9191b20de20b1e0efda65da51d38286ab6319eb5a41ccc5aea3af0bcc84d3f7c2454af9bca6d433ed1cab19f53980ec42da7c6d6807e6f68ac0f55e3585ac989e2a83d28a5a85282758e274a46c808c2259797f1f218f64e9cb0e55e11185d0dd131fe58f66e66'

# Directory the datasets are extracted into, and the working directory created next to it.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere else in the visible part of this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every '<directory>:<url-quoted URL>' entry of DATA_SOURCE_MAPPING and
# unpack the archive into KAGGLE_INPUT_PATH/<directory>. Failures are reported
# and the loop moves on to the next entry.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a literal colon in the URL part cannot
    # break the two-way unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the progress bar is
            # replaced by a plain byte counter instead of raising on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            size_label = content_length if content_length else 'unknown'
            print(f'Downloading {directory}, {size_label} bytes compressed')
            dl = 0
            # 'chunk' rather than 'data' so the notebook-level name 'data'
            # (the DataFrame loaded later) is not bound to raw bytes here.
            chunk = fileres.read(CHUNK_SIZE)
            while chunk:
                dl += len(chunk)
                tfile.write(chunk)
                if total_length > 0:
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
                chunk = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to the file and rewind before the archive
            # readers consume the same file object.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the imported 'tarfile'
                # module is not overwritten.
                # NOTE(review): extractall() trusts member paths in the archive;
                # only use with sources you trust.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is handled first because it is a subclass of OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


**Load Necessary Packages**

In [None]:
import pandas as pd
from sklearn import preprocessing
import numpy as np

import keras
import tensorflow as tf
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
from keras import optimizers
from keras.callbacks import EarlyStopping

# Use one seed value for both NumPy's global RNG and TensorFlow's global RNG.
SEED = 4
np.random.seed(SEED)

from tensorflow.random import set_seed
set_seed(SEED)

**Use the past 50 days of price data to predict the next day's closing price**

In [None]:
# Length (in rows) of each input window; the row immediately after a window
# supplies the value to predict.
history_points = 50

**Load dataset**

In [None]:
# Read the daily TSLA CSV extracted by the download cell and preview the first rows.
csv_path = '/kaggle/input/tesla-stock-price/TSLA_daily.csv'
data = pd.read_csv(csv_path)
data.head()

Reverse the time series so that the most recent day comes last

In [None]:
# Flip the row order and renumber the index from 0.
# NOTE: running this cell a second time flips the rows back again.
data = data.iloc[::-1].reset_index(drop=True)
data.head()

**Drop Date Column**

In [None]:
# Remove the 'date' column so that only the remaining columns are scaled.
data = data.drop(columns=['date'])

**Perform MinMax scaler normalization of the time series using the sklearn preprocessing package**

In [None]:
# Scale every remaining column to [0, 1] with its own min/max.
# NOTE(review): the scaler is fitted on all rows, including those that later
# end up in the test split — confirm this is acceptable for the evaluation.
data_normaliser = preprocessing.MinMaxScaler()
data_normaliser.fit(data)
data_normalised = data_normaliser.transform(data)

**Using the last {history_points} open high low close volume data points, predict the next close value**

In [None]:
# One window of history_points consecutive rows per start index; the final
# history_points rows never start a window because no following row exists.
window_starts = range(len(data_normalised) - history_points)
ohlcv_histories_normalised = np.array(
    [data_normalised[start:start + history_points].copy() for start in window_starts]
)

In [None]:
# Target for each window: column 3 of the row that directly follows it, i.e.
# column 3 of every row from index history_points onward.
next_day_close_values_normalised = data_normalised[history_points:, 3].copy()

In [None]:
# Shape the targets as a single column (n_samples, 1). reshape(-1, 1) gives the
# same result as adding a trailing axis on the first run, but unlike
# np.expand_dims it does not keep adding axes if the cell is executed again.
next_day_close_values_normalised = next_day_close_values_normalised.reshape(-1, 1)

In [None]:
# Unscaled counterpart of the targets: column 3 of every row from index
# history_points onward, as an (n_samples, 1) array. The frame is converted
# to NumPy once instead of once per element.
next_day_close_values = data.to_numpy()[history_points:, 3].copy()
next_day_close_values = next_day_close_values.reshape(-1, 1)

In [None]:
# Scaler used to map model outputs back to price units. The targets were scaled
# as column 3 of data_normaliser's output, which was fitted on ALL rows, so this
# scaler must see the same rows; fitting it only on the rows after the first
# history_points gives a different min/max (and a wrong inverse transform)
# whenever the column's extreme value lies in those first rows.
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser.fit(data.to_numpy()[:, 3].reshape(-1, 1))

**Train-test split in the ratio of 9:1**

In [None]:
# Despite its name, this is the fraction of windows used for TRAINING;
# the remaining windows form the test set.
test_split = 0.9
# Number of training windows, which is also the index of the first test window.
n = int(len(ohlcv_histories_normalised) * test_split)

In [None]:
# Training portion of the split: the first n windows and their normalised targets.
x_train, y_train = ohlcv_histories_normalised[:n], next_day_close_values_normalised[:n]

In [None]:
# Test portion of the split: every window from index n onward and its normalised target.
x_test, y_test = ohlcv_histories_normalised[n:], next_day_close_values_normalised[n:]

**Unscaled values for y-train and y-test will be used for calculating the model's RMSE later**

In [None]:
# Cut the unscaled targets at the same index n as the normalised arrays:
# rows [0, n) go to train and rows [n, end) go to test.
unscaled_y_train, unscaled_y_test = np.split(next_day_close_values, [n])

**Feature Engineering - Simple Moving Average for the closing prices is used as an additional input feature in the LSTM model.**

In [None]:
# One feature per window: the mean of column 3 over the window's rows
# (column 3 is treated as the closing price throughout this notebook),
# stored as a single-element row so the result is (n_windows, 1).
technical_indicators = np.array(
    [[np.mean(window[:, 3])] for window in ohlcv_histories_normalised]
)

# Rescale the indicator column to [0, 1].
# NOTE(review): fitted on every window, including the ones used for testing.
tech_ind_scaler = preprocessing.MinMaxScaler()
technical_indicators_normalised = tech_ind_scaler.fit_transform(technical_indicators)
technical_indicators_normalised.shape

In [None]:
# Apply the same train/test cut at index n to the indicator feature.
train_rows, test_rows = slice(None, n), slice(n, None)
tech_ind_train = technical_indicators_normalised[train_rows]
tech_ind_test = technical_indicators_normalised[test_rows]

**Time Series Forecasting using LSTM**

In [None]:
# Two-branch Keras model: an LSTM branch over each (history_points, 5) window and
# a dense branch over the technical-indicator vector, concatenated and reduced to
# a single linear output trained with MSE.

# define two sets of inputs
lstm_input = Input(shape=(history_points, 5), name='lstm_input')  # 5 features per time step
dense_input = Input(shape=(technical_indicators.shape[1],), name='tech_input')

# the first branch operates on the first input
x = LSTM(32, name='lstm_0')(lstm_input)
x = Dropout(0.2, name='lstm_dropout_0')(x)
lstm_branch = Model(inputs=lstm_input, outputs=x)

# the second branch operates on the second input
y = Dense(20, name='tech_dense_0')(dense_input)
y = Activation("relu", name='tech_relu_0')(y)
y = Dropout(0.2, name='tech_dropout_0')(y)
technical_indicators_branch = Model(inputs=dense_input, outputs=y)

# combine the output of the two branches
combined = concatenate([lstm_branch.output, technical_indicators_branch.output], name='concatenate')

z = Dense(64, activation="sigmoid", name='dense_pooling')(combined)
z = Dense(1, activation="linear", name='dense_out')(z)

# our model will accept the inputs of the two branches and then output a single value
model = Model(inputs=[lstm_branch.input, technical_indicators_branch.input], outputs=z)

# 'learning_rate' is the supported keyword; the legacy 'lr' alias is deprecated
# and rejected by recent Keras releases.
adam = optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=adam,
              loss='mse')

# Write a diagram of the architecture (with tensor shapes) to model.png.
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)

In [None]:
# Stop once val_loss has gone 30 epochs without reaching a new minimum.
es = EarlyStopping(patience=30, monitor='val_loss', mode='min', verbose=1)

# Train for at most 300 epochs on shuffled batches of 32, with 20% of the
# training samples set aside for validation.
training_inputs = [x_train, tech_ind_train]
history = model.fit(
    x=training_inputs,
    y=y_train,
    batch_size=32,
    epochs=300,
    shuffle=True,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Loss on the held-out test windows; the model was compiled with loss='mse',
# and the targets here are the normalised ones.
test_inputs = [x_test, tech_ind_test]
evaluation = model.evaluate(x=test_inputs, y=y_test)
print(evaluation)

**Calculating Train RMSE**

In [None]:
# Predict on the training windows and map the outputs back to price units.
y_predicted_train = model.predict([x_train, tech_ind_train])
y_predicted_train = y_normaliser.inverse_transform(y_predicted_train)

# Mean squared error in price units; the RMSE reported below is its square root
# (the value printed under the "RMSE" label must not be the raw MSE).
real_mse_train = np.mean(np.square(unscaled_y_train - y_predicted_train))
real_rmse_train = np.sqrt(real_mse_train)
print("Train RMSE = {}".format(real_rmse_train))

**Calculating Test RMSE**

In [None]:
# Predict on the test windows and map the outputs back to price units.
y_test_predicted = model.predict([x_test, tech_ind_test])
y_test_predicted = y_normaliser.inverse_transform(y_test_predicted)

# Mean squared error in price units; the RMSE reported below is its square root
# (the value printed under the "RMSE" label must not be the raw MSE).
real_mse_test = np.mean(np.square(unscaled_y_test - y_test_predicted))
real_rmse_test = np.sqrt(real_mse_test)
print("Test RMSE = {}".format(real_rmse_test))

**Train Vs Validation Loss**

In [None]:
from matplotlib import pyplot

# Draw the per-epoch training loss first and the validation loss second, so the
# curves line up with the legend entries below.
for series_key in ('loss', 'val_loss'):
    pyplot.plot(history.history[series_key])

pyplot.title('model train vs validation loss')
pyplot.xlabel('epoch')
pyplot.ylabel('loss')
pyplot.legend(['train', 'validation'], loc='upper right')
pyplot.show()

**Real Vs Predicted Time Series**

In [None]:
import matplotlib.pyplot as plt
# Plot the unscaled test targets against the model's de-normalised predictions.
plt.gcf().set_size_inches(22, 15, forward=True)

# Slice bounds for the plotted range. end = None (rather than -1) so that the
# last test point is included in the plot.
start = 0
end = None

real = plt.plot(unscaled_y_test[start:end], label='real')
pred = plt.plot(y_test_predicted[start:end], label='predicted')

plt.title('Real vs predicted closing price (test set)')
plt.xlabel('test sample index')
plt.ylabel('closing price')
plt.legend(['Real', 'Predicted'])

plt.show()