In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Import the dataset

In [None]:
# Read the Tesla price history CSV into a DataFrame.
dataset_train = pd.read_csv(
    "/kaggle/input/tesla-stock-price/Tesla.csv - Tesla.csv.csv"
)
# Preview the first rows to confirm the columns loaded as expected.
dataset_train.head()

# Data exploration

In [None]:
# Display the frame's dimensions as a (rows, columns) tuple.
dataset_train.shape

In [None]:
# Display pandas' summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns.
dataset_train.describe()

# Graph the price

In [None]:
# Line chart of the opening price over the whole history (x axis = row index).
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(dataset_train[['Open']], 'blue')
ax.set_title('Tesla stock open price')
ax.set_xlabel('time [days]')
ax.set_ylabel('price')
plt.show()

In [None]:
# Line chart of the daily high price (x axis = row index).
fig, ax = plt.subplots()
ax.plot(dataset_train[['High']], 'black')
ax.set_title('Tesla stock high price')
ax.set_xlabel('time [days]')
ax.set_ylabel('price')
plt.show()

In [None]:
# Line chart of the daily low price (x axis = row index).
fig, ax = plt.subplots()
ax.plot(dataset_train[['Low']], 'green')
ax.set_title('Tesla stock low price')
ax.set_xlabel('time [days]')
ax.set_ylabel('price')
plt.show()

In [None]:
# Line chart of the daily closing price (x axis = row index).
fig, ax = plt.subplots()
ax.plot(dataset_train[['Close']], 'red')
ax.set_title('Tesla stock close price')
ax.set_xlabel('time [days]')
ax.set_ylabel('price')
plt.show()

In [None]:
# Line chart of the daily traded volume (x axis = row index).
# The title and y-label previously misspelled "volume" as "volumn", and the
# title called the series a "price" although it plots the Volume column.
plt.plot(dataset_train[['Volume']].values, 'blue')
plt.title('Tesla stock trading volume')
plt.xlabel('time [days]')
plt.ylabel('volume')
plt.show()

# Normalize the data

In [None]:
# Min-max scale the opening prices into the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler
# One scaler object is created here and re-fitted by each later scaling cell,
# so after those cells run `sc` no longer holds the open-price parameters.
sc = MinMaxScaler(feature_range=(0, 1))
# Selecting with a list of columns already yields an (n_rows, 1) array.
sc_open = sc.fit_transform(dataset_train[['Open']].to_numpy())
print(sc_open)
print(sc_open.shape)

In [None]:
# Min-max scale the closing prices; this re-fits the shared scaler `sc`,
# replacing whatever parameters it held before.
sc_close = sc.fit_transform(dataset_train[['Close']].to_numpy())
print(sc_close)
print(sc_close.shape)

In [None]:
# Min-max scale the daily highs; this re-fits the shared scaler `sc`,
# replacing whatever parameters it held before.
sc_high = sc.fit_transform(dataset_train[['High']].to_numpy())
print(sc_high)
print(sc_high.shape)

In [None]:
# Min-max scale the daily lows; this re-fits the shared scaler `sc`, which
# from here on holds the low-price min/max.
sc_low = sc.fit_transform(dataset_train[['Low']].to_numpy())
print(sc_low)
print(sc_low.shape)

# Splitting the dataset

In [None]:
# define the function split the dataset 
def split_dataset(sc_data):
    """Split a 2-D array chronologically into a 70% head and a 30% tail.

    Args:
        sc_data: 2-D array indexed as ``[row, column]``.

    Returns:
        Tuple ``(train_size, test_size, train_data, test_data)`` where
        ``train_size`` is ``int(len(sc_data) * 0.7)``, ``test_size`` is the
        remaining row count, ``train_data`` is the first ``train_size`` rows
        with every column, and ``test_data`` is the remaining rows restricted
        to the first column only.
    """
    n_rows = len(sc_data)
    cut = int(n_rows * 0.7)
    head = sc_data[:cut, :]
    # NOTE(review): the test slice keeps only column 0 while the train slice
    # keeps all columns; identical for single-column input — confirm intent.
    tail = sc_data[cut:, :1]
    return cut, n_rows - cut, head, tail

In [None]:
# Split every scaled series with split_dataset.
# Suffixes: 1 = open, 2 = close, 3 = high, 4 = low.
(
    (train_size1, test_size1, train_data1, test_data1),
    (train_size2, test_size2, train_data2, test_data2),
    (train_size3, test_size3, train_data3, test_data3),
    (train_size4, test_size4, train_data4, test_data4),
) = [split_dataset(series) for series in (sc_open, sc_close, sc_high, sc_low)]

In [None]:
# Display the shape of the open-price training split.
train_data1.shape

In [None]:
# Display the shape of the open-price test split.
test_data1.shape

In [None]:
# Print the open-price train and test row counts, one per line.
print(train_size1, test_size1, sep="\n")

# Create train and test datasets

In [None]:
# define the function to create train and test dataset
def creat_dataset(data, timestamp=1):
    """Turn a series into sliding-window inputs and next-step targets.

    Args:
        data: 2-D array; only column 0 is read.
        timestamp: number of consecutive rows in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays. ``x[k]`` is
        ``data[k:k + timestamp, 0]`` and ``y[k]`` is ``data[k + timestamp, 0]``
        for ``k`` in ``range(len(data) - timestamp - 1)``.
    """
    # The extra "- 1" means the last possible window/target pair is not
    # produced; downstream plotting offsets rely on this count.
    n_windows = len(data) - timestamp - 1
    windows = [data[start:start + timestamp, 0] for start in range(n_windows)]
    targets = [data[start + timestamp, 0] for start in range(n_windows)]
    # Convert the Python lists to NumPy arrays.
    return np.array(windows), np.array(targets)

In [None]:
# Window length: every sample consists of this many consecutive prices.
timestamp = 100

# Training windows (1 = open, 2 = close, 3 = high, 4 = low).
x_train1, y_train1 = creat_dataset(train_data1, timestamp=timestamp)
x_train2, y_train2 = creat_dataset(train_data2, timestamp=timestamp)
x_train3, y_train3 = creat_dataset(train_data3, timestamp=timestamp)
x_train4, y_train4 = creat_dataset(train_data4, timestamp=timestamp)

# Test windows, same numbering.
x_test1, y_test1 = creat_dataset(test_data1, timestamp=timestamp)
x_test2, y_test2 = creat_dataset(test_data2, timestamp=timestamp)
x_test3, y_test3 = creat_dataset(test_data3, timestamp=timestamp)
x_test4, y_test4 = creat_dataset(test_data4, timestamp=timestamp)

In [None]:
# Display the shapes of the open-price training inputs and targets.
x_train1.shape, y_train1.shape

In [None]:
# Display the shapes of the open-price test inputs and targets.
x_test1.shape, y_test1.shape

# Reshape the dataset for LSTM

In [None]:
# define a function to reshape the dataset for using LSTM
def reshape_data(x_train, x_test):
    """Give both window arrays a trailing feature axis of length 1.

    Each array is reshaped to ``(shape[0], shape[1], 1)``, i.e. the
    ``(samples, timestamps, feature)`` layout the LSTM layers are fed.

    Args:
        x_train: training windows with at least two axes.
        x_test: test windows with at least two axes.

    Returns:
        Tuple ``(x_train, x_test)`` of the reshaped arrays.
    """
    def _as_lstm_input(windows):
        n_samples, n_steps = windows.shape[0], windows.shape[1]
        return windows.reshape(n_samples, n_steps, 1)

    return _as_lstm_input(x_train), _as_lstm_input(x_test)

In [None]:
# Add the trailing feature axis to every series so each has the
# (samples, timestamps, feature) layout.
# Previously the high and low cells re-reshaped the close-price arrays
# (x_train2/x_test2), leaving x_train3/x_test3 and x_train4/x_test4 2-D.
# reshape the dataset of the open price
x_train1, x_test1 = reshape_data(x_train1, x_test1)
# reshape the dataset of the close price
x_train2, x_test2 = reshape_data(x_train2, x_test2)
# reshape the dataset of the high price
x_train3, x_test3 = reshape_data(x_train3, x_test3)
# reshape the dataset of the low price
x_train4, x_test4 = reshape_data(x_train4, x_test4)

In [None]:
# Print the shape of the reshaped open-price training inputs.
print (x_train1.shape)

In [None]:
# Print the shape of the reshaped open-price test inputs.
print (x_test1.shape)

# Build Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM
# define a LSTM model
def lstm_model(timesteps=100, n_features=1, units=50, dropout_rate=0.2):
    """Build an uncompiled stacked-LSTM regressor.

    The network is three LSTM layers, each followed by dropout, and a final
    single-unit dense layer. With no arguments it is the same architecture
    as before: 100 timesteps, 1 feature, 50 units, 0.2 dropout.

    Args:
        timesteps: length of each input window; must equal the second axis
            of the training inputs (``x_train.shape[1]``).
        n_features: number of values per timestep (third axis of the inputs).
        units: number of units in each LSTM layer.
        dropout_rate: rate passed to every Dropout layer.

    Returns:
        A ``Sequential`` model that still needs ``compile`` before training.
    """
    # stack the layers
    model = Sequential()
    # layer 1: returns the full sequence so the next LSTM can consume it
    model.add(LSTM(units=units, return_sequences=True,
                   input_shape=(timesteps, n_features)))
    # regularization
    model.add(Dropout(dropout_rate))
    # layer 2
    model.add(LSTM(units=units, return_sequences=True))
    # regularization
    model.add(Dropout(dropout_rate))
    # layer 3: returns only its last output, which feeds the dense layer
    model.add(LSTM(units=units))
    model.add(Dropout(dropout_rate))
    # single-unit dense layer producing one predicted value per sample
    model.add(Dense(1))
    return model

In [None]:
# Build the stacked LSTM and configure it with mean-squared-error loss
# and the Adam optimizer.
model = lstm_model()
model.compile(loss='mean_squared_error', optimizer='adam')
# Print the layer-by-layer summary.
model.summary()

# Train

In [None]:
# Fit the model on the open-price windows only (series 1); the close, high
# and low series are not used for training here.
model.fit(x=x_train1, y=y_train1, batch_size=64, epochs=100)

# Prediction

In [None]:
import tensorflow as tf
# *notice*: this is only the prediction of the open price
# `scaler` was referenced here without ever being defined, and the shared
# `sc` object was last fitted on the Low column, so neither can invert
# open-price predictions. Fit a dedicated scaler on the Open column with the
# same feature range used to produce sc_open.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset_train[['Open']].values)
# predict the training group
train_predict = model.predict(x_train1)
# predict the test group
test_predict = model.predict(x_test1)
# transform the normalized predictions back to the original price scale
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

In [None]:
# this part of code is combining graphs, which is referenced from the Krish Naik
# Build two NaN-filled columns as long as the dataset and drop the predictions
# into the rows they correspond to, so they overlay the real series when
# plotted. np.full(..., np.nan) guarantees a float buffer regardless of the
# column dtype, and the offsets use `timestamp` instead of a repeated 100.
n_rows = len(dataset_train)
trainPredictPlot = np.full((n_rows, 1), np.nan)
# The first training target sits `timestamp` rows into the series.
trainPredictPlot[timestamp:len(train_predict) + timestamp, :] = train_predict
testPredictPlot = np.full((n_rows, 1), np.nan)
# Skip the training windows, the training look-back, the test look-back and
# the one sample creat_dataset drops from the end of the training split.
test_start = len(train_predict) + (timestamp * 2) + 1
# The slice stops one row early because creat_dataset also drops the last
# test sample.
testPredictPlot[test_start:n_rows - 1, :] = test_predict

In [None]:
# plot the real price and predicted price to compare
plt.plot(dataset_train[['Open']])
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.title("Tesla stock price prediction")
plt.xlabel("time")
plt.ylabel("stock price")
plt.show()