# Outline
Goal: Predict 'Adj Close' (the target) from the features 'Open', 'High', 'Low' and 'Close'.  
**How to use this notebook:** set `num` to the number corresponding to the company you want.

# Import libraries and data

In [None]:
# Import basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU

In [None]:
# Label Encoding
list_company = ['Audi', 'BMW', 'Honda', 'Lucid Motors', 'NIO', 'Nissan', 'Rolls Royces', 'Tata', 'Tesla', 'Volkswagen']

le = LabelEncoder()

list_company_le = le.fit_transform(list_company)
list_company_le

In [None]:
# Display label encoded list of companies
# The codes are simply 0..len-1, so iterate over them directly instead of
# enumerating a range. The loop variable is deliberately not named `company`:
# at notebook top level it would overwrite the global `company` (the selected
# company name) with an integer whenever this cell is re-run.
for code in range(len(list_company_le)):
    print(code, ":", le.inverse_transform([code]))

In [None]:
# Pick the company to model by its integer code.
# `input()` is not available in a Kaggle session, so the code is hard-coded
# below. When running locally, uncomment the next line to be prompted instead:
# num = int(input(">>> Choose Number of Company : "))

num = 8  # Tesla
# Decode the single chosen code back to its company name.
(company,) = le.inverse_transform([num])
company

In [None]:
# Build the CSV path: one folder per company name, containing a file named
# after the lower-cased company name.
dataset_root = "../input/share-price-of-top-electric-car-company/Top EV Company"
filepath = f"{dataset_root}/{company}/{company.lower()}.csv"
filepath

In [None]:
# Load the selected company's price history from the CSV at `filepath`.
data = pd.read_csv(filepath)
data

In [None]:
# Sorting DataFrame
# Order rows oldest -> newest. The windowing step later in this notebook uses
# rows i..i+window-1 as inputs and row i+window as the label, so the label must
# come AFTER its inputs in time; with newest-first order the model would be
# fitted to "predict" a price that precedes its inputs, and the unshuffled
# test split would hold the oldest data instead of the most recent.
# NOTE(review): 'Date' is sorted as it was read from the CSV (it is not parsed
# here) - this assumes an ISO-like YYYY-MM-DD text format whose lexical order
# equals date order; confirm against the file.
# Rebinding `data` instead of mutating in place, together with
# errors='ignore', keeps this cell safe to re-run once 'Volume' is gone.
data = (
    data
    .sort_values(by='Date', ascending=True)
    .reset_index(drop=True)
    .drop(columns='Volume', errors='ignore')
)
data

# Create Datasets

In [None]:
# Utility Function
def make_dataset(data, label, window_size=20):
    """Slice a sequence into overlapping windows with next-step labels.

    Sample ``i`` consists of rows ``i .. i + window_size - 1`` of ``data`` and
    is paired with row ``i + window_size`` of ``label``.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or array-like
        Feature rows, one row per time step.
    label : pandas.Series, pandas.DataFrame or array-like
        Target values, one per time step; must have the same length as ``data``.
    window_size : int, default 20
        Number of consecutive rows in each window; must be at least 1.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features of shape ``(n_samples, window_size, *row_shape)`` and labels
        of shape ``(n_samples, *label_row_shape)``, where
        ``n_samples = max(len(data) - window_size, 0)``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1 or the two inputs differ in length.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # Convert once up front: plain array slicing replaces a pandas .iloc lookup
    # per window, and lets plain arrays/lists be passed in as well.
    values = np.asarray(data)
    targets = np.asarray(label)
    if len(values) != len(targets):
        raise ValueError(
            f"data and label must have the same length, "
            f"got {len(values)} and {len(targets)}"
        )

    n_samples = len(values) - window_size
    if n_samples <= 0:
        # Too few rows for even one window: return empty arrays that still
        # carry the trailing dimensions, so `.shape[1:]` works downstream.
        empty_features = np.empty((0, window_size) + values.shape[1:], dtype=values.dtype)
        empty_labels = np.empty((0,) + targets.shape[1:], dtype=targets.dtype)
        return empty_features, empty_labels

    feature_list = [values[start:start + window_size] for start in range(n_samples)]
    # Copy so the returned labels never alias the caller's underlying buffer.
    return np.array(feature_list), targets[window_size:].copy()

In [None]:
# Set feature and label
train_feature = data[['Open', 'High', 'Low', 'Close']]
train_label = data['Adj Close']

In [None]:
# train dataset
train_feature, train_label = make_dataset(train_feature, train_label, 10)

In [None]:
# Set X, y
X = train_feature
y = train_label

In [None]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=11)

# Modeling

In [None]:
# Fix TensorFlow's global seed before any layer is created so that the random
# weight initialisation is the same on every fresh run of the notebook.
tf.random.set_seed(11)

# One 16-unit GRU layer reads each input window (its shape is taken from the
# training array, minus the sample axis); a single Dense unit emits the
# predicted value.
model = Sequential()
model.add(GRU(16, activation='relu', input_shape=X_train.shape[1:]))
model.add(Dense(1))

In [None]:
# Print Keras' summary of the model built above.
model.summary()

In [None]:
# Optimise mean squared error with Adam; MSE is also tracked as a metric.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse'],
)

In [None]:
# Train for 100 epochs in batches of 64, scoring the validation set each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=64,
)

In [None]:
# Plot epochs - loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(['train', 'val'])
plt.show()

In [None]:
# Evaluate model
train_pred = model.predict(X_train)
train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))

test_pred = model.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, test_pred))

print(f"Train RMSE: {train_rmse.round(2)}")
print(f"Test RMSE: {test_rmse.round(2)}")

In [None]:
# Prediction Plot
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(y_test)), y=y_test.ravel(),
                    mode='lines',
                    name='test'))
fig.add_trace(go.Scatter(x=np.arange(len(y_test)), y=test_pred.ravel(),
                    line=dict(dash='dash', width=3),
                    name='pred'))

fig.update_layout(
    title="Adj Close Prediction of {}".format(company),
    height=500)
fig.show()