In this notebook, we will predict the closing price of Intel (INTC) stock.

# Import Necessary Libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

import time

import warnings
warnings.filterwarnings("ignore")

# Import Data

In [None]:
import os

# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Load the INTC historical price table and preview its first rows.
csv_path = '/kaggle/input/intel-stock-prices-historical-data-intc/INTC.csv'
df = pd.read_csv(csv_path)
df.head()

# Data Preprocessing

In [None]:
# Count the missing entries in each column.
df.isna().sum()

In [None]:
# Response variable: the column at position 4, kept 2-D (one column) as floats.
# NOTE(review): presumably position 4 is the closing price -- confirm against
# the CSV header, since the column is selected by position rather than by name.
close_column = df.iloc[:, 4:5]
y = close_column.astype(float).values

## Interval of Original Data

In [None]:
# Span (max - min) of the raw series. The ndarray reductions replace the
# builtin max()/min(), which iterate the 2-D array row by row in Python and
# only work when y has exactly one column.
y_max = y.max()
y_min = y.min()

interval = y_max - y_min
interval

## Normalization

In [None]:
# Rescale the series linearly so that its minimum maps to 0 and its maximum to 1.
# NOTE(review): the scaler is fitted on the full series, before the train/test
# split further down, so test-set extremes influence the scaling -- confirm
# this is acceptable for the evaluation.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(y)
scaled_y = scaler.transform(y)

After this normalization, the data lie in the interval [0, 1].

## Data Visualization

In [None]:
# Line plot of the original (unscaled) series against the observation index.
plt.plot(y)
plt.show()

In [None]:
# Line plot of the min-max scaled series against the observation index.
plt.plot(scaled_y)
plt.show()

## ACF Plot to Determine the Size of the Time Window

In [None]:
# Autocorrelation (ACF) plot of the scaled series, requested with lags=100,
# drawn on a large 20x15 figure. Only the ACF is plotted here, not the PACF.
fig,ax = plt.subplots(figsize=(20,15))
# The trailing semicolon keeps the notebook from echoing the returned object.
sm.graphics.tsa.plot_acf(scaled_y,lags=100,ax=ax);

The ACF plot suggests that many lags $k$ still have a substantial correlation with the current observation (time $t$). However, because this is daily data, we simply choose a time window of one period (one lag).

## Create new Dataset based on Determined Time Window

In [None]:
# create function of timewindow
def timewindow(y, window):
    """Build a lagged (sliding-window) matrix from a time series.

    Row ``t`` of the result is ``y[t], y[t + 1], ..., y[t + window]`` laid
    side by side, so the leading columns can serve as predictors and the
    last column(s) as the target.

    Parameters
    ----------
    y : array-like
        Series of shape ``(n, k)``. A 1-D series of length ``n`` is also
        accepted and treated as a single column.
    window : int
        Number of steps to look ahead; must be non-negative.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max(n - window, 0), k * (window + 1))``.

    Raises
    ------
    ValueError
        If ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative, got %r" % (window,))

    y = np.asarray(y)
    if y.ndim == 1:
        # Treat a flat series as a single column.
        y = y.reshape(-1, 1)

    # Clamp at zero so an oversized window consistently yields an empty
    # result instead of a negative slice bound (misaligned shapes).
    obs = max(len(y) - window, 0)

    # Stack all shifted copies in one call rather than growing the array
    # with a new hstack on every iteration.
    return np.hstack([y[shift:obs + shift, :] for shift in range(window + 1)])

In [None]:
# Pair every scaled value with its successor: with a window of 1, column 0
# holds the value at step t and column 1 the value at step t + 1.
scaled_data = timewindow(scaled_y, window=1)

## Data Preview after Adding Time Window

In [None]:
# Display the windowed array (notebook cell output).
scaled_data

In [None]:
# Number of rows (samples) and columns (variables) in the windowed data.
n_data = scaled_data.shape[0]
n_var = scaled_data.shape[1]
(n_data, n_var)

Therefore, we have 10360 rows with 2 variables (1 predictor and 1 target).

## Data Splitting
Hold out the last 20% of the data as the test set, and use the rest as the training set.

In [None]:
# Size of the hold-out set: 20% of the rows, truncated to an integer.
test_fraction = 0.2
n_test = int(test_fraction * n_data)
n_test

In [None]:
# Chronological split: the last n_test rows form the test set and everything
# before them the training set. An explicit boundary index is used because
# negative slicing misbehaves when n_test == 0 ([:-0] is empty, [-0:] is all rows).
# In every row the last column is the target; the preceding columns are predictors.
n_train = scaled_data.shape[0] - n_test

X_scaled_train = scaled_data[:n_train, :-1]
y_scaled_train = scaled_data[:n_train, -1]

X_scaled_test = scaled_data[n_train:, :-1]
y_scaled_test = scaled_data[n_train:, -1]

## Reshape

In [None]:
# Give both predictor arrays three dimensions, (rows, 1, columns), by
# inserting a length-1 middle axis.
train_rows, train_cols = X_scaled_train.shape[0], X_scaled_train.shape[1]
X_scaled_train = X_scaled_train.reshape(train_rows, 1, train_cols)

test_rows, test_cols = X_scaled_test.shape[0], X_scaled_test.shape[1]
X_scaled_test = X_scaled_test.reshape(test_rows, 1, test_cols)

# Modelling
## Model Architecture, Optimizer, and Loss

In [None]:
# Network: one LSTM layer with 4 units reading inputs of shape (1, 1),
# followed by a single-unit dense output layer.
lstm_layer = tf.keras.layers.LSTM(4, input_shape=(1, 1))
output_layer = tf.keras.layers.Dense(1)
model = tf.keras.models.Sequential([lstm_layer, output_layer])

# Adam optimizer with Huber loss; MAE is tracked as a metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-04)
model.compile(
    loss=tf.keras.losses.Huber(),
    optimizer=optimizer,
    metrics=["mae"],
)

model.summary()

## Callback

In [None]:
max_mae = 0.005  # 0.5% of the scaled data range


class StopCond(tf.keras.callbacks.Callback):
    """Stop training once the epoch's 'mae' metric drops below a threshold.

    Parameters
    ----------
    threshold : float, optional
        MAE value below which training is stopped. Defaults to ``max_mae``.
    """

    def __init__(self, threshold=max_mae):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the 'mae' entry of ``logs`` and request a stop when it is low enough."""
        # `logs` may be None, and 'mae' may be absent if the model was compiled
        # without that metric; comparing None with a float would raise TypeError.
        mae = (logs or {}).get('mae')
        if mae is not None and mae < self.threshold:
            print("MAE threshold condition has been satisfied.")
            self.model.stop_training = True


early_stopping    = StopCond()

## Training Model

In [None]:
# Timestamp (seconds since the epoch) taken before the training cell runs.
start_time = time.time()

In [None]:
# Train for at most 100 epochs with one sample per batch; the callback may
# end training earlier.
history = model.fit(
    X_scaled_train,
    y_scaled_train,
    epochs=100,
    batch_size=1,
    callbacks=[early_stopping],
)

In [None]:
# Convert the elapsed time from seconds to minutes and report it.
end_time = time.time()
elapsed_seconds = end_time - start_time
durasi = elapsed_seconds / 60

print("Time elapsed to train model :",durasi,"minutes.")

From the training history above, the model has met the stopping condition: a training MAE below 0.005, i.e. 0.5% of the scaled data range.

# Model Evaluation on Test Data

## Model Evaluation on Validation/Test of Scaled Data

In [None]:
# Predict on the held-out inputs and score the predictions in scaled units.
y_scaled_test_predict = model.predict(X_scaled_test)

# scikit-learn's signature is (y_true, y_pred).
mae = mean_absolute_error(y_scaled_test, y_scaled_test_predict)
# Format the percentage directly instead of round(mae, 3) * 100, which can
# print floating-point artefacts such as 0.7000000000000001.
print(f"MAE : {mae} --> {mae * 100:.2f}% of scaled data.")

## Plot of Actual vs. Predicted Values on the Scaled Validation/Test Data

In [None]:
# Overlay the actual and predicted test series, both in scaled units.
plt.plot(y_scaled_test, label='Actual Series')  # legend typo fixed: "Serries" -> "Series"
plt.plot(y_scaled_test_predict, label='Predicted Series')
plt.title('Plot of Scaled Data Test')
plt.legend()
plt.show()

## Model Evaluation on Validation/Test of Original Data

In [None]:
# Take the last n_test raw values; an explicit start index is used because
# y[-n_test:] would return the whole series when n_test == 0.
y_test = y[len(y) - n_test:]
# Map the scaled predictions back to the original units.
y_test_predict = scaler.inverse_transform(y_scaled_test_predict)

# scikit-learn's signature is (y_true, y_pred).
mae = mean_absolute_error(y_test, y_test_predict)
# Express the MAE as a percentage of the raw data span, formatted directly
# rather than rounded first, which lost precision and printed float artefacts.
print(f"MAE : {mae} --> {mae * 100 / interval:.2f}% of original data.")

## Plot of Actual vs. Predicted Values on the Original Validation/Test Data

In [None]:
# Overlay the actual and predicted test series in the original units.
plt.plot(y_test, label='Actual Series')  # legend typo fixed: "Serries" -> "Series"
plt.plot(y_test_predict, label='Predicted Series')
plt.title('Plot of Original Data Test')
plt.legend()
plt.show()

From both plots above, the model's predictions appear to approximate the test data well, on both the scaled and the original scale.