In [32]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, GRU, Dropout, InputLayer

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/microsoft-stock-time-series-analysis/Microsoft_Stock.csv


In [33]:
df = pd.read_csv("/kaggle/input/microsoft-stock-time-series-analysis/Microsoft_Stock.csv")
for col in df.columns:
    df = df[pd.isnull(df[col])==False]
    
df['Date'] = df['Date'].apply(lambda x: x.split()[0])
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Average Price'] = df[['Open', 'High', 'Low', 'Close']].mean(axis=1)

price = df['Average Price']

In [34]:
xaxis_train = df.index[df.index <= '2020']
xaxis_test = df.index[df.index >= '2020']
yaxis_train = df['Average Price'][:'2020']
yaxis_test = df['Average Price']['2020':]

fig = go.Figure()

fig.add_trace(go.Scatter(x=xaxis_train,
                         y=yaxis_train,
                         name='Training set (Before 2020)'))
fig.add_trace(go.Scatter(x=xaxis_test,
                         y=yaxis_test,
                         name='Test set (2020 and beyond)'))

fig.update_layout(
    title='Microsoft stock price',
    xaxis_title='Date',
    yaxis_title='Price',
    legend_title='Data sets'
)

fig.show()

In [35]:
class DataProcessor:
    def __init__(self, timesteps):
        self.timesteps = timesteps

    def process_data(self, data):
        df_as_np = data.to_numpy()
        X = []
        y = []

        for i in range(len(df_as_np) - self.timesteps):
            row = [[a] for a in df_as_np[i:i + self.timesteps]]
            X.append(row)
            label = df_as_np[i + self.timesteps]
            y.append(label)

        X = np.array(X)
        y = np.array(y)

        return X, y

class Scale:
    def __init__(self):
        self.scaler = MinMaxScaler(feature_range=(0, 1))

    def scale_data(self, data):
        scaled_data = self.scaler.fit_transform(data.reshape(-1, 1))
        return self.scaler, scaled_data

In [36]:
processor = DataProcessor(90)
X, y = processor.process_data(price)
scaler, scaled_data = Scale().scale_data(y)
X_train, y_train = X[:1100], scaled_data[:1100]
X_val, y_val = X[1100:1200], scaled_data[1100:1200]
X_test, y_test = X[1200:], scaled_data[1200:]

In [38]:
class GRUModel(Sequential):
    def __init__(self):
        super().__init__()
        self.add(InputLayer((90, 1)))
        self.add(GRU(64))
        self.add(Dense(1, activation='tanh'))

model = GRUModel()
model.summary()

Model: "gru_model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_2 (GRU)                 (None, 64)                12864     
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 12,929
Trainable params: 12,929
Non-trainable params: 0
_________________________________________________________________


In [39]:
cp = ModelCheckpoint('model/', save_best_only=True)
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[cp])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x79eee0182380>

In [40]:
mse, rmse = model.evaluate(X_test, y_test)
print("Mean Squared Error on test set: ", mse)
print("Root Mean Squared Error on test set: ", rmse)

Mean Squared Error on test set:  0.040511928498744965
Root Mean Squared Error on test set:  0.2012757509946823


In [52]:
predicted_train = model.predict(X_train)
predicted_train = scaler.inverse_transform(predicted_train)
predicted_test = model.predict(X_test)
predicted_test = scaler.inverse_transform(predicted_test)

fig = go.Figure()

fig.add_trace(go.Scatter(x=xaxis_train,
                         y=yaxis_train,
                         mode='lines',
                         name='Training set (Before 2020)',
                         line_color='blue'))

fig.add_trace(go.Scatter(x=xaxis_test,
                         y=yaxis_test,
                         mode='lines',
                         name='Test set (2020 and beyond)',
                         line_color='blue'))

fig.add_trace(go.Scatter(x=xaxis_test[90:],
                         y=predicted_test.flatten(),
                         mode='lines',
                         name='Predicted Prices (Test)',
                         line_color='red'))

fig.update_layout(
    title='Microsoft stock price and predictions',
    xaxis_title='Date',
    yaxis_title='Price',
    legend_title='Data sets'
)

fig.show()

