In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
       # print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 0 - Importing Packages

In [None]:
import matplotlib.pyplot as plt  #Visualization
import plotly.express as px #Visualization 
#!pip install pmdarima
from pmdarima import auto_arima #Predictions
import plotly.graph_objs as go

from statsmodels.tsa.arima_model import ARIMA
import warnings
warnings.filterwarnings('ignore')

# 1 - Data Exploration

Let's study Apple stocks.

In [None]:
# Load the AAPL price history shipped with the Kaggle stock-market dataset
# and display the resulting frame.
df = pd.read_csv(
    filepath_or_buffer='../input/stock-market-dataset/stocks/AAPL.csv',
)
df

In [None]:
# Index the frame by 'Date' while keeping 'Date' as a regular column
# (drop=False), since the plots below reference it by column name.
df = df.set_index(keys='Date', drop=False)
df.tail()

Let's see a brief description of the dataset

In [None]:
# Summary statistics for the columns of the price frame.
df.describe()

In [None]:
# Line chart of the opening price over the whole dataset.
fig_open = px.line(df, x='Date', y=['Open'])

fig_open.update_xaxes(title='Date')
# Prices are in dollars: prefix the ticks with "$" and hide the y grid.
fig_open.update_yaxes(title='Open Price', tickprefix="$", showgrid=False)

fig_open.show()

In [None]:
# Line chart of the closing price over the whole dataset.
fig_close = px.line(df, x='Date', y=['Close'])

fig_close.update_xaxes(title='Date')
# Prices are in dollars: prefix the ticks with "$" and hide the y grid.
fig_close.update_yaxes(title='Close Price', tickprefix="$", showgrid=False)

fig_close.show()

Since this notebook is just for learning, let's use the data since 2015.

In [None]:
# Keep only the rows from 2015-01-02 onwards (label-based slice on the
# 'Date' index). The explicit .copy() makes this an independent frame:
# later cells add columns to df_lastyears, and without the copy those
# assignments would target a slice of `df` (SettingWithCopyWarning).
df_lastyears = df.loc['2015-01-02':].copy()
df_lastyears.tail()

In [None]:
# Closing price restricted to the 2015+ subset.
fig_close_2 = px.line(df_lastyears, x='Date', y=['Close'])

fig_close_2.update_xaxes(title='Date')
fig_close_2.update_yaxes(title='Close Price', tickprefix="$", showgrid=False)

fig_close_2.show()

In [None]:
# Opening price restricted to the 2015+ subset. This must plot
# df_lastyears (not the full `df`), otherwise the chart is just a
# duplicate of fig_open instead of the companion to fig_close_2.
fig_open_2 = px.line(data_frame=df_lastyears, x='Date', y=['Open'])

fig_open_2.update_yaxes(tickprefix="$", showgrid=False, title='Open Price')
fig_open_2.update_xaxes(title='Date')

fig_open_2.show()

In [None]:
# Scatter of close and open prices for the 2015+ subset, one marker
# trace per price column (trace name == column name).
Fig = go.Figure([
    go.Scatter(
        name=price_column,
        x=df_lastyears.index,
        y=df_lastyears[price_column],
        mode='markers',
    )
    for price_column in ('Close', 'Open')
])

Fig.update_layout(
    title='Precios de apertura y cierre',
    yaxis_title='Price',
    yaxis_tickprefix='$',
)
# Small markers so the dense daily points stay readable.
Fig.update_traces(marker={'size': 2.5})

Fig.show()

# 2 - Feature Engineering

Let's calculate the daily return using the following expression:
<h3>$Return[i] = \frac{Close[i+1]-Close[i]}{Close[i]}$</h3>

In [None]:
# Build the return features in one chain:
#   Price_Difference[i] = Close[i+1] - Close[i]   (shift(-1) pulls the next row up)
#   rows containing NaN are dropped (the last row has no next close)
#   Return[i] = Price_Difference[i] / Close[i]
df_lastyears = (
    df_lastyears
    .assign(Price_Difference=lambda frame: frame['Close'].shift(-1) - frame['Close'])
    .dropna()
    .assign(Return=lambda frame: frame['Price_Difference'] / frame['Close'])
)

# Plot the return series against the Date index.
fig_return = px.line(
    data_frame=df_lastyears,
    x=df_lastyears.index,
    y='Return',
    title='Daily Return',
)
fig_return.update_xaxes(title='Date')
fig_return.show()

Now, let's calculate some moving averages:

In [None]:
# Rolling means of the close and open prices over 10- and 50-row windows.
# Columns are created in the order MA10_Close, MA10_Open, MA50_Close,
# MA50_Open; the first (window - 1) rows of each column are NaN.
df_lastyears = df_lastyears.assign(**{
    f'MA{window}_{price_column}': df_lastyears[price_column].rolling(window).mean()
    for window in (10, 50)
    for price_column in ('Close', 'Open')
})

In [None]:
# Close price (markers) overlaid with its 10- and 50-row moving averages.
ma_dates = df_lastyears.index

MA_Fig = go.Figure([
    go.Scatter(name='MA10 Close', x=ma_dates, y=df_lastyears['MA10_Close']),
    go.Scatter(name='MA50 Close', x=ma_dates, y=df_lastyears['MA50_Close']),
    go.Scatter(name='Close Price', x=ma_dates, y=df_lastyears['Close'], mode='markers'),
])

MA_Fig.update_layout(
    title='Close Price & MA10, MA50',
    yaxis_title='Price',
)
MA_Fig.update_traces(marker={'size': 4})

MA_Fig.show()

# 3 - Price Predictions

Let's split our data into training and test sets.

In [None]:
# Chronological 70/30 split: the first 70% of rows train the model and
# the remaining rows are held out for testing (no shuffling).
split_position = int(len(df_lastyears) * 0.7)
train_data = df_lastyears.iloc[:split_position]
test_data = df_lastyears.iloc[split_position:]

In [None]:
# Display the training portion of the split.
train_data

In [None]:
# Close prices of the training and test partitions as two marker traces.
Fig_tt = go.Figure([
    go.Scatter(
        name='Train closes',
        x=train_data.index,
        y=train_data['Close'],
        mode='markers'
    ),
    go.Scatter(
        name='Test closes',
        x=test_data.index,
        y=test_data['Close'],
        mode='markers'
    )
])

# The layout must be applied to Fig_tt itself; the original call targeted
# `Fig` (the earlier open/close figure), so this figure was shown without
# a title or axis label and the earlier figure was silently restyled.
Fig_tt.update_layout(
    yaxis_title='Price ($)',
    title='Training & Test Close Prices'
)
Fig_tt.show()

Let's configure our model

In [None]:
# Moving-average columns passed to the model as exogenous regressors.
ex_variables = ['MA50_Close', 'MA50_Open', 'MA10_Open', 'MA10_Close']

# The first rows of the rolling means are NaN, so drop incomplete rows
# from the training partition before handing it to the model.
train_data = train_data.dropna()

# Search for ARIMA orders on the training close prices.
# NOTE(review): newer pmdarima releases name this keyword `X` instead of
# `exogenous` — confirm against the installed version.
model = auto_arima(
    y=train_data['Close'],
    exogenous=train_data[ex_variables],
    trace=True,
    suppress_warnings=True,
    error_action="ignore",
)

Now let's fit our model

In [None]:
# Fit the selected model on the training close prices with the same
# exogenous columns used during the order search.
# NOTE(review): auto_arima is documented to return an already-fitted
# estimator, so this refit may be redundant — confirm before removing.
model.fit(
    y=train_data['Close'],
    exogenous=train_data[ex_variables],
)


Let's drop any $NaN$ value that we could have:

In [None]:
# Remove every row of the test partition that contains at least one NaN.
test_data = test_data.dropna(axis=0, how='any')

And now let's make predictions

In [None]:
# Forecast one value per test row, feeding the test-period exogenous columns.
predictions = model.predict(n_periods=len(test_data), exogenous=test_data[ex_variables])

# Store the forecasts positionally. If predict() returns a pandas Series,
# its index need not match test_data's Date index, and a plain assignment
# would align on index and fill the column with NaN; np.asarray strips the
# index so row k of the forecast always lands on row k of test_data.
test_data = test_data.assign(Predictions=np.asarray(predictions))

In [None]:
# Training closes, test closes and the model's forecasts on one figure.
pred_Fig = go.Figure([
    go.Scatter(
        name='Train closes',
        x=train_data.index,
        y=train_data['Close'],
        mode='markers'
    ),
    go.Scatter(
        name='Test closes',
        x=test_data.index,
        y=test_data['Close'],
        mode='markers'
    ),
    go.Scatter(
        name='Predictions',
        x=test_data.index,
        y=test_data['Predictions'],
        mode='lines'
    )
])

# Give the figure a title and axis labels like the other figures in the
# notebook, so it can be read on its own.
pred_Fig.update_layout(
    title='Close Price: Train, Test & Predictions',
    xaxis_title='Date',
    yaxis_title='Price ($)'
)

pred_Fig.show()