# Time Series Modeling (ARIMA Methods) Assignment

In [19]:
import pandas as pd
import numpy as np
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

### Import the Amazon stock price data set (AMZN_data.csv) and plot a line chart with the observed daily closing prices.

In [20]:
amazon_df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/AMZN_data.csv')

In [21]:
amazon_df['date'] = pd.to_datetime(amazon_df['date'])

In [22]:
amazon_df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,261.4,265.25,260.555,261.95,3879078,AMZN
1,2013-02-11,263.2,263.25,256.6,257.21,3403403,AMZN
2,2013-02-12,259.19,260.16,257.0,258.7,2938660,AMZN
3,2013-02-13,261.53,269.96,260.3,269.47,5292996,AMZN
4,2013-02-14,267.37,270.65,265.4,269.24,3462780,AMZN


### Run an Autoregression (AR) model on the series and add the results to a column in the dataframe.

In [23]:
model = AR(amazon_df['close']).fit()
amazon_df['AR'] = model.predict()

In [24]:
model.k_ar

23

### Plot a multi-line chart comparing the AR model's results with the observed values.

In [25]:
ar_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR'],
                  var_name='Variables',
                  value_name='Values')

In [26]:
px.line(ar_melt, 'date', 'Values', color='Variables', template='none')

### Run a Moving Average (MA) model on the series and add the results to a column in the dataframe.

In [27]:
model = ARMA(amazon_df['close'], order=(0,1)).fit()
amazon_df['MA'] = model.predict()

### Print the model summary and add the model's results to the line chart with observations and the AR model results.

In [28]:
print(model.summary())

                              ARMA Model Results                              
Dep. Variable:                  close   No. Observations:                 1259
Model:                     ARMA(0, 1)   Log Likelihood               -8047.769
Method:                       css-mle   S.D. of innovations            144.300
Date:                Thu, 17 Dec 2020   AIC                          16101.538
Time:                        18:04:04   BIC                          16116.952
Sample:                             0   HQIC                         16107.331
                                                                              
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const         576.9791      8.063     71.560      0.000     561.176     592.782
ma.L1.close     0.9834      0.004    226.363      0.000       0.975       0.992
                                    Roots       

In [29]:
arma_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR', 'MA'],
                  var_name='Variables',
                  value_name='Values')

px.line(arma_melt, 'date', 'Values', color='Variables', template='none')

### Run an Autoregressive Moving Average (ARMA) model on the series and add the results to a column in the dataframe.

Hint: You will need to make the time series stationary in order to run the ARMA model on it. The most common way to do this is by differncing, or subtracting the previous observed value from the current one.

In [30]:
amazon_df['Shift'] = amazon_df['close'].shift().fillna(method='bfill')
amazon_df['Diff'] = amazon_df['close'] - amazon_df['Shift']

In [31]:
model = ARMA(amazon_df['Diff'], order=(3,1)).fit()
amazon_df['ARMA'] = model.predict() + amazon_df['Shift']

### Print the model summary and generate a multi-line chart that compares the ARMA model's results with those from the AR and MA models.

Hint: You will need to de-difference the model results by adding the previous observed values to them.

In [32]:
print(model.summary())

                              ARMA Model Results                              
Dep. Variable:                   Diff   No. Observations:                 1259
Model:                     ARMA(3, 1)   Log Likelihood               -4734.617
Method:                       css-mle   S.D. of innovations             10.399
Date:                Thu, 17 Dec 2020   AIC                           9481.234
Time:                        18:04:05   BIC                           9512.063
Sample:                             0   HQIC                          9492.820
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9188      0.276      3.331      0.001       0.378       1.459
ar.L1.Diff     0.2142      0.288      0.745      0.456      -0.349       0.778
ar.L2.Diff     0.0331      0.029      1.131      0.2

In [33]:
arma_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR', 'MA', 'ARMA'],
                  var_name='Variables',
                  value_name='Values')

px.line(arma_melt, 'date', 'Values', color='Variables', template='none')

### Run an Autoregressive Integrated Moving Average (ARIMA) model on the series and add the results to a column in the dataframe.

In [34]:
model = ARIMA(amazon_df['close'], order=(1,2,1)).fit()
amazon_df['ARIMA'] = model.predict(typ='levels')

### Print the model summary and generate a multi-line chart that compares the ARIMA model's results to the observed values.

In [35]:
print(model.summary())

                             ARIMA Model Results                              
Dep. Variable:               D2.close   No. Observations:                 1257
Model:                 ARIMA(1, 2, 1)   Log Likelihood               -4732.014
Method:                       css-mle   S.D. of innovations             10.410
Date:                Thu, 17 Dec 2020   AIC                           9472.029
Time:                        18:04:06   BIC                           9492.575
Sample:                             2   HQIC                          9479.750
                                                                              
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const              0.0020      0.001      2.498      0.013       0.000       0.004
ar.L1.D2.close    -0.0129      0.028     -0.455      0.649      -0.068       0.043
ma.L1.D2.close    -1.0000      0.003

In [36]:
arima_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR', 'MA', 'ARMA', 'ARIMA'],
                  var_name='Variables',
                  value_name='Values')

px.line(arima_melt, 'date', 'Values', color='Variables', template='none')

### Run an Seasonal Autoregressive Integrated Moving Average (SARIMA) model on the series and add the results to a column in the dataframe.

In [37]:
model = SARIMAX(amazon_df['close'], order=(1,2,1), seasonal_order=(0,0,0,0)).fit()
amazon_df['SARIMA'] = model.predict()

### Print the model summary and generate a multi-line chart that compares the SARMA model's results with the observed values.

In [38]:
print(model.summary())

                           Statespace Model Results                           
Dep. Variable:                  close   No. Observations:                 1259
Model:               SARIMAX(1, 2, 1)   Log Likelihood               -4733.752
Date:                Thu, 17 Dec 2020   AIC                           9473.505
Time:                        18:04:07   BIC                           9488.914
Sample:                             0   HQIC                          9479.296
                               - 1259                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0130      0.018     -0.735      0.463      -0.048       0.022
ma.L1         -0.9952      0.003   -339.899      0.000      -1.001      -0.989
sigma2       108.8786      1.276     85.323      0.0

In [39]:
sarima_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA'],
                  var_name='Variables',
                  value_name='Values')

px.line(sarima_melt, 'date', 'Values', color='Variables', template='none')

### Run an Seasonal Autoregressive Integrated Moving Average with Exogenous Factors (SARIMAX) model on the series and add the results to a column in the dataframe.

Use the daily Open prices as the exogenous factors.

In [40]:
model = SARIMAX(amazon_df['close'], amazon_df['open'], order=(1,2,1), seasonal_order=(1,1,1,1)).fit()
amazon_df['SARIMAX'] = model.predict()

### Print the model summary and generate a multi-line chart that compares the SARMAX model's results with the SARIMA model's values.

In [41]:
print(model.summary())

                                 Statespace Model Results                                
Dep. Variable:                             close   No. Observations:                 1259
Model:             SARIMAX(1, 2, 1)x(1, 1, 1, 1)   Log Likelihood               -4558.729
Date:                           Thu, 17 Dec 2020   AIC                           9129.458
Time:                                   18:04:11   BIC                           9160.272
Sample:                                        0   HQIC                          9141.040
                                          - 1259                                         
Covariance Type:                             opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
open           0.5822      0.009     67.470      0.000       0.565       0.599
ar.L1         -0.2107      0.009    -24.147

In [42]:
sarimax_melt = pd.melt(amazon_df, id_vars='date',
                  value_vars=['close', 'AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA', 'SARIMAX'],
                  var_name='Variables',
                  value_name='Values')

px.line(sarimax_melt, 'date', 'Values', color='Variables', template='none')

### Evaluate the performance of all the models and compare to each other, using Mean Absolute Error and Root Mean Squared Error as your evaluation metrics.

In [43]:
models = ['AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA', 'SARIMAX']

for model in models:
  diff = amazon_df['close'] - amazon_df[model]
  mae = diff.abs().mean()
  rmse = np.sqrt(np.mean(diff**2))
  print(f'Model {model} - MAE: {mae:.4f} RMSE: {rmse:.4f}')

Model AR - MAE: 6.6368 RMSE: 10.3990
Model MA - MAE: 124.5777 RMSE: 144.6307
Model ARMA - MAE: 6.5946 RMSE: 10.3986
Model ARIMA - MAE: 6.5841 RMSE: 10.4230
Model SARIMA - MAE: 6.8937 RMSE: 13.3396
Model SARIMAX - MAE: 6.3037 RMSE: 10.1546


#Lecture Notes

In [44]:
df = pd.read_csv('/content/LA_weather.csv')

In [45]:
df['Date'] = pd.to_datetime(df['Date'])

In [46]:
df['Date'] = df['Date'].dt.date

In [47]:
df = df.groupby('Date', as_index=False).mean()

In [48]:
px.line(df, 'Date', 'Observed', template='none')

In [49]:
model = AR(df['Observed']).fit()
df['AR'] = model.predict()

In [50]:
model.k_ar

25

In [51]:
melt = pd.melt(df, id_vars='Date', value_vars=['Observed', 'AR'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [52]:
model = ARMA(df['Observed'], order=(0,1)).fit()
df['MA'] = model.predict()
print(model.summary())

                              ARMA Model Results                              
Dep. Variable:               Observed   No. Observations:                 1887
Model:                     ARMA(0, 1)   Log Likelihood               -4692.027
Method:                       css-mle   S.D. of innovations              2.907
Date:                Thu, 17 Dec 2020   AIC                           9390.054
Time:                        18:04:12   BIC                           9406.682
Sample:                             0   HQIC                          9396.177
                                                                              
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const            290.8455      0.125   2330.880      0.000     290.601     291.090
ma.L1.Observed     0.8649      0.008    102.565      0.000       0.848       0.881
                                    

In [53]:
melt = pd.melt(df, id_vars='Date', value_vars=['Observed', 'AR', 'MA'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [54]:
model = ARMA(df['Observed'], order=(1,1)).fit()
df['ARMA'] = model.predict()
print(model.summary())

                              ARMA Model Results                              
Dep. Variable:               Observed   No. Observations:                 1887
Model:                     ARMA(1, 1)   Log Likelihood               -3521.375
Method:                       css-mle   S.D. of innovations              1.563
Date:                Thu, 17 Dec 2020   AIC                           7050.750
Time:                        18:04:13   BIC                           7072.921
Sample:                             0   HQIC                          7058.915
                                                                              
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const            290.8762      0.535    543.399      0.000     289.827     291.925
ar.L1.Observed     0.9109      0.010     91.685      0.000       0.891       0.930
ma.L1.Observed     0.3324      0.023

In [55]:
melt = pd.melt(df, id_vars='Date', value_vars=['Observed', 'AR', 'MA', 'ARMA'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [56]:
model = ARIMA(df['Observed'], order=(1,1,1)).fit()
df['ARIMA'] = model.predict(typ='levels')
print(model.summary())

                             ARIMA Model Results                              
Dep. Variable:             D.Observed   No. Observations:                 1886
Model:                 ARIMA(1, 1, 1)   Log Likelihood               -3556.135
Method:                       css-mle   S.D. of innovations              1.595
Date:                Thu, 17 Dec 2020   AIC                           7120.269
Time:                        18:04:14   BIC                           7142.438
Sample:                             1   HQIC                          7128.433
                                                                              
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.0025      0.045      0.054      0.957      -0.086       0.091
ar.L1.D.Observed    -0.1947      0.068     -2.858      0.004      -0.328      -0.061
ma.L1.D.Observed     0.4721 

In [57]:
melt = pd.melt(df, id_vars='Date', value_vars=['Observed', 'AR', 'MA', 'ARMA', 'ARIMA'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [58]:
model = SARIMAX(df['Observed'], order=(1,1,1), seasonal_order=(1,1,1,4)).fit()
df['SARIMA'] = model.predict()
print(model.summary())

                                 Statespace Model Results                                
Dep. Variable:                          Observed   No. Observations:                 1887
Model:             SARIMAX(1, 1, 1)x(1, 1, 1, 4)   Log Likelihood               -3548.091
Date:                           Thu, 17 Dec 2020   AIC                           7106.182
Time:                                   18:04:17   BIC                           7133.882
Sample:                                        0   HQIC                          7116.384
                                          - 1887                                         
Covariance Type:                             opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1984      0.065     -3.074      0.002      -0.325      -0.072
ma.L1          0.4562      0.061      7.470

In [59]:
melt = pd.melt(df, id_vars='Date', 
               value_vars=['Observed', 'AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [60]:
model = SARIMAX(df['Observed'], df['Humidity'], order=(1,1,1), seasonal_order=(0,0,0,0)).fit()
df['SARIMAX'] = model.predict()
print(model.summary())

                           Statespace Model Results                           
Dep. Variable:               Observed   No. Observations:                 1887
Model:               SARIMAX(1, 1, 1)   Log Likelihood               -3434.539
Date:                Thu, 17 Dec 2020   AIC                           6877.079
Time:                        18:04:19   BIC                           6899.248
Sample:                             0   HQIC                          6885.243
                               - 1887                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Humidity      -0.0475      0.002    -22.935      0.000      -0.052      -0.043
ar.L1         -0.3060      0.064     -4.799      0.000      -0.431      -0.181
ma.L1          0.5464      0.057      9.619      0.0

In [61]:
melt = pd.melt(df.iloc[5:], id_vars='Date', 
               value_vars=['Observed', 'AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA', 'SARIMAX'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable', template='none')

In [62]:
models = ['AR', 'MA', 'ARMA', 'ARIMA', 'SARIMA', 'SARIMAX']

for model in models:
  diff = df['Observed'] - df[model]
  mae = diff.abs().mean()
  rmse = np.sqrt(np.mean(diff**2))
  print(f'Model {model} - MAE: {mae:.4f} RMSE: {rmse:.4f}')

Model AR - MAE: 1.1114 RMSE: 1.5053
Model MA - MAE: 2.3452 RMSE: 2.9081
Model ARMA - MAE: 1.1566 RMSE: 1.5633
Model ARIMA - MAE: 1.1793 RMSE: 1.5925
Model SARIMA - MAE: 1.4227 RMSE: 7.7114
Model SARIMAX - MAE: 1.2642 RMSE: 6.9767


In [63]:
apple_df = pd.read_csv('/content/AAPL.csv')
apple_df['Date'] = pd.to_datetime(apple_df['Date'])
apple_df = apple_df[['Date', 'Close']]

In [64]:
px.line(apple_df, 'Date', 'Close')

In [65]:
apple_df['Rolling_Mean'] = apple_df['Close'].rolling(window=252).mean()
apple_df['Rolling_STD'] = apple_df['Close'].rolling(window=252).std()

In [66]:
melted = pd.melt(apple_df, id_vars='Date', 
                 value_vars=['Close', 'Rolling_Mean', 'Rolling_STD'],
                 var_name='Variable',
                 value_name='Value',
                 )
px.line(melted, 'Date', 'Value', color='Variable', title='Rolling Mean vs STD', template='none')

In [67]:
adf_test = sm.tsa.stattools.adfuller(apple_df['Close'])
adf_test

(0.635206667371746,
 0.9884520189151633,
 26,
 2490,
 {'1%': -3.4329789374204935,
  '10%': -2.5673882843825098,
  '5%': -2.8627014485379787},
 11794.552235233201)

In [68]:
apple_df['diff'] = apple_df['Close'].diff()

In [69]:
apple_df['diff'] = apple_df['diff'].fillna(method='bfill')

In [70]:
adf_test = sm.tsa.stattools.adfuller(apple_df['diff'])
adf_test

(-8.92510865822881,
 1.0162809559708238e-14,
 25,
 2491,
 {'1%': -3.4329778809556224,
  '10%': -2.5673880359934063,
  '5%': -2.86270098200392},
 11792.949113518194)