In [163]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.cluster import KMeans
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet
import plotly.express as px

In [164]:
df = pd.read_csv('/Users/robholmstrom/OneDrive/Gitrepos/nonrepo_files/DEXUSEU.csv')
df.head()
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.set_index('DATE')
df = df[df['DEXUSEU']!='.']
df['DEXUSEU'] = pd.to_numeric(df['DEXUSEU'])
df


Unnamed: 0_level_0,DEXUSEU
DATE,Unnamed: 1_level_1
2015-07-17,1.0848
2015-07-20,1.0850
2015-07-21,1.0927
2015-07-22,1.0884
2015-07-23,1.0976
...,...
2020-07-13,1.1364
2020-07-14,1.1406
2020-07-15,1.1406
2020-07-16,1.1433


In [165]:
px.line(df)

In [166]:
decomposition = sm.tsa.seasonal_decompose(df, period = 252)
px.line(x = df.index, y =decomposition.trend)

Plot the time series, along with any logical or necessary differences to get a stationary dataset

In [167]:
df['daily_change'] = df['DEXUSEU']-df['DEXUSEU'].shift(1).fillna(method= 'bfill')
px.line(x = df.index, y = df['daily_change'])
adf = adfuller(df['daily_change'])
list(adf)

[-34.47541912968418,
 0.0,
 0,
 1250,
 {'1%': -3.435592223709696, '5%': -2.8638549702604803, '10%': -2.56800251776},
 -9325.57424739822]

In [168]:
dir(sm.tsa)

['AR',
 'ARIMA',
 'ARMA',
 'ArmaProcess',
 'AutoReg',
 'DynamicFactor',
 'ExponentialSmoothing',
 'Holt',
 'MarkovAutoregression',
 'MarkovRegression',
 'SARIMAX',
 'STL',
 'SVAR',
 'SimpleExpSmoothing',
 'UnobservedComponents',
 'VAR',
 'VARMAX',
 'VECM',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'acf',
 'acovf',
 'add_lag',
 'add_trend',
 'adfuller',
 'arima',
 'arma_generate_sample',
 'arma_order_select_ic',
 'bds',
 'bk_filter',
 'ccf',
 'ccovf',
 'cf_filter',
 'coint',
 'datetools',
 'detrend',
 'filters',
 'graphics',
 'hp_filter',
 'innovations',
 'interp',
 'kpss',
 'lagmat',
 'lagmat2ds',
 'pacf',
 'pacf_ols',
 'pacf_yw',
 'periodogram',
 'q_stat',
 'seasonal_decompose',
 'statespace',
 'stattools',
 'tsatools',
 'var',
 'x13_arima_analysis',
 'x13_arima_select_order']

Generate and interpret a PACF for the time series (and/or its differences)

In [169]:
pacf_lag = sm.tsa.pacf(df['DEXUSEU'], nlags = 20, method = 'ols')
px.line(pacf_lag)

In [170]:
pacf_lag2 = sm.tsa.pacf(df['daily_change'], nlags = 20, method = 'ols')
px.line(pacf_lag2)

Generate 5 ARIMA specifications and evaluate their efficacy at modeling your data

In [171]:
df['daily_change']

DATE
2015-07-17    0.0000
2015-07-20    0.0002
2015-07-21    0.0077
2015-07-22   -0.0043
2015-07-23    0.0092
               ...  
2020-07-13    0.0046
2020-07-14    0.0042
2020-07-15    0.0000
2020-07-16    0.0027
2020-07-17    0.0005
Name: daily_change, Length: 1251, dtype: float64

In [172]:
model1 = ARIMA(df['daily_change'], order=(1, 1, 1)).fit()
model2 = ARIMA(df['daily_change'], order=(2, 1, 1)).fit()
model3 = ARIMA(df['daily_change'], order=(3, 1, 1)).fit()
# model4 = ARIMA(df['daily_change'], order=(1, 2, 1)).fit()
# model5 = ARIMA(df['daily_change'], order=(1, 2, 1)).fit()



A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



In [176]:
df['ARIMA1'] = model1.predict(typ='levels')
df['ARIMA2'] = model2.predict(typ='levels')
df['ARIMA3'] = model3.predict(typ='levels')
# df['ARIMA4'] = model4.predict()
# df['ARIMA5'] = model5.predict()


Finally choose one specification and make the case for that as a logical model for your dataset

In [177]:
models = ['ARIMA1','ARIMA2', 'ARIMA3']

for model in models:
  diff = df['DEXUSEU'] - df[model]
  mae = diff.abs().mean()
  rmse = np.sqrt(np.mean(diff**2))
  print(f'Model {model} - MAE: {mae} - RMSE: {rmse}')

Model ARIMA1 - MAE: 1.128355233174293 - RMSE: 1.129158583773379
Model ARIMA2 - MAE: 1.128397558034718 - RMSE: 1.1291949644620742
Model ARIMA3 - MAE: 1.128384438048577 - RMSE: 1.1291825570334286
