In [1]:
import pandas as pd
import numpy as np
from math import floor

# Read the yearly series (the first CSV column becomes the row index), drop
# the uncertainty column and give the remaining columns short names.
# NOTE(review): the displayed output shows "Time" as the index name, so the
# "Time" -> "year" column mapping looks like a no-op here -- confirm.
df = (
    pd.read_csv("CSIRO_Recons_gmsl_yr_2019.csv", header=0, index_col=0)
    .drop(columns=["GMSL uncertainty (mm)"])
    .rename(columns={"Time": "year", "GMSL (mm)": "sea_level"})
)

# Floor the index labels to whole numbers, then round-trip them through a
# "%Y" datetime parse so the index ends up holding plain year values.
df.index = pd.to_datetime(np.floor(df.index).astype(int), format="%Y").year
df.head()

Unnamed: 0_level_0,sea_level
Time,Unnamed: 1_level_1
1880,-30.3
1881,-24.7
1882,-41.5
1883,-36.2
1884,-15.3


In [2]:
# grid search ARIMA parameters for time series
import warnings
from math import sqrt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
 
def evaluate_arima_model(X, arima_order):
    """Score one ARIMA order (p, d, q) with walk-forward validation.

    The first 66% of ``X`` seeds the history. For every remaining
    observation a new ARIMA model is fitted on the history so far, a
    one-step-ahead forecast is recorded, and the true observation is then
    appended to the history before the next step.

    Parameters
    ----------
    X : sequence of observations; must support ``len`` and slicing, and
        integer indexing on the sliced hold-out part.
    arima_order : order passed straight to ``ARIMA(..., order=...)``.

    Returns
    -------
    float
        Root mean squared error of the one-step forecasts against the
        hold-out observations.
    """
    # split point between the initial history and the hold-out tail
    split_at = int(len(X) * 0.66)
    observed = list(X[0:split_at])
    holdout = X[split_at:]

    # refit at every step and forecast exactly one observation ahead
    forecasts = []
    for step in range(len(holdout)):
        fitted = ARIMA(observed, order=arima_order).fit()
        forecasts.append(fitted.forecast()[0])
        # reveal the true value only after it has been predicted
        observed.append(holdout[step])

    # out-of-sample error over the whole hold-out tail
    return sqrt(mean_squared_error(holdout, forecasts))
 
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the RMSE of every candidate.

    Every (p, d, q) combination is scored with ``evaluate_arima_model``.
    Each successful score is printed as it is computed, and the best
    (lowest RMSE) configuration is printed at the end.

    Parameters
    ----------
    dataset : array-like exposing ``astype``; it is cast to float32 before
        being scored.
    p_values, d_values, q_values : re-iterable collections of candidate
        values for each component of the ARIMA order.

    Returns
    -------
    None
        Results are reported on stdout only.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception as exc:
                    # Best-effort search: an order that cannot be fitted is
                    # reported and skipped. Catching Exception (rather than a
                    # bare except) lets KeyboardInterrupt / SystemExit
                    # propagate, so the search can actually be interrupted.
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
 
# silence every warning emitted from here on (this is a process-wide filter)
warnings.filterwarnings("ignore")

# candidate values for each component of the ARIMA order (p, d, q)
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(3)
q_values = range(3)

# run the grid search on the raw values of the frame
evaluate_models(df.values, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=106.475
ARIMA(0, 0, 1) RMSE=57.321
ARIMA(0, 0, 2) RMSE=35.460
ARIMA(0, 1, 0) RMSE=6.114
ARIMA(0, 1, 1) RMSE=6.234
ARIMA(0, 1, 2) RMSE=6.262
ARIMA(0, 2, 0) RMSE=8.882
ARIMA(0, 2, 1) RMSE=5.590
ARIMA(0, 2, 2) RMSE=5.357
ARIMA(1, 0, 0) RMSE=6.234
ARIMA(1, 0, 1) RMSE=6.339
ARIMA(1, 0, 2) RMSE=6.365
ARIMA(1, 1, 0) RMSE=6.145
ARIMA(1, 1, 1) RMSE=6.252
ARIMA(1, 1, 2) RMSE=5.866
ARIMA(1, 2, 0) RMSE=6.793
ARIMA(1, 2, 1) RMSE=5.308
ARIMA(1, 2, 2) RMSE=5.626
ARIMA(2, 0, 0) RMSE=6.254
ARIMA(2, 0, 1) RMSE=6.305
ARIMA(2, 0, 2) RMSE=6.501
ARIMA(2, 1, 0) RMSE=6.287
ARIMA(2, 1, 1) RMSE=6.261
ARIMA(2, 1, 2) RMSE=6.254
ARIMA(2, 2, 0) RMSE=5.995
ARIMA(2, 2, 1) RMSE=5.395
ARIMA(2, 2, 2) RMSE=17.643
ARIMA(4, 0, 0) RMSE=6.448
ARIMA(4, 0, 1) RMSE=6.738
ARIMA(4, 0, 2) RMSE=6.728
ARIMA(4, 1, 0) RMSE=6.372
ARIMA(4, 1, 1) RMSE=5.673
ARIMA(4, 2, 0) RMSE=6.362
ARIMA(4, 2, 1) RMSE=5.676
ARIMA(4, 2, 2) RMSE=5.859
ARIMA(6, 0, 0) RMSE=12.243
ARIMA(6, 0, 1) RMSE=16.512
ARIMA(6, 0, 2) RMSE=6.706
ARIMA