In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import glob
from statsmodels.tsa.arima_model import ARIMA
from tidegauge_functions import read_GPS_SONEL, read_GPS_nam14_UNAVCO, calc_rolling_decomposition_GPS
from statsmodels.tsa.seasonal import seasonal_decompose
%matplotlib inline

In [None]:
# Components of the file pattern used to locate the UNAVCO data
dir_in = 'data/GPS'    # directory that is searched
datum = '????'         # glob wildcard: exactly four arbitrary characters
meth = '.cwu.nam14'    # fixed middle part of each file name
ext = '.csv'           # file extension

In [None]:
# Assemble the glob pattern: <dir_in>/<datum><meth><ext>
pattern = os.path.join(dir_in, f"{datum}{meth}{ext}")
print(pattern)

In [None]:
# Collect every file matching the pattern, sorted so the processing order is deterministic
filenames = sorted(glob.glob(pattern))
if not filenames:
    # glob returns an empty list when nothing matches; without this message the
    # per-file loops further down would silently do nothing.
    print(f"WARNING: no files matched pattern {pattern!r}")

In [None]:
# Finding the best ARMA (p, q) order
def order_select(df, var, site):
    """Print and return the AIC-optimal ARMA (p, q) order for one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series to analyse.
    var : str
        Name of the column in ``df`` to analyse.
    site : str
        Label printed before the result so the output can be attributed.

    Returns
    -------
    The ``aic_min_order`` reported by ``arma_order_select_ic``.
    """
    # NOTE(review): trend='nc' is the older statsmodels spelling for
    # "no constant"; newer releases may expect 'n' - confirm against the
    # installed statsmodels version.
    res = sm.tsa.arma_order_select_ic(df[var], ic=['aic', 'bic'], trend='nc')
    print(f"{site}")
    print(res.aic_min_order)
    return res.aic_min_order

# This function is SLOW; a previous run reported "(4, 1)".

In [None]:
# Resample data to monthly means
def monthly_averaging(df, site):
    """Average ``df`` into monthly bins, plot the result and return it.

    Rows containing missing values are dropped before resampling. The
    monthly frame is plotted with ``site`` in the figure title.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must have an index that supports ``resample``.
    site : str
        Label used in the plot title.

    Returns
    -------
    pandas.DataFrame
        The monthly means.
    """
    complete_rows = df.dropna()
    monthly_means = complete_rows.resample('1M').mean()

    # Quick-look figure of the monthly means
    monthly_means.plot()
    plt.suptitle(f"{site} Monthly GPS Data")
    plt.show()

    return monthly_means

In [None]:
def test_SARIMAX_GPS_Monthly(df, var, site, steps, start, end, frequency,
                             order=(4, 1, 1), seasonal_order=(4, 1, 1, 12)):
    """Fit a SARIMAX model to ``df[var]``, then plot its forecast and forecast error.

    The model is trained on ``df[var]`` from the first index value up to
    ``start`` (gaps filled by interpolation) and forecast ``steps`` periods
    ahead. Forecast values dated after ``end`` are discarded. Two figures are
    shown and saved under ``figs/``: data + fitted values + forecast, and
    forecast minus observed data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series; assumed to have a regular date index that
        matches ``frequency`` (e.g. a monthly-resampled frame).
    var : str
        Column of ``df`` to model.
    site : str
        Label used in printed output, figure titles and file names.
    steps : int
        Number of periods to forecast past the end of the training data.
    start : str
        Last date included in the training data.
    end : str
        Last date kept in the forecast.
    frequency : str
        Frequency string passed to SARIMAX (e.g. 'M').
    order : tuple, optional
        Non-seasonal (p, d, q) order; defaults to the previously hard-coded (4, 1, 1).
    seasonal_order : tuple, optional
        Seasonal (P, D, Q, s) order; defaults to the previously hard-coded (4, 1, 1, 12).
    """
    # Work on the frame that was passed in, not on a notebook-level global.
    observed = df[var]
    training = observed.loc[df.index[0]:start].interpolate()

    # Create SARIMAX model
    mod = sm.tsa.statespace.SARIMAX(training,
                                    trend='n',
                                    order=order,
                                    seasonal_order=seasonal_order,
                                    enforce_stationarity=False,
                                    enforce_invertibility=False,
                                    freq=frequency)
    results_SARIMAX = mod.fit()

    # Forecast `steps` periods ahead. The forecast carries its own date index
    # (continuing from the last training date), so use that instead of pairing
    # the values with an independently generated date range, which mislabels
    # the dates whenever the training data stops before `start`.
    forecast = results_SARIMAX.forecast(steps).round(2)
    forecast = forecast.loc[:end]
    SARIMAX_forecast = forecast.rename('ForecastSSH').to_frame()
    SARIMAX_forecast.index.name = 'Date'

    print(f'{site}')
    print(results_SARIMAX.summary())

    # savefig does not create missing directories
    os.makedirs('figs', exist_ok=True)

    plt.plot(observed.dropna(), color='black', marker=',', linestyle='', label='Data')
    plt.plot(results_SARIMAX.fittedvalues, color='red', label='SARIMAX model')
    plt.plot(SARIMAX_forecast.ForecastSSH, color='blue', label='Forecast')
    plt.suptitle(f'{site} SARIMAX GPS Forecast {start} to {end}')
    plt.ylabel('Vertical Land Motion [mm]')
    plt.legend()
    plt.savefig(f'figs/test_GPS_SARIMAX_forecast_{site}.png')
    plt.show()

    # Forecast minus observed data. The subtraction aligns on the date index,
    # so only dates present in both series produce a (non-NaN) error value.
    plt.plot(SARIMAX_forecast.ForecastSSH - observed.dropna(),
             color='black', marker='x', linestyle='-', label='Data')
    plt.suptitle(f'{site} SARIMAX  Forecast ERROR')
    plt.ylabel('Model Error (SARIMAX-GPS) [mm]')
    plt.savefig(f'figs/test_GPS_SARIMAX_forecast_ERROR_{site}.png')
    plt.show()


In [None]:
for filepath in filenames:
    # Site label = first four characters of the file name (the '????' part of
    # the glob pattern); independent of the length of the remaining suffix.
    site = os.path.basename(filepath)[:4]
    df = read_GPS_nam14_UNAVCO(filepath)
    df_monthly = monthly_averaging(df, site)
    order_select(df_monthly, 'Vertical', site)

In [None]:
for filepath in filenames:
    # Site label = first four characters of the file name (the '????' part of
    # the glob pattern); independent of the length of the remaining suffix.
    site = os.path.basename(filepath)[:4]
    df = read_GPS_nam14_UNAVCO(filepath)
    df_monthly = monthly_averaging(df, site)
    # Train up to 2018-1-1, then forecast 134 monthly steps (kept up to 2030-1-1)
    test_SARIMAX_GPS_Monthly(df_monthly, 'Vertical', site, 134, '2018-1-1', '2030-1-1', 'M')