In [None]:
import os
import time

import math
import numpy as np 
import pandas as pd
import seaborn as sns; sns.set(style="ticks", color_codes=True)

from sklearn.metrics import mean_absolute_error as MAE, mean_squared_error as MSE
from sklearn.model_selection import train_test_split, KFold
from sklearn.feature_selection import RFE

import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display

In [None]:
# Global matplotlib defaults: large figures, grid lines off.
mpl.rcParams['figure.figsize'] = (20, 13)
mpl.rcParams['axes.grid'] = False

# How much of a DataFrame pandas renders before truncating.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 256)

import warnings

# Newer pandas releases expose SettingWithCopyWarning from `pandas.errors`;
# older releases only provide it under `pandas.core.common`. Try the public
# location first so this cell imports cleanly on both.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    from pandas.core.common import SettingWithCopyWarning

# Silence chained-assignment warnings for the rest of the notebook.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# **Data Loading**

In [None]:
from dateutil.parser import parse


def date_parser(date):
    """Convert a date string into a ``datetime`` using dateutil's parser."""
    return parse(date)

In [None]:
# Load only the date and sunspot-count columns, parse the dates with
# `date_parser`, and give the value column a shorter, attribute-friendly name.
df = pd.read_csv(
    '../input/sunspots/Sunspots.csv',
    usecols=['Date', 'Monthly Mean Total Sunspot Number'],
    parse_dates=['Date'],
    date_parser=date_parser,
).rename(columns={'Monthly Mean Total Sunspot Number': 'Monthly_Average_Sunspot'})
df.head()

In [None]:
N_YEARS = 50
window = N_YEARS * 12  # rows per split, assuming one row per month

# Take the last 2 * N_YEARS of rows: the older half trains, the newer half tests.
train_df = df.iloc[-2 * window:-window]
test_df = df.iloc[-window:]

train_size = len(train_df)
test_size = len(test_df)
print(train_size, test_size)

# Train and test stitched back together, in chronological order.
compose_df = pd.concat([train_df, test_df])

# Blue dots: training window. Red dots: test window.
plt.plot(train_df['Date'], train_df['Monthly_Average_Sunspot'], 'bo',
         test_df['Date'], test_df['Monthly_Average_Sunspot'], 'ro')

In [None]:
# Line plot of the combined train + test window against the date axis.
indexed_window = compose_df.set_index('Date')
indexed_window.plot()

# **Modeling**

In [None]:
# Huber loss (a smoothed mean absolute error): quadratic for residuals within
# `delta`, linear beyond it.
import tensorflow as tf
from tensorflow.keras.losses import Huber, Reduction

# Reduction.NONE returns the individual losses instead of one aggregated scalar,
# so they can be stored per date.
loss_func = Huber(delta=1.0, reduction=Reduction.NONE)

In [None]:
# Table of losses keyed by test date; each model adds its own column below.
loss_df = pd.DataFrame({'Date': test_df.Date})

## **SARIMA (seasonal ARIMA)**

In [None]:
%%time
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Work on a date-indexed copy of the training window and attach the inferred
# frequency to the DatetimeIndex explicitly.
train_set = train_df.copy().set_index(keys='Date', drop=True)
train_set.index = pd.DatetimeIndex(data=train_set.index.values,
                                   freq=train_set.index.inferred_freq)
print('Frequency by:', train_set.index.inferred_freq)

# https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html
# NOTE: the former `seasonal_periods=2` keyword was removed. It is not a SARIMAX
# argument (the periodicity is the 4th element of `seasonal_order`), so it was
# either ignored or reported as an unknown keyword argument.
model = SARIMAX(endog=train_set['Monthly_Average_Sunspot'],
                exog=None,
                order=(1, 0, 2),  # p,d,q - number of AR parameters, differences, and MA parameters
                seasonal_order=(1, 0, 2, 12*11),  # P,D,Q,s - s = 132 months, i.e. an 11-year cycle
                trend='ct',  # c: const - t: time
                enforce_invertibility=False,
                enforce_stationarity=True)

# https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.fit.html#statsmodels.tsa.statespace.sarimax.SARIMAX.fit
# Fitted results object; later cells call `ARIMA.predict(...)` on it.
ARIMA = model.fit(maxiter=50,
                  optim_score='approx',
                  cov_type='approx',
                  method='bfgs',
                  disp=True)

In [None]:
# Predict the `test_size` steps that follow the last training observation, then
# re-key the values onto the test rows so they line up with `test_df`.
predicted = ARIMA.predict(start=train_size, end=train_size + test_size - 1)
forecast = pd.DataFrame({'Prediction': predicted.values}, index=test_df.index)

In [None]:
# Attach the test dates so the predictions can be drawn on the same time axis.
forecast['Date'] = test_df['Date']

# Red: training data. Yellow: actual test data. Blue: SARIMA predictions.
plt.plot(train_df['Date'], train_df['Monthly_Average_Sunspot'], 'ro',
         test_df['Date'], test_df['Monthly_Average_Sunspot'], 'yo',
         forecast['Date'], forecast['Prediction'], 'bo')

In [None]:
# Column vectors of shape (n, 1) so the loss is evaluated row by row.
y_true = test_df['Monthly_Average_Sunspot'].to_numpy().reshape(-1, 1)
y_pred = forecast['Prediction'].to_numpy().reshape(-1, 1)

loss = loss_func(y_true, y_pred).numpy()
loss_df['SARIMA'] = loss
loss_df['SARIMA'].describe()

## **Facebook Prophet**

In [None]:
# The package was renamed from `fbprophet` to `prophet`; prefer the current
# name and fall back to the legacy one so the cell runs in either environment.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

prophet = Prophet(
    growth='linear',  # linear or logistic
    changepoints=None,  # list of dates at which to include potential changepoints
    n_changepoints=1+2*N_YEARS//11,  # number of potential changepoints
    changepoint_range=0.6699,  # proportion of history in which trend changepoints will be estimated
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
    holidays=None,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=1.69,
    changepoint_prior_scale=1.169,
    mcmc_samples=0,  # if > 0: Bayesian inference with number of MCMC samples, else: MAP estimation
    interval_width=0.69,  # width of the uncertainty intervals provided for the forecast
    uncertainty_samples=690  # number of simulated draws used to estimate uncertainty intervals
)

# Custom 11-year seasonality. Prophet expects `period` in days, so use
# 365.25 days per year; the previous 30*12*11 (= 3960 days) was about 58 days
# short of 11 years.
prophet.add_seasonality(name='decadely',  # modified decade: 11 years
                        period=365.25*11,
                        fourier_order=11,
                        prior_scale=1.69,
                        mode='additive')

In [None]:
%%time
# Rename the columns to `ds` (timestamp) and `y` (target) before fitting.
history = train_df.rename(columns={'Date': 'ds', 'Monthly_Average_Sunspot': 'y'})
prophet.fit(history)

# Predict on the test dates only.
future = test_df[['Date']].rename(columns={'Date': 'ds'})
forecast = prophet.predict(df=future)

In [None]:
# Red: training data. Yellow: actual test data. Blue: Prophet predictions (`yhat`).
plt.plot(train_df['Date'], train_df['Monthly_Average_Sunspot'], 'ro',
         test_df['Date'], test_df['Monthly_Average_Sunspot'], 'yo',
         forecast['ds'], forecast['yhat'], 'bo')

In [None]:
# Column vectors of shape (n, 1) so the loss is evaluated row by row.
y_true = test_df['Monthly_Average_Sunspot'].to_numpy().reshape(-1, 1)
y_pred = forecast['yhat'].to_numpy().reshape(-1, 1)

loss = loss_func(y_true, y_pred).numpy()
loss_df['Prophet'] = loss
loss_df['Prophet'].describe()