In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [None]:
from pylab import rcParams

# Default canvas for every figure drawn later: 15 wide by 8 tall.
rcParams.update({'figure.figsize': (15, 8)})

In [None]:
# Read the raw table; the path is relative to the notebook's working directory.
csv_path = '../input/sunspots/Sunspots.csv'
df = pd.read_csv(csv_path)
print(df.shape)
df.head()

In [None]:
# Turn the YYYY-MM-DD strings in 'Date' into real timestamps.
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d'))

In [None]:
# Index the observations by date and discard the 'Unnamed: 0' column.
df1 = df.set_index('Date').drop(columns=['Unnamed: 0'])
df1.tail()

In [None]:
# The single series that every later cell analyses and models.
target_column = 'Monthly Mean Total Sunspot Number'
y = df1[target_column]

In [None]:
# Quick look at the whole series.
fig, ax = plt.subplots()
y.plot(ax=ax)
plt.show()

In [None]:
import statsmodels.api as sm

# Additive seasonal decomposition of the series, shown with the library's default plot.
decomposition = sm.tsa.seasonal_decompose(x=y, model='additive')
fig = decomposition.plot()
plt.show()

In [None]:
#plot for rolling statistic for testing stationarity
def test_stationarity(timeseries, title, window=12):
    """Plot a series together with its rolling mean and rolling standard deviation.

    Parameters
    ----------
    timeseries : array-like or pandas.Series
        Observations to inspect; wrapped in ``pd.Series`` before rolling.
    title : str
        Legend label used for the raw series.
    window : int, default 12
        Number of consecutive observations in each rolling window.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    series = pd.Series(timeseries)

    # Determining rolling statistics
    rolmean = series.rolling(window=window).mean()
    rolstd = series.rolling(window=window).std()

    fig, ax = plt.subplots(figsize=(15,8))
    ax.plot(timeseries, label=title)
    ax.plot(rolmean, label='rolling mean')
    # The std is drawn unscaled, so the label must not claim a x10 factor.
    ax.plot(rolstd, label='rolling std')
    ax.legend()

In [None]:
# Print floats with eight decimals in pandas output, then inspect the raw series.
pd.set_option('display.float_format', '{:.8f}'.format)
test_stationarity(y, 'raw data')

In [None]:
#Augmented Dickey-Fuller Test(ADF)
from statsmodels.tsa.stattools import adfuller

def ADF_test(timeseries, dataDesc):
    """Run the Augmented Dickey-Fuller test, print the results and return them.

    Parameters
    ----------
    timeseries : array-like or pandas.Series
        Observations passed to ``adfuller``.
    dataDesc : str
        Short description of the data, shown in the printed header.

    Returns
    -------
    pandas.Series
        The first four values returned by ``adfuller`` (labelled test
        statistic, p-value, lags used, observations used) followed by one
        'Critical Value(<level>)' entry per critical value.
    """
    print('Results of Dickey-Fuller Test for {}:'.format(dataDesc))
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4],
                         index=['Test Statistic', 'p-value', '#Lags Used',
                                'Number of Observations Used'])
    # dftest[4] maps each significance level to its critical value.
    for key, value in dftest[4].items():
        dfoutput['Critical Value(%s)' % key] = value
    print(dfoutput)
    return dfoutput

# The function prints its own report; wrapping the call in print() only added
# a stray "None". Keep the returned table for later reference instead.
adf_raw = ADF_test(y, 'raw data')

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
# Autocorrelation first, then partial autocorrelation, each shown on its own figure.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(y)
    plt.show()

In [None]:
#split train and test dataset

# The prediction cells start at '2010-01-31', i.e. observations are stamped at
# month end. Slicing by year keeps December 2009 in the training set; the old
# day-level bound '2009-12-01' stopped at November 2009 and left that month in
# neither partition.
y_to_train = y.loc[:'2009']
y_to_test = y.loc['2010':]
# Number of held-out observations (everything from January 2010 onward).
predict_date = len(y_to_test)

In [None]:
#ARIMA MODEL

import itertools

# Each non-seasonal order p, d, q is drawn from {0, 1}.
p = d = q = range(2)

# Every (p, d, q) combination: 2 x 2 x 2 = 8 triples.
pdq = [order for order in itertools.product(p, d, q)]

# The same triples with a seasonal period of 12 appended.
seasonal_pdq = [(*order, 12) for order in pdq]

In [None]:
# Results table with headers only; no rows exist at this point.
aic_columns = ['param', 'AIC']
ARIMA_AIC = pd.DataFrame(columns=aic_columns)
ARIMA_AIC

In [None]:
# Fit every (p, d, q) x (P, D, Q, 12) combination and record its AIC.
# Rows are gathered in a plain list and turned into a frame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and the former bare `except`
# swallowed the resulting AttributeError, leaving the table silently empty.
aic_records = []
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            aic = results.aic
        except Exception as err:
            # Best effort: report the failed combination and keep searching.
            # Catching Exception (not a bare except) lets Ctrl-C stop the loop.
            print('ARIMA{}x{} - skipped: {}'.format(param, param_seasonal, err))
            continue

        # param_seasonal already carries the period, so no extra "12" is printed.
        print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, aic))
        # Store the seasonal order too; 'param' alone is shared by several fits.
        aic_records.append({'param': param,
                            'seasonal_param': param_seasonal,
                            'AIC': aic})

ARIMA_AIC = pd.DataFrame(aic_records, columns=['param', 'seasonal_param', 'AIC'])

In [None]:
## Rank the fitted models by AIC, smallest first, to read off the best parameters

ARIMA_AIC.sort_values('AIC')

In [None]:
# Fit the chosen specification: order (1,1,1) with seasonal order (1,1,1,12).
# The keyword is `enforce_stationarity`; the former spelling
# `enforce_stationary` is not a SARIMAX option, so the intended setting was
# never applied and this fit differed from the ones in the grid search.
mod = sm.tsa.statespace.SARIMAX(y,
                                order=(1,1,1),
                                seasonal_order=(1,1,1,12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results = mod.fit()
print(results.summary())

In [None]:
# Diagnostic plots for the fitted model.
diagnostics_fig = results.plot_diagnostics()
plt.show()

In [None]:
#validation Forecasts
# Non-dynamic predictions starting at 2010-01-31, plus their confidence intervals.
validation_start = pd.Timestamp('2010-01-31')
pred = results.get_prediction(start=validation_start, dynamic=False)
pred_ci = pred.conf_int()

In [None]:
# Observed values from 2005 onward with the validation predictions on top.
ax = y['2005':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead forecast', alpha=.7)

# Shade the band between the two confidence-interval columns.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Monthly Mean Total Sunspot Number')
# Without this call the labels passed above are never displayed.
ax.legend()
plt.grid()
plt.show()

In [None]:
# Score the predictions against the observations from 2010-01-31 onward.
y_truth = y['2010-01-31':]
y_forecasted = pred.predicted_mean

squared_errors = (y_forecasted - y_truth) ** 2
mse = squared_errors.mean()
rmse = np.sqrt(mse)
print("The mean squared error of forecast is {}".format(round(mse,2)))
print("The root mean squared error of forecast is {}".format(round(rmse,2)))

In [None]:
#validation Forecasts
# Dynamic predictions starting at 2010-01-31, plus their confidence intervals.
# `full_results` is not a documented get_prediction parameter (a leftover from
# an older API), so it is no longer passed.
pred = results.get_prediction(start=pd.to_datetime('2010-01-31'), dynamic=True)
pred_ci = pred.conf_int()

In [None]:
# Observed values from 2005 onward with the dynamic predictions on top.
ax = y['2005':].plot(label='observed')
# This cell plots the dynamic=True run, so it must not reuse the one-step label.
pred.predicted_mean.plot(ax=ax, label='Dynamic forecast', alpha=.7)

# Shade the band between the two confidence-interval columns.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Monthly Mean Total Sunspot Number')
# Without this call the labels passed above are never displayed.
ax.legend()
plt.grid()
plt.show()

In [None]:
# Score the current predictions against the observations from 2010-01-01 onward.
y_truth = y['2010-01-01':]
y_forecasted = pred.predicted_mean

squared_errors = (y_forecasted - y_truth) ** 2
mse = squared_errors.mean()
rmse = np.sqrt(mse)
print("The mean squared error of forecast is {}".format(round(mse,2)))
print("The root mean squared error of forecast is {}".format(round(rmse,2)))

In [None]:
#forecasting
# Forecast 109 steps ahead and keep the matching confidence intervals.
forecast_steps = 109
pred_uc = results.get_forecast(steps=forecast_steps)
pred_ci = pred_uc.conf_int()

In [None]:
# Held-out observations with the forecast drawn on the same axes.
ax = y_to_test.plot(label='observed', figsize=(20,15))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

# Shade the band between the two confidence-interval columns.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.25)

ax.set_xlabel('Date')
ax.set_ylabel('Monthly Mean Total Sunspot Number')
# Without this call the labels passed above are never displayed.
ax.legend()

plt.grid()
plt.show()

### ***** END*****