In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller
# Suppress all warnings for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')

In [None]:
# Load the sunspot CSV; the first column is parsed as dates on read.
df = pd.read_csv(
    'monthly-sunspots.csv',
    parse_dates=[0],
)
# Preview the first ten rows.
df.head(n=10)

In [None]:
# Line plot of the raw series, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(df['Month'], df['Sunspots'], color='green', label='Sunspot Numbers')
ax.set_title('Monthly Mean Sunspot Numbers')
ax.set_xlabel('Date')
ax.set_ylabel('Sunspot Numbers')
ax.grid()
plt.show()

In [None]:
# Summary statistics, transposed so each column of `df` becomes a row.
df.describe().transpose()

In [None]:
# Make sure 'Month' holds datetimes, then promote it to the index.
# set_index with a column label moves the column into the index, leaving
# only the remaining data columns in `data_new`.
df['Month'] = pd.to_datetime(df['Month'])
data_new = df.set_index('Month')
data_new.head()

In [None]:
# Same series as black dots ('k.') instead of a connected line.
fig, ax = plt.subplots(figsize=(10, 8))
data_new['Sunspots'].plot(style='k.', ax=ax)

In [None]:
# Resample with the 'q' rule and take the mean of each bin.
data_q = data_new.resample(rule='q').mean()
data_q.head(n=5)

In [None]:
def adfuller_test(data):
    """Run an Augmented Dickey-Fuller test on ``data`` and print a report.

    The first four entries of the ``adfuller`` result are printed one per
    line, each with its label, followed by a verdict based on comparing the
    second entry (the p-value) against 0.05.

    Parameters
    ----------
    data : array-like
        Passed unchanged to ``adfuller``.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    result = adfuller(data)
    # One label per leading result entry. zip() stops at the shorter input,
    # so any further entries in `result` are deliberately not printed.
    labels = ['ADF Test Statistic', 'P-value', 'Lags Used', 'Number of Observation Used']
    for value, label in zip(result, labels):
        print(f"{label}: {value}")
    if result[1] <= 0.05:
        print("Strong evidence against the null hypothesis(h0), reject the null hypothesis. Data has no unit root and is stationary")
    else:
        print("Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary")

In [None]:
# Line chart of every column in the resampled frame.
data_q.plot.line(figsize=(10, 8))

In [None]:
# Naive baseline frame: the observed series next to the same series shifted
# down by one row, so each row pairs a value with the previous period's value.
# assign() returns a new frame, leaving data_q untouched.
base_data = data_q.assign(**{
    'Monthly Mean Total Sunspot Number': data_q['Sunspots'],
    'Shifter Monthly Mean Total Sunspot Number': data_q['Sunspots'].shift(1),
})

In [None]:
# Preview the first five rows of the baseline frame.
base_data.head(n=5)

In [None]:
# Overlay the observed series and its one-row-shifted copy.
base_data.loc[
    :,
    ['Monthly Mean Total Sunspot Number', 'Shifter Monthly Mean Total Sunspot Number'],
].plot()

In [None]:
# Drop rows containing NaN (the shift leaves the first row without a
# shifted value), then score the shifted column against the observed one.
base_data = base_data.dropna()
baseline_mse = mean_squared_error(
    y_true=base_data['Monthly Mean Total Sunspot Number'],
    y_pred=base_data['Shifter Monthly Mean Total Sunspot Number'],
)
print("Mean Squared Error:", baseline_mse)

In [None]:
# Autocorrelation plot of the resampled data on a new 10x8 figure.
# No `ax` is passed, so the plot is drawn on pyplot's current axes.
fig = plt.figure(figsize=(10,8))
autocorrelation_plot(data_q)
plt.show()

In [None]:
# ACF (top) and PACF (bottom) of the resampled data, 40 lags each,
# stacked in a single two-row figure.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
plot_acf(data_q, lags=40, ax=ax1)
# Bind the return value so the cell does not end on a bare expression.
fig = plot_pacf(data_q, lags=40, ax=ax2)

In [None]:
# SARIMAX on the resampled 'Sunspots' column:
# non-seasonal order (2, 0, 2), seasonal order (2, 0, 2) with period 6.
model = sm.tsa.statespace.SARIMAX(
    endog=data_q['Sunspots'],
    order=(2, 0, 2),
    seasonal_order=(2, 0, 2, 6),
)
results = model.fit()

In [None]:
# Show the fitted model's summary as the cell output.
results.summary()

In [None]:
# Dynamic forecast over the trailing window of the fitted series.
# The window is computed from the length of data_q (the frame the model was
# fitted on) instead of fixed row positions, so it always lies inside the
# sample and the predictions align with data_q's index on assignment.
forecast_steps = 84  # 84 quarters = 21 years held back for the dynamic forecast
n_obs = len(data_q)
forecast_start = max(n_obs - forecast_steps, 0)  # clamp for series shorter than the window
data_q['forecast'] = results.predict(start=forecast_start, end=n_obs - 1, dynamic=True)
data_q[['Sunspots', 'forecast']].plot(figsize=(10, 8))