In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.graphics.api import qqplot
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.api import tsa
from sklearn.metrics import r2_score
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the sunspot CSV from the working directory, indexing rows by the
# YEAR column.
df = pd.read_csv('monthly-sunspots.csv', index_col='YEAR')
# Bare trailing expression so the notebook renders the loaded frame.
df

In [None]:
# Line plot of every column in the frame against its index, on a wide canvas.
df.plot(color='purple', figsize=(15, 8))

In [None]:
# Run statsmodels' adfuller() test on the SUNACTIVITY column and show the
# raw return value; the next cell reads its element at index 1 as the p-value.
result = adfuller(df.loc[:, 'SUNACTIVITY'])
display(result)

In [None]:
# Element 1 of the adfuller() output is used as the p-value. Below the 0.05
# threshold the series is reported as stationary, otherwise as non-stationary.
pvalue = result[1]
verdict = "Stationary" if pvalue < 0.05 else "Non Stationary"
print(verdict)

In [None]:
# Draw the autocorrelation and partial-autocorrelation plots of the series,
# each out to 40 lags, as two separate figures (ACF first, then PACF).
sunactivity = df['SUNACTIVITY']
for correlogram in (plot_acf, plot_pacf):
    correlogram(sunactivity, lags=40)
plt.show()

In [None]:
# Fit an ARIMA(1, 0, 1) model to the full SUNACTIVITY series and report the
# R² of its predictions against the observed values.
# `result` and `pred` are reused by the following cells (overlay plot,
# residual histogram, summary), so their names must stay as they are.
model = ARIMA(list(df['SUNACTIVITY']), order = (1,0,1))
result = model.fit()
# predict() with no arguments: predictions over the fitted sample
# (presumably one per observation — confirm against the statsmodels docs).
pred = result.predict()
# Score against the exact column the model was fitted on rather than the whole
# DataFrame, so the metric stays valid even if the CSV carries extra columns.
print(r2_score(df['SUNACTIVITY'], pred))

In [None]:
# Overlay the model's predictions (dashed) on the observed series.
plt.plot(list(df['SUNACTIVITY']))
plt.plot(pred, linestyle = '--')
# legend() takes all labels in ONE list, in plotting order. Passing two
# separate lists makes matplotlib treat the first as artist handles and the
# second as the labels, which mislabels the plot.
plt.legend(['Actual Sunspots', 'Predicted Sunspots'])
plt.xlabel('Timesteps')
plt.show()

In [None]:
# Histogram of the fitted model's residuals (`result` is the ARIMA fit).
residual_series = pd.Series(result.resid)
ax = residual_series.hist()
ax.set(xlabel='Residual', ylabel='Number of Occurences')
plt.show()

In [None]:
# Render the summary table of the fitted ARIMA model.
# NOTE: `result` must still be the ARIMA fit here; the cross-validation cell
# further down rebinds `result` to a pivot table, so re-run the fit cell first
# if this cell is executed out of order.
result.summary()

In [None]:
# Walk-forward cross-validation over ARMA(p, q) orders.
#
# For every (p, q) pair and every TimeSeriesSplit fold, the fold's test window
# is forecast one step at a time: fit on the current training window, predict
# the next point, then move the true observation from the test window into the
# training window and repeat. The R² of those forecasts is averaged over the
# folds and collected per order.
data_array = df.values
test_size = 10               # points per test fold == one-step forecasts per fold
failed_forecast = -9999999.  # sentinel forecast recorded when a fit fails
# The splitter does not depend on (p, q), so build it once.
tscv = TimeSeriesSplit(test_size = test_size)

avg_errors = []
for p in range(1):
    for q in range(13):
        # Despite the name, these are R² scores (higher is better).
        errors = []
        for train_index, test_index in tscv.split(data_array):
            x_train, x_test = data_array[train_index], data_array[test_index]
            x_test_orig = x_test

            fcst = []
            for stop in range(test_size):
                try:
                    mod = ARIMA(x_train, order = (p,0,q))
                    res = mod.fit()
                    # Keep a plain scalar so successful forecasts and the
                    # failure sentinel have the same shape; mixing shape-(1,)
                    # arrays with a bare float makes r2_score fail on a
                    # ragged list.
                    fcst.append(float(res.forecast(steps = 1)[0]))
                except Exception as exc:
                    # Catch Exception, not everything: a bare `except:` would
                    # also swallow KeyboardInterrupt and make this long loop
                    # impossible to stop. Report what actually went wrong.
                    print(f"Error fitting ARIMA order ({p}, 0, {q}): {exc!r}")
                    fcst.append(failed_forecast)
                # Reveal the next true observation to the model.
                x_train = np.concatenate((x_train, x_test[0:1,]))
                x_test = x_test[1:]
            errors.append(r2_score(x_test_orig, fcst))
        pq_result = [p, q, np.mean(errors)]
        print(pq_result)
        avg_errors.append(pq_result)
avg_errors = pd.DataFrame(avg_errors)
# The 'error' column holds the mean R² across folds.
avg_errors.columns = ['p', 'q', 'error']
# NOTE: this rebinds `result`, which the earlier cells use for the fitted
# ARIMA model; re-run the fit cell before re-running those cells.
result = avg_errors.pivot(index = 'p', columns = 'q')

In [None]:
# Walk-forward one-step forecasts of the last observations with an
# ARIMA(10, 0, 9) model, plotted against the held-out actual values.
dta_array = df.values
n_test = 10  # number of trailing observations held out for evaluation
# Train on everything BEFORE the hold-out window. Slicing `[:10]` here would
# fit on only the first 10 observations and then splice the final 10 onto
# them, producing a series that is not contiguous in time.
X_train, X_test = dta_array[:-n_test], dta_array[-n_test:]
X_test_orig = X_test

fcst = []
for step in range(n_test):
    mod = ARIMA(X_train, order = (10,0,9))
    res = mod.fit()
    # Keep the single forecast value as a scalar.
    fcst.append(res.forecast(steps = 1)[0])
    # Reveal the next true observation to the model before the next refit.
    X_train = np.concatenate((X_train, X_test[0:1,:]))
    X_test = X_test[1:]

plt.plot(X_test_orig)
plt.plot(fcst)
plt.legend(['Actual Sunspots', 'Predicted Sunspots'])
plt.xlabel('Time Steps of Test Data')
plt.show()