In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pandas as pd
import numpy as np

# Apply matplotlib's 'default' style sheet to the figures created below.
plt.style.use('default')

In [None]:
# Read the gold price CSV and convert the 'Date' strings (YYYY-MM-DD) to
# datetimes in a single chained expression.
data = (
    pd.read_csv("../input/learn-time-series-forecasting-from-gold-price/gold_price_data.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d'))
)
print("Number of rows and columns:", data.shape)
data.head()

In [None]:
f, ax = plt.subplots(figsize=(16, 4))
# Draw on the axes created above (explicit instead of relying on the current axes).
sns.lineplot(data=data, x="Date", y="Value", ax=ax)

# Grid: keep it beneath the line, solid for major ticks and dotted for minor ticks.
ax.set_axisbelow(True)
ax.minorticks_on()
ax.grid(which='major', color='black', linewidth=0.5, linestyle='-')
ax.grid(which='minor', color='black', linewidth=0.5, linestyle=':', alpha=0.7)

# Restrict the x-axis to 1970-01-01 .. 2021-01-01.
ax.set_xlim(datetime(1970, 1, 1), datetime(2021, 1, 1));

In [None]:
f, ax = plt.subplots(figsize=(16, 4))

# Mean value per year, month and quarter, bucketed on the 'Date' column.
data_resample_y = data.resample('Y', on='Date').mean()
data_resample_m = data.resample('M', on='Date').mean()
data_resample_q = data.resample('Q', on='Date').mean()

# Overlay the original series with each aggregated version (same draw order
# as before, so the colour cycle is unchanged).
for frame, label in (
    (data, "original"),
    (data_resample_y, "year frequency"),
    (data_resample_q, "quarter frequency"),
    (data_resample_m, "month frequency"),
):
    sns.lineplot(data=frame, x="Date", y="Value", label=label, ax=ax)

In [None]:
# Autocorrelation plot of the yearly-mean series; the trailing ';' hides the Axes repr.
pd.plotting.autocorrelation_plot(data_resample_y);

In [None]:
f, ax = plt.subplots(figsize=(16, 4))

# Centered rolling mean over 365 observations (at least 360 must be present).
# Only the numeric "Value" column is selected: a mean of the datetime "Date"
# column is not meaningful, and recent pandas versions are expected to raise
# instead of silently dropping it (verify against the installed pandas).
data_rolling = data[["Value"]].rolling(365, center=True, min_periods=360).mean()

# Raw series with the smoothed series on top, both on the same axes.
sns.lineplot(data=data, x="Date", y="Value", ax=ax)
sns.lineplot(data=data, x="Date", y=data_rolling["Value"], ax=ax);

In [None]:
# First-order difference of every column; the first row (all NaN/NaT) is dropped.
data_diff = data.diff(periods=1).dropna()

fig = plt.figure(figsize=(16, 4))
# x and y are aligned on the row index by seaborn.
sns.lineplot(y=data_diff["Value"], x=data["Date"]);

In [None]:
# 7-step difference over rows 2000..2499 only; the leading NaN rows are dropped.
data_diff_sample = data.iloc[2000:2500].diff(periods=7).dropna()

fig = plt.figure(figsize=(16, 4))
sns.lineplot(y=data_diff_sample["Value"], x=data["Date"].iloc[2000:2500]);

In [None]:
fig = plt.figure(figsize=(10, 5))
# Distribution of the first differences: 50-bin histogram with a KDE overlay.
sns.histplot(data=data_diff, x="Value", bins=50, kde=True);
# Alternative (KDE only): sns.kdeplot(data_diff["Value"]);

## Augmented Dickey-Fuller test

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the differenced series.
# Null hypothesis: the series has a unit root (i.e. is non-stationary).
# The test is run once and its result reused for both reports below.
result = adfuller(data_diff["Value"])
stat, p, lags, obs, crit, t = result

print(f'ADF Statistic: {stat:.3f}')
print(f'p-value: {p:.3f}')

for key, value in crit.items():
    print(f'Critical values({key}): {value:.3f}')

# A statistic below the 1% critical value rejects the unit-root null.
if stat < crit['1%']:
    print("Dataset is stationary!")

print(f'stat: {stat:.5f}\np-value: {p:.5f}')

# p-value reading of the same test: large p => cannot reject non-stationarity.
if p > 0.05:
    print('Probably not Stationary')
else:
    print('Probably Stationary')

## Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test

In [None]:
from statsmodels.tsa.stattools import kpss

# KPSS test on the differenced series.
# Null hypothesis: the series IS stationary (the opposite of the ADF test), so
# a small p-value is evidence AGAINST stationarity.
stat, p, lags, crit = kpss(data_diff["Value"])
print(f'stat: {stat:.5f}\np-value: {p:.5f}')

if p > 0.05:
    # Cannot reject the stationarity null.
    print('Probably Stationary')
else:
    print('Probably not Stationary')

## Order of differencing (d)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Three stacked ACF panels sharing the x-axis: the raw series, then the series
# differenced once and twice. Each panel's title is passed straight to plot_acf.
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(16, 7))

plot_acf(data["Value"], ax=axes[0], title='Original series')
plot_acf(data["Value"].diff().dropna(), ax=axes[1], title='1st order differencing')
plot_acf(data["Value"].diff().diff().dropna(), ax=axes[2], title='2nd order differencing');

In [None]:
!pip install pmdarima

In [None]:
from pmdarima.arima.utils import ndiffs

# Number of differences suggested by each unit-root test for the raw series.
for label, test_name in (("ADF Test", "adf"), ("KPSS test", "kpss"), ("PP test", "pp")):
    print(f"{label}: {ndiffs(data['Value'], test=test_name)}")

## Order of AR term (p)

In [None]:
from pylab import rcParams

# Default figure size for this and later figures that do not set their own.
rcParams['figure.figsize'] = (16, 5)

# Partial autocorrelation of the once-differenced series.
value_diff1 = data["Value"].diff().dropna()
plot_pacf(value_diff1)
plt.show()

## Order of MA term (q)

In [None]:
rcParams['figure.figsize'] = (16, 5)

# Autocorrelation of the once-differenced series.
plot_acf(data["Value"].diff(periods=1).dropna())
plt.show()

In [None]:
# Relative change between consecutive observations (one-period pct_change).
data_change = data["Value"].pct_change()

fig = plt.figure(figsize=(16, 4))
sns.lineplot(y=data_change, x=data["Date"]);

In [None]:
from pylab import rcParams
from statsmodels.tsa.seasonal import seasonal_decompose

# Figure size used by the decomposition plot below.
rcParams['figure.figsize'] = (10, 7)

# Multiplicative decomposition of the series with a period of 120 observations.
result = seasonal_decompose(data['Value'], period=120, model='multiplicative')
result.plot();

In [None]:
# Natural log of the series.
data_log = np.log(data["Value"])

fig = plt.figure(figsize=(16, 4))
sns.lineplot(y=data_log, x=data["Date"]);

In [None]:
# First 20 rows, with calendar year and month derived from 'Date'
# (.assign returns a new frame, so `data` itself is not modified).
df_temp = data.iloc[:20].assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
)

plt.figure(figsize=(10, 5))
sns.pointplot(data=df_temp, x='Month', y='Value', hue='Year');

# ARIMA model

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Positional split: the first 10500 observations train the model, the rest are held out.
value_series = data["Value"]
train = value_series.iloc[:10500]
test = value_series.iloc[10500:]

# ARIMA with p=1, d=1, q=1.
model = ARIMA(train, order=(1, 1, 1))
model_fit = model.fit()
print(model_fit.summary())

In [None]:
rcParams['figure.figsize'] = (12, 3)

# Residuals of the fitted model, wrapped in a DataFrame for plotting.
residuals = pd.DataFrame(model_fit.resid)

# Left panel: residuals as a line plot; right panel: their kernel density estimate.
fig, ax = plt.subplots(nrows=1, ncols=2)
residuals.plot(ax=ax[0], title="Residuals")
residuals.plot(ax=ax[1], title='Density', kind='kde');

In [None]:
# The results object from statsmodels.tsa.arima.model.ARIMA returns only point
# forecasts from forecast(); the (forecast, stderr, conf_int) 3-tuple belonged
# to the legacy arima_model API. Unpacking forecast(steps=3) therefore yields
# three scalars, and the first one would be broadcast over the whole test index.
# get_forecast() exposes the mean, standard error and confidence interval, and
# the horizon is set to the length of the hold-out set.
forecast_res = model_fit.get_forecast(steps=len(test))
fc = forecast_res.predicted_mean
se = forecast_res.se_mean
conf = forecast_res.conf_int(alpha=0.05)  # 95% confidence interval
# arima_predict = model_fit.predict(start=0, end=15000, dynamic=False)

# Re-index the forecast positionally onto the test rows so it lines up when plotted.
fc_series = pd.Series(np.asarray(fc), index=test.index)

In [None]:
f, ax = plt.subplots(figsize=(16, 4))

# Full series, then the training part and the forecast drawn over it.
# The Series passed as y are matched to data["Date"] through the row index.
sns.lineplot(data=data, x="Date", y="Value", label='Actual', ax=ax)
sns.lineplot(data=data, x="Date", y=train, label='Train', color='orange', ax=ax)
sns.lineplot(data=data, x="Date", y=fc_series, label='Forecast', color='g', ax=ax);
# sns.lineplot(data=data, x="Date", y=arima_predict, color='brown', label='Predict');

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMAX with order (1, 1, 1) on the training series.
# NOTE: `model` and `model_fit` from the ARIMA section are overwritten here.
model = SARIMAX(train, order=(1, 1, 1))
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Non-dynamic predictions for positions 0..15000.
# NOTE(review): 15000 is hard-coded — confirm it matches the intended horizon.
sarimax_predict = model_fit.predict(dynamic=False, start=0, end=15000)

f, ax = plt.subplots(figsize=(16, 4))

sns.lineplot(data=data, x="Date", y="Value", label='Actual', ax=ax)
sns.lineplot(data=data, x="Date", y=sarimax_predict, label='Predict', color='brown', ax=ax);

# Prophet

In [None]:
# Copy of the data with the columns renamed to 'ds' (date) and 'y' (value),
# the names passed to Prophet below.
data_fb = data.copy().rename(columns={'Date': 'ds', 'Value': 'y'})

# Same positional split as the ARIMA section: first 10500 rows train, rest test.
train_fb = data_fb.iloc[:10500]
test_fb = data_fb.iloc[10500:]

In [None]:
# The package was renamed from `fbprophet` to `prophet` as of version 1.0;
# try the current name first and fall back to the legacy one.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Fit Prophet with default settings on the training frame.
model_fb = Prophet()
model_fb.fit(train_fb)

# Extend the training dates by 200 periods and predict over history + future.
future = model_fb.make_future_dataframe(periods=200)
forecast_fb = model_fb.predict(future)

In [None]:
# Keep only the rows after the first 10500 (the part beyond the training rows).
forecast_fb_plot = forecast_fb.iloc[10500:]

plt.figure(figsize=(16, 5))
plt.plot(train_fb['ds'], train_fb['y'], label='Train')
plt.plot(test_fb['ds'], test_fb['y'], label='Test')
plt.plot(forecast_fb_plot['ds'], forecast_fb_plot['yhat'], label='Prophet Forecast')
plt.legend(loc='best');

# Holt-Winters

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing

# Same positional split as the earlier models.
train_hw = data["Value"].iloc[:10500]
test_hw = data["Value"].iloc[10500:]

# Exponential smoothing with a multiplicative trend, forecast 300 steps ahead.
# NOTE: `future` is reused here for the fitted model (it held a DataFrame in the
# Prophet section).
future = ExponentialSmoothing(train_hw, trend='mul').fit()
forecast_hw = future.forecast(300)

plt.figure(figsize=(16, 4))
for series, label in ((train_hw, 'Train'), (test_hw, 'Test'), (forecast_hw, 'Forecast')):
    plt.plot(series, label=label)
plt.legend(loc='best');