In [None]:
import math
import pickle
import time
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm
import statsmodels
from pmdarima import model_selection
from sklearn.metrics import mean_squared_error

In [None]:
# Load the CSV named by `dataset`; its first column is parsed as dates and
# becomes the index.
dataset = "worldcup98_may_minute"
csv_path = "../data/" + dataset + ".csv"
df = pd.read_csv(csv_path, index_col=0, parse_dates=True)

# Quick visual check of the loaded frame, followed by its text representation.
fig, ax = plt.subplots()
ax.plot(df)
plt.show()
print(df)

In [None]:
# Aggregate the series into fixed-width bins (mean of each bin) and split the
# resulting "count" values into a training window and a held-out tail.
freq = 20  # bin width in minutes
# "min" is the minute offset alias; the older "T" spelling is deprecated since
# pandas 2.2 and emits a FutureWarning.
downsampled_df = df.resample(f"{freq}min").mean()
seasonal_order = (24 * 60) // freq  # number of bins per day, used as the seasonal period
split = 0.8  # only referenced by the proportional alternative noted below

raw_data = np.asarray(downsampled_df["count"])
# Proportional alternative: train_size = math.floor(len(raw_data) * split)
train_size = 2000 # same as initial window size
train, test = model_selection.train_test_split(raw_data, train_size=train_size)

# Plot both partitions against their timestamps so the split point is visible.
plt.plot(downsampled_df.index[:train_size], train, label="Train")
plt.plot(downsampled_df.index[train_size:], test, label="Test")
plt.legend()
plt.show()

In [None]:
# Estimate the non-seasonal and seasonal differencing orders from the training
# data, each capped at 5, and print them side by side.
diff = pm.arima.ndiffs(train, max_d=5)
seasonal_diff = pm.arima.nsdiffs(train, m=seasonal_order, max_D=5)
print(f"{diff} {seasonal_diff}")

In [None]:
# Stepwise auto_arima search for a seasonal model on the training window,
# reporting how long the search took.
#
# NOTE(review): d and D are fixed at 1 rather than taken from the ndiffs/nsdiffs
# estimates printed earlier in the notebook -- confirm this is intentional.
# NOTE(review): per the pmdarima docs, max_order constrains only the
# non-stepwise search, so it is presumably inert with stepwise=True -- verify.

# A monotonic clock is used so the reported duration cannot be skewed by
# system clock adjustments during a long fit.
fit_started = time.perf_counter()

model = pm.auto_arima(
    train,
    seasonal=True,
    m=seasonal_order,
    suppress_warnings=False,
    d=1,
    D=1,
    start_p=0,
    start_q=0,
    max_order=6,
    stepwise=True,
    trace=True,
)

# Wrapping the elapsed seconds in a timedelta keeps the H:MM:SS.ffffff format.
fit_elapsed = timedelta(seconds=time.perf_counter() - fit_started)

print("Estimated in " + str(fit_elapsed))


In [None]:
# Display the summary report of the model selected by auto_arima.
fit_summary = model.summary()
fit_summary

In [None]:
# Forecast one value for every observation in the held-out test set.
horizon = len(test)
forecasts = model.predict(n_periods=horizon)

In [None]:
# Overlay the forecasts on the held-out observations (x-axis is the step index).
fig, ax = plt.subplots()
ax.plot(forecasts, label="forecast")
ax.plot(test, label="actual")
ax.legend()
plt.show()