## Persiapan Dataset

In [None]:
! wget https://www.dropbox.com/s/9bfx2ojb6l4uqen/www-usage.csv

In [None]:
# Show the first lines of the downloaded file to eyeball its header row and
# column layout before parsing it with pandas.
! head www-usage.csv

In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot  as  plt
from sklearn.metrics import mean_squared_error
from math import sqrt

# from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Parse the downloaded CSV; the first row of the file supplies the column names
# (pandas' default when no explicit names are given).
df = pd.read_csv("www-usage.csv")

In [None]:
# Keep only the two columns used below, as an independent copy so later edits
# never touch the frame that was read from disk.
df_usage = df.loc[:, ["time", "usage"]].copy()

In [None]:
# Preview the first five rows.
df_usage.head()

In [None]:
# Number of observations available.
len(df_usage)

## Split Dataset

In [None]:
# The first 85% of the rows (rounded down) form the training set.
size = int(0.85 * len(df_usage))

In [None]:
# Report how many rows exist in total and how many go to training.
total_records = len(df_usage)
print("Total no of records : ", total_records)
print("Number of training records : ", size)

In [None]:
#df_train = df_clean[:size]
#df_test = df_clean[size:]

In [None]:
# Model the 'usage' column on its own as a univariate series.
ts_usage = df_usage.loc[:, 'usage']
ts_usage.head(5)

In [None]:
# Chronological split by position: everything before `size` is training data,
# the remainder is held out for evaluation.
ts_train = ts_usage.iloc[:size]
ts_test = ts_usage.iloc[size:]

In [None]:
# Draw the training and test portions on one axis to visualise the split.
plt.figure(figsize=(20,5))
plt.plot(ts_train, label='training', color='blue')
plt.plot(ts_test, label='test', color='green')
# The labels passed to plot() are only rendered once a legend is requested.
plt.legend()
plt.grid()
plt.xticks(rotation=90)
plt.show()

## Training & Test Model

In [None]:
# Fit an ARIMA model with order (p, d, q) = (3, 2, 1) on the training portion
# only. `model` and `model_arima` are reused (and later overwritten) by the
# cells further down.
model = ARIMA(ts_train, order=(3,2,1))
model_arima = model.fit()

In [None]:
# Print the fitted model's summary report.
print(model_arima.summary())

In [None]:
# Number of held-out observations, i.e. the forecast horizon used below.
len(ts_test)

In [None]:
# Forecast as many steps ahead as there are held-out observations. Element 0
# of the returned value is used as the point forecasts and is re-indexed onto
# the test index so it lines up with the actual values in the plot.
fc = model_arima.forecast(ts_test.shape[0], alpha=0.05)
fc_series = pd.Series(fc[0], index=ts_test.index)

# Plot
plt.figure(figsize=(12,6), dpi=100)
plt.plot(ts_train, label='training')
plt.plot(ts_test, label='actual')
plt.plot(fc_series, label='forecast', color='red')
# Without an explicit legend the labels above are never shown.
plt.legend(loc='upper left')

plt.title('ARIMA Forecasting')
plt.show()



## Penggunaan Grid Search

Metode ini mencari model terbaik dengan menguji coba setiap kombinasi parameter (p, d, q).

In [None]:
def forecast(model, train_data, test_data, title='ARIMA Forecasting'):
    """Plot a fitted model's forecast next to the training and held-out data.

    Parameters
    ----------
    model : fitted results object
        Must provide ``forecast(steps, alpha=...)`` whose return value holds
        the point forecasts at index 0.
    train_data : pandas.Series or array-like accepted by ``plt.plot``
        Observations drawn as the 'training' line.
    test_data : pandas.Series
        Held-out observations. Its length sets the forecast horizon and its
        index is reused for the forecast so both lines share the same x-range.
    title : str, optional
        Title placed on the figure.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    horizon = test_data.shape[0]
    fc = model.forecast(horizon, alpha=0.05)
    # Re-index the point forecasts onto the test index so they overlay the
    # actual values instead of starting again at position 0.
    fc_series = pd.Series(fc[0], index=test_data.index)

    # Plot
    plt.figure(figsize=(12,6), dpi=100)
    plt.plot(train_data, label='training')
    plt.plot(test_data, label='actual')
    plt.plot(fc_series, label='forecast', color='red')
    # The labels above are only rendered once a legend is requested.
    plt.legend(loc='upper left')

    plt.title(title)
    plt.show()

Metrik error yang digunakan adalah RMSE (Root Mean Squared Error).

In [None]:
import warnings

def evaluate_arima_model(data_train, data_test, arima_order):
    """Fit one ARIMA order on the training data and score it on the test data.

    Parameters
    ----------
    data_train : series passed to ``ARIMA`` as the data to fit.
    data_test : pandas.Series
        Held-out observations; ``data_test.size`` steps are forecast and the
        result is aligned to ``data_test.index`` before scoring.
    arima_order : tuple
        The ``order`` argument forwarded to ``ARIMA``.

    Returns
    -------
    float
        Root mean squared error between the held-out values and the forecast.
    """
    horizon = data_test.size
    fitted = ARIMA(data_train, order=arima_order).fit()

    # Index 0 of the forecast result holds the point forecasts.
    point_forecast = fitted.forecast(horizon, alpha=0.05)[0]
    predicted = pd.Series(point_forecast, index=data_test.index)

    return sqrt(mean_squared_error(data_test, predicted))

In [None]:
def evaluate_models(data_train, data_test, p_values, d_values, q_values):
    """Grid-search ARIMA orders and report the one with the lowest RMSE.

    Every (p, d, q) combination drawn from the three value collections is
    scored with ``evaluate_arima_model``. Each score is printed as it is
    computed and the best one is printed at the end.

    Parameters
    ----------
    data_train, data_test :
        Forwarded unchanged to ``evaluate_arima_model``.
    p_values, d_values, q_values : iterable
        Candidate values for each component of the ARIMA order. They are
        iterated in nested loops, so pass re-iterable collections (lists,
        ranges) rather than one-shot iterators.

    Returns
    -------
    tuple
        ``(best_order, best_rmse)``; ``(None, inf)`` when no candidate could
        be evaluated.
    """
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(data_train, data_test, order)
                except Exception as exc:
                    # Some orders legitimately fail to fit; keep searching, but
                    # say so instead of hiding the failure. Catching Exception
                    # (not a bare except) lets Ctrl-C still stop a long search.
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))

    if best_cfg is None:
        print('No ARIMA configuration could be evaluated')
    else:
        print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
    return best_cfg, best_score

In [None]:
# Candidate values for each component of the ARIMA order (p, d, q).
p_values = [0, 1, 2, 3, 4]
d_values = range(0, 3)
q_values = range(0, 3)

# The search is expected to emit many warnings while fitting candidates.
# Silence them only for the duration of the search, so the filter does not
# leak into every later cell of the kernel session.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    evaluate_models(ts_train, ts_test, p_values, d_values, q_values)

In [None]:
# Refit on the training data with order (4, 2, 2) -- presumably the best
# configuration printed by the grid search above; confirm against its output.
# This overwrites the `model` / `model_arima` objects created earlier.
model = ARIMA(ts_train, order=(4,2,2))
model_arima = model.fit()

In [None]:
# Plot the refitted model's forecast against the held-out observations.
forecast(model=model_arima, train_data=ts_train, test_data=ts_test)