## Persiapan Dataset

### Download dataset

In [None]:
# Shell escape: download the tourist-arrivals CSV with wget.
# The later cells read it back as "tourist_arrivals.csv" via a relative path.
! wget https://www.dropbox.com/s/jteeamhlxigq6t0/tourist_arrivals.csv

In [None]:
# Shell escape: print the first lines of the downloaded file as a quick sanity check.
! head tourist_arrivals.csv

### Persiapan Library

In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot  as  plt
from sklearn.metrics import mean_squared_error
from math import sqrt

#from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Helper functions for ARIMA grid search
import warnings

def evaluate_arima_model(data_train, data_test, arima_order, print_result = False):
    """Fit one ARIMA model and score its forecast against a hold-out series.

    Args:
        data_train: Series the ARIMA model is fitted on.
        data_test: Series the forecast is compared with; its ``size`` sets the
            forecast horizon and its ``index`` is reused for the forecast.
        arima_order: Order passed to ``ARIMA(..., order=...)``.
        print_result: When true, also plot the actual values against the
            forecast.

    Returns:
        The root mean squared error between ``data_test`` and the forecast.
    """
    fitted = ARIMA(data_train, order=arima_order).fit()

    # Forecast as many steps as there are test observations.
    # NOTE(review): the [0] below assumes forecast() returns a sequence whose
    # first item holds the point forecasts - confirm for the installed
    # statsmodels version.
    forecast_out = fitted.forecast(data_test.size, alpha=0.05)
    predicted = pd.Series(forecast_out[0], index=data_test.index)

    rmse = sqrt(mean_squared_error(data_test, predicted))

    # Nothing to draw: hand back the score straight away.
    if not print_result:
        return rmse

    plt.figure(figsize=(20, 5))
    plt.plot(data_test, label='actual')
    plt.plot(predicted, label='model arima')
    plt.title('ARIMA Evaluation Model')
    plt.legend()
    plt.grid()
    plt.xticks(rotation=90)
    plt.show()

    return rmse

def evaluate_models(data_train, data_test, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the RMSE of each, then the best one.

    Every (p, d, q) combination is scored with ``evaluate_arima_model``.
    Combinations whose evaluation raises an ordinary exception are skipped
    without any output.

    Args:
        data_train: Training series handed to ``evaluate_arima_model``.
        data_test: Test series handed to ``evaluate_arima_model``.
        p_values: Iterable of candidate p values.
        d_values: Iterable of candidate d values.
        q_values: Iterable of candidate q values.

    Returns:
        None. Results are reported with ``print`` only; if no combination
        succeeds the final line reports order ``None`` with RMSE ``inf``.
    """
    from itertools import product

    best_score, best_cfg = float("inf"), None

    # product() iterates with p outermost and q innermost, the same order as
    # three nested loops, and also works when the inputs are one-shot iterables.
    for order in product(p_values, d_values, q_values):
        try:
            rmse = evaluate_arima_model(data_train, data_test, order)
        except Exception:
            # Best-effort search: a failing order is skipped. Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt / SystemExit stop a long-running search.
            continue

        if rmse < best_score:
            best_score, best_cfg = rmse, order
        print('ARIMA%s RMSE=%.3f' % (order, rmse))

    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

### Load dataset ke Pandas Dataframe

In [None]:
# Load the downloaded CSV into a DataFrame.
df = pd.read_csv("tourist_arrivals.csv")

In [None]:
# Show the first 5 rows
df.head(5)

In [None]:
# Display summary statistics for the DataFrame.
df.describe()

In [None]:
# Plot the "value" column on a wide (20x5) figure; the x-axis is the
# DataFrame's current index because no x is given.
df.plot(y=["value"], figsize = (20,5))

### Ubah index ke datetime

In [None]:
# Work on a copy so the changes below do not modify `df`.
df_tourist = df.copy()

In [None]:
# Parse the 'date' column into datetimes and store them in a new
# 'date_arrival' column; the original 'date' column is kept.
df_tourist['date_arrival'] = pd.to_datetime(df_tourist['date'])
# Print the column summary to check the result of the conversion.
df_tourist.info()

In [None]:
# Inspect the first rows, now including the 'date_arrival' column.
df_tourist.head()

In [None]:
# Make the parsed dates the index; inplace=True modifies df_tourist itself.
df_tourist.set_index('date_arrival', inplace=True)
df_tourist.head()

### Split Dataset

In [None]:
# Take the 'value' column as the time series to model; it carries
# df_tourist's index (the 'date_arrival' datetimes set earlier).
ts_tourist = df_tourist['value']
ts_tourist.head()

In [None]:
# Split by date label: everything up to 2017-12-01 is used for training and
# everything from 2018-01-01 onwards for testing (label slices include their
# endpoints).
# NOTE(review): assumes no observations fall strictly between these two
# dates (e.g. monthly data) - otherwise they end up in neither split.
ts_train = ts_tourist[:'2017-12-01']
ts_test = ts_tourist['2018-01-01':]

In [None]:
# Draw the training and test series on one wide figure, in different colours,
# to show where the split falls.
plt.figure(figsize=(20,5))
plt.plot(ts_train, label='training', color='blue')
plt.plot(ts_test, label='test', color='green')
plt.grid()
# Rotate the tick labels so the dates do not overlap.
plt.xticks(rotation=90)
plt.show()

## Model ARIMA

In [None]:
# Candidate ARIMA orders for the grid search: 8 p values x 4 d values x
# 3 q values = 96 combinations.
p_values = [0, 1, 2, 3, 4, 6, 8, 9]
d_values = range(0, 4)
q_values = range(0, 3)
# Silence all warnings from here on; this filter stays active for the rest of
# the session, not just for this cell.
warnings.filterwarnings("ignore")
evaluate_models(ts_train, ts_test, p_values, d_values, q_values)

In [None]:
# Evaluate the single order (9, 1, 1) again, this time with plotting enabled
# (fourth argument True), and print its RMSE.
rmse_val = evaluate_arima_model(ts_train, ts_test, (9, 1, 1), True)
print('RMSE=%.3f' % (rmse_val))

## Model SARIMA

In [None]:
# Fit a SARIMAX model with order (10, 1, 7) on the training series.
# No seasonal_order is passed in this cell.
model = SARIMAX(ts_train, order=(10,1,7))
# model = SARIMAX(ts_train, order=(9,1,1))
model_sarima01 = model.fit()

In [None]:
# Forecast one step per test observation so the prediction has the same
# length as the test series.
n_test = ts_test.shape[0]
ts_prediction01 = model_sarima01.forecast(steps=n_test)

In [None]:
# Compare the test series with the forecast of the model fitted without a
# seasonal order.
plt.figure(figsize=(20,5))

# plt.plot(ts_train, label='train')
plt.plot(ts_test, label='actual')
plt.plot(ts_prediction01, label='model 01')

plt.title('SARIMA Forecasting')
plt.legend()
plt.grid()
# Rotate the tick labels so the dates do not overlap.
plt.xticks(rotation=90)
plt.show()

In [None]:
from statsmodels.tools.eval_measures import rmse

# Score the forecast of the model fitted without a seasonal order.
# The forecast is stored in `ts_prediction01`; the name `ts_pred` used here
# before is never defined, so the cell raised NameError.
# The RMSE is divided by the range of the test values (max - min), so the
# printed figure is a normalised RMSE even though the label says 'RMSE'.
nrmse = rmse(ts_prediction01, ts_test)/(np.max(ts_test)-np.min(ts_test))
print('RMSE=%.3f' % (nrmse))

## Hasil: RMSE ternormalisasi (NRMSE) = 0.050

## Model SARIMA with Seasonal

In [None]:
# Same order (10, 1, 7) as the earlier SARIMAX fit, now with
# seasonal_order=(1, 0, 1, 12).
model_seasonal = SARIMAX(ts_train, order=(10,1,7), seasonal_order=(1,0,1,12))
model_fit_seasonal = model_seasonal.fit()

In [None]:
# Forecast one step per test observation with the seasonal model.
n_test = ts_test.shape[0]
ts_pred_seasonal = model_fit_seasonal.forecast(steps=n_test)

In [None]:
# Compare the test series with the seasonal model's forecast.
# NOTE(review): the legend label 'model 01' and the title are the same as in
# the non-seasonal plot - confirm whether this plot should be labelled
# differently.
plt.figure(figsize=(20,5))

# plt.plot(ts_train, label='train')
plt.plot(ts_test, label='actual')
plt.plot(ts_pred_seasonal, label='model 01')

plt.title('SARIMA Forecasting')
plt.legend()
plt.grid()
# Rotate the tick labels so the dates do not overlap.
plt.xticks(rotation=90)
plt.show()

In [None]:
# RMSE of the seasonal forecast divided by the range of the test values
# (max - min): the printed figure is a normalised RMSE even though the label
# says 'RMSE'. `rmse` here is the function imported from
# statsmodels.tools.eval_measures in an earlier cell.
nrmse = rmse(ts_pred_seasonal, ts_test)/(np.max(ts_test)-np.min(ts_test))
print('RMSE=%.3f' % (nrmse))