In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

import io
import pyarrow.parquet as pq
import pickle

from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA

from statistics import mean
# Optimization
from bayes_opt import BayesianOptimization

### Load Data

In [3]:
# Directory holding the pickled datasets, and the file name of each one.
PATH_DATA='D:/DSLS/Data Science/data/raw/'
filename_store='/preprocessed_dataset.pkl'
filename_valid='/validation_dataset.pkl'
filename_rec = '/recurring_dataset.pkl'


def _read_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    # NOTE(review): pickle.load executes arbitrary code from the file;
    # only use it on files from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


df = _read_pickle(PATH_DATA + filename_store)
df_val = _read_pickle(PATH_DATA + filename_valid)
df_rec = _read_pickle(PATH_DATA + filename_rec)

In [10]:
# Street names used as keys into the recurring dataset.
streets = [
    'Abdul Wahab',
    'Abdurrahman',
    'Anggrek',
    'Arah Tol Lkr Luar/TMII/Cawang',
    'Arif Rahman Hakim',
]

In [11]:
# Flatten the recurring dataset into one list: for every street, take the
# entries stored under keys 0..6 (presumably one per weekday - TODO confirm).
samples = []

for street in streets:
    samples.extend([df_rec[street][day] for day in range(7)])

### Hyperparameter Tuning (GRID SEARCH)

In [39]:
def split_train_test(data, train_frac=0.9):
    """Split ``data`` by position into a leading train part and a trailing test part.

    The data is not shuffled: the first ``int(train_frac * len(data))``
    elements are returned as the training part and everything after that as
    the test part, both obtained with plain slice notation.

    Parameters
    ----------
    data : sliceable, sized object
        Anything supporting ``len()`` and ``data[a:b]`` slicing.
    train_frac : float, default 0.9
        Fraction of ``data`` assigned to the training part; must lie in
        ``[0, 1]``.

    Returns
    -------
    tuple
        ``(train_data, test_data)``.

    Raises
    ------
    ValueError
        If ``train_frac`` is outside ``[0, 1]``.
    """
    if not 0 <= train_frac <= 1:
        raise ValueError(f"train_frac must be within [0, 1], got {train_frac!r}")

    # Compute the boundary once so both slices are guaranteed to agree.
    cutoff = int(train_frac * len(data))
    return data[:cutoff], data[cutoff:]

In [53]:
def black_box_arima(p,d,q):
    """Return the mean out-of-sample MAPE of an ARIMA(p, d, q) model over ``samples``.

    For every entry of the module-level ``samples`` list the entry is split
    with ``split_train_test``, an ARIMA model is fitted on the training part
    of its ``median_delay`` column, a forecast covering the length of the test
    part is produced, and the MAPE between the held-out values and the
    forecast is recorded.

    Parameters
    ----------
    p, d, q : number
        ARIMA order components. Each one is passed through ``int()`` before
        use (presumably so a continuous optimizer can supply floats -
        TODO confirm against callers).

    Returns
    -------
    float
        Arithmetic mean of the per-sample values returned by
        ``mean_absolute_percentage_error``.

    Raises
    ------
    statistics.StatisticsError
        If ``samples`` is empty (``mean`` of an empty list).
    Exception
        Any error raised while fitting or forecasting is propagated unchanged.
    """
    order = (int(p), int(d), int(q))
    mape = list()

    for sample in samples:
        # splitting
        train, test = split_train_test(sample)
        data_train = train['median_delay']
        data_test = test['median_delay']

        # modeling
        fitted_model = ARIMA(data_train, order=order).fit()

        # predict: the forecast result already carries the point forecasts,
        # so no confidence-interval frame or second predict() pass is needed.
        forecast = fitted_model.get_forecast(len(data_test.index)).predicted_mean

        # Values are compared by position; the forecast's own index is ignored.
        mape.append(mean_absolute_percentage_error(data_test.values, forecast))

    return mean(mape)

In [66]:
# Split the first sample and keep its train/test 'median_delay' columns around.
# Note: black_box_arima() below does not use these variables; it iterates over
# every entry of `samples` on its own.
train, test = split_train_test(samples[0])
data_train, data_test = train['median_delay'], test['median_delay']

# modeling
y = data_train
mape = black_box_arima(0, 1, 3)

print(mape)

0.38835101212710915


In [73]:
# Candidate values 0..4 for each ARIMA order component (each key gets its own list).
param_bounds = {name: list(range(5)) for name in ('p', 'd', 'q')}

In [None]:
# Exhaustive grid search: evaluate black_box_arima for every (p, d, q)
# combination in param_bounds and print one table row per combination.
print("  p  |  d  |  q  |  mape")
print("-------------------------------------------")

for p in param_bounds['p']:
    # Iterate over the 'd' candidates (the original looped over 'q' here).
    for d in param_bounds['d']:
        for q in param_bounds['q']:
            try:
                mape = black_box_arima(p,d,q)
            except Exception:
                # Keep the row in the table so orders that could not be
                # fitted/evaluated stay visible; KeyboardInterrupt is not
                # caught, so a long search can still be aborted.
                mape ='Failed'

            print(f"  {p}  |  {d}  |  {q}  |  {mape}")

  p  |  d  |  q  |  mape
-------------------------------------------
  0  |  0  |  0  |  Failed
  0  |  0  |  1  |  Failed
  0  |  0  |  2  |  Failed
  0  |  0  |  3  |  Failed
  0  |  0  |  4  |  Failed
  0  |  1  |  0  |  0.524076009511157
  0  |  1  |  1  |  0.47314669800083453
  0  |  1  |  2  |  Failed
  0  |  1  |  3  |  0.38835101212710915
  0  |  1  |  4  |  Failed
  0  |  2  |  0  |  2.8259285951711193
  0  |  2  |  1  |  0.564643185691827
  0  |  2  |  2  |  0.4940296682422924
  0  |  2  |  3  |  0.4745626784706283
  0  |  2  |  4  |  Failed
  0  |  3  |  0  |  30.265485741901756
  0  |  3  |  1  |  Failed
  0  |  3  |  2  |  Failed
  0  |  3  |  3  |  Failed
  0  |  3  |  4  |  Failed
  0  |  4  |  0  |  327.0448355174094
  0  |  4  |  1  |  Failed
  0  |  4  |  2  |  Failed
  0  |  4  |  3  |  Failed
  0  |  4  |  4  |  Failed
  1  |  0  |  0  |  Failed
  1  |  0  |  1  |  Failed
  1  |  0  |  2  |  Failed
  1  |  0  |  3  |  Failed
  1  |  0  |  4  |  Failed
  1  |  1  |  