# Set up drive and load dataframe 

In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
# Mount Google Drive at /content/gdrive using the google.colab helper imported
# above; the dataset is read from this mount in the next cell.
drive.mount('/content/gdrive')

In [3]:
# Load the CSV from the mounted drive and give its seven columns explicit
# names: one timestamp column followed by region_1 .. region_6.
csv_path = 'gdrive/MyDrive/SumoSimulationWeeklyDataset/cpmob.csv'
df = pd.read_csv(csv_path)
df.columns = ['timestamp'] + ['region_%d' % n for n in range(1, 7)]
df.head(5)

Unnamed: 0,timestamp,region_1,region_2,region_3,region_4,region_5,region_6
0,1.0,154.0,184.0,107.0,119.0,94.0,89.0
1,2.0,322.0,358.0,196.0,236.0,147.0,176.0
2,3.0,469.0,451.0,334.0,375.0,165.0,246.0
3,4.0,574.0,567.0,445.0,460.0,190.0,307.0
4,5.0,608.0,628.0,490.0,533.0,240.0,348.0


# Install the ARIMA package (if not already installed)

In [None]:
! pip install pmdarima

# Set up lead times 

In [5]:
def get_lead_times():
  """Return the relative forecast lead times (steps ahead), 1 through 6, as a new list."""
  return list(range(1, 7))

# Set up data structures to maintain the values of the predictions for each step


In [None]:
# Run in case you want to rerun in the same session -> reset the per-step lists.
# Guarded because this cell sits before the cell that first creates
# predictionSteps, so an unguarded call raises NameError on a fresh kernel.
# The list is refilled with six empty inner lists rather than .clear()-ed:
# the forecasting loop indexes predictionSteps[i], which fails on an empty
# outer list.
if 'predictionSteps' in globals():
  predictionSteps[:] = [[] for _ in range(6)]

In [8]:
# One independent, initially empty list per forecast step (six in total).
predictionSteps = [[] for _ in range(6)]

In [None]:
# Run in case you want to rerun in the same session -> reset list.
# Guarded because this cell sits before the cell that first creates testSteps,
# so an unguarded call raises NameError on a fresh kernel.
if 'testSteps' in globals():
  testSteps.clear()

In [10]:
# Build the six reference series used for evaluation.
# testSteps[k] is the slice of region_6 that starts k rows after `size` and
# ends k rows after len(X) - offset, so all six slices have the same length.
offset = 6
size = 1_152
X = df.region_6.values
testSteps = [X[size + step:int(len(X) - offset + step)] for step in range(6)]

# Univariate Multistep forecast implementation (for 6 steps ahead) 

In [None]:
from numpy import array
from numpy import nanmedian
from statsmodels.tsa.arima.model import ARIMA
import time
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
from matplotlib import pyplot
from warnings import catch_warnings
from warnings import filterwarnings

# Walk-forward evaluation on region_6: for every test point the model is refit
# on all observations seen so far, a multi-step forecast is produced, and the
# true observation is then added to the history.
X = df.region_6.values
size = 1_152
train, test = X[0:size], X[size:int(len(X)-offset)]
history = list(train)

# Lead times are 1-based; convert them to 0-based positions in the forecast
# vector and derive how many steps ahead have to be forecast.
lead_times = array(get_lead_times())
indices = lead_times - 1
horizon = int(lead_times.max())

# Start from empty per-step buffers so that re-running this cell never appends
# to the results of a previous run.
predictionSteps = [[] for _ in indices]

total_size = len(test)
for t, obs in enumerate(test):
  print('current=%d, total=%d' % (t + 1, total_size))
  try:
    # define the model
    model = ARIMA(history, order=(0,1,0)) # Define Parameters for ARMA-ARIMA model
    # ignore statsmodels warnings
    with catch_warnings():
      filterwarnings("ignore")
      # fit the model
      model_fit = model.fit()
      # `end` is inclusive, so this yields exactly `horizon` out-of-sample values
      yhat = model_fit.predict(len(history), len(history) + horizon - 1)
    # extract lead times
    step_values = [yhat[k] for k in indices]
  except Exception as err:
    # Record a NaN forecast for every lead time instead of skipping the step:
    # skipping would leave the per-step lists shorter than the test slices and
    # shift every later forecast by one position.
    print("exception: %r" % (err,))
    step_values = [float('nan')] * len(indices)
  for step_preds, value in zip(predictionSteps, step_values):
    step_preds.append(value)
  # Always advance the history, even after a failed fit, so the next
  # iteration forecasts from the correct origin.
  history.append(obs)

# Metrics Calculation and Representation

In [None]:
# Per-step error metrics: scores collects the MAE and scores2 the RMSE of each
# of the six forecast steps, in step order.
scores = []
scores2 = []
for step in range(6):
  actual, forecast = testSteps[step], predictionSteps[step]
  mae = mean_absolute_error(actual, forecast)
  print('MAE: %.3f' % mae)
  scores.append(mae)
  rmse = sqrt(mean_squared_error(actual, forecast))
  print('RMSE: %.3f' % rmse)
  scores2.append(rmse)


In [None]:
# Print one comma-separated summary line per metric, four decimals per step.
s_scores = ', '.join('%.4f' % value for value in scores)
s_scores2 = ', '.join('%.4f' % value for value in scores2)
for label, joined in (("MAE", s_scores), ("RMSE", s_scores2)):
  print('%s:  %s' % (label, joined))