In [1]:
import numpy as np
import pandas as pd
import math
import os
import sys


# Make the parent of the current working directory importable, adding it to
# sys.path only once. NOTE(review): presumably this is what lets the
# project-local `time_series` package resolve -- confirm the repo layout.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

In [2]:
from time_series.data_manager import DataManager
from time_series.models.AutoArima import AutoARIMA


In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Get data from transaction fact

In [4]:
# Single DataManager instance; later cells call its aggregate_data() and
# filter_outlier() methods.
data_manager = DataManager()

In [5]:
# Warehouses whose daily series are passed through the outlier filter.
warehouses = ['Hà Nội', 'Bình Dương', 'Đà Nẵng']

# aggregate_data() is unpacked into four values; only the first is kept.
dfbyday, _, _, _ = data_manager.aggregate_data()

# Store the filter_outlier() result for each warehouse as the column of the
# same name.
for name in warehouses:
    filtered = data_manager.filter_outlier(name)
    dfbyday[name] = filtered

In [6]:
# Show the per-day frame after the outlier-filtered columns were assigned.
dfbyday

Unnamed: 0_level_0,Hà Nội,Bình Dương,Đà Nẵng
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-11-02,5.000000,7.0,1.0
2018-11-03,2.000000,3.0,0.0
2018-11-04,6.000000,2.0,1.0
2018-11-05,4.333333,0.0,2.0
2018-11-06,27.000000,9.0,1.0
...,...,...,...
2019-11-01,32.000000,4.0,22.0
2019-11-02,26.000000,8.0,6.0
2019-11-03,9.000000,13.0,5.0
2019-11-04,73.000000,16.0,12.0


In [7]:
# Fit one AutoARIMA per column of dfbyday and keep it under the column name.
models = {}
for warehouse in dfbyday.columns:
    print(f'Fit ARIMA for {warehouse}')
    # The whole frame is passed together with the column to model; the third
    # argument is left as None.
    models[warehouse] = AutoARIMA(dfbyday, warehouse, None)

Fit ARIMA for Hà Nội
Fit ARIMA: order=(1, 0, 1) seasonal_order=(0, 0, 1, 7); AIC=3270.352, BIC=3289.906, Fit time=2.132 seconds
Fit ARIMA: order=(0, 0, 0) seasonal_order=(0, 0, 0, 7); AIC=3383.322, BIC=3391.144, Fit time=0.020 seconds
Fit ARIMA: order=(1, 0, 0) seasonal_order=(1, 0, 0, 7); AIC=3279.183, BIC=3294.826, Fit time=1.626 seconds
Fit ARIMA: order=(0, 0, 1) seasonal_order=(0, 0, 1, 7); AIC=3299.585, BIC=3315.228, Fit time=1.109 seconds
Fit ARIMA: order=(1, 0, 1) seasonal_order=(1, 0, 1, 7); AIC=3224.238, BIC=3247.702, Fit time=4.465 seconds
Fit ARIMA: order=(1, 0, 1) seasonal_order=(1, 0, 0, 7); AIC=3269.641, BIC=3289.195, Fit time=2.811 seconds
Fit ARIMA: order=(1, 0, 1) seasonal_order=(1, 0, 2, 7); AIC=3217.518, BIC=3244.894, Fit time=7.551 seconds
Fit ARIMA: order=(0, 0, 1) seasonal_order=(1, 0, 2, 7); AIC=3234.547, BIC=3258.012, Fit time=5.407 seconds
Fit ARIMA: order=(2, 0, 1) seasonal_order=(1, 0, 2, 7); AIC=3228.481, BIC=3259.767, Fit time=7.258 seconds
Fit ARIMA: order

In [8]:
def get_index_of_forecast(df, period):
    """Return the labels for the `period` days that follow the last row of `df`.

    Parameters
    ----------
    df : object exposing ``index``
        Its last index label must support adding a pandas timedelta
        (e.g. a DataFrame indexed by timestamps).
    period : int
        Number of daily steps to generate.

    Returns
    -------
    list
        ``df.index[-1] + 1 day`` through ``df.index[-1] + period days``.
        Empty when ``period`` is not positive.
    """
    return [
        df.index[-1] + pd.to_timedelta(step, unit='D')
        for step in range(1, period + 1)
    ]
    

In [9]:
# Forecast frames keyed by horizon label ('1_day', '7_day', ...); filled by the
# per-horizon cells and written to CSV at the end of the notebook.
forecast_dfs = {}

In [10]:
period = 1

# Reset the per-horizon containers so nothing carries over between cells.
forecast = {}
conf_int = {}

# Take the warehouse list from the fitted models, as the other horizon cells
# do, instead of relying on the hand-typed list from the data-loading cell.
# This keeps every horizon covering exactly the models that were fitted.
warehouses = list(models.keys())
for warehouse in warehouses:
    # forecast() returns a pair; the second element is stored as conf_int.
    forecast[warehouse], conf_int[warehouse] = models[warehouse].forecast(period)

# Index the result by the `period` days that follow the last row of dfbyday.
forecast_dfs['1_day'] = pd.DataFrame(forecast, index=get_index_of_forecast(dfbyday, period))
forecast_dfs['1_day']

Unnamed: 0,Hà Nội,Bình Dương,Đà Nẵng
2019-11-06,43.0,18.0,13.0


In [11]:
period = 7

# Start from empty containers so results from another horizon do not carry over.
forecast, conf_int = {}, {}

# One forecast per fitted model; forecast() yields a pair that is split into
# the `forecast` and `conf_int` dictionaries.
warehouses = list(models)
for name in warehouses:
    predicted, interval = models[name].forecast(period)
    forecast[name] = predicted
    conf_int[name] = interval

# Index the result by the `period` days that follow the last row of dfbyday.
future_index = get_index_of_forecast(dfbyday, period)
forecast_dfs['7_day'] = pd.DataFrame(forecast, index=future_index)
forecast_dfs['7_day']

Unnamed: 0,Hà Nội,Bình Dương,Đà Nẵng
2019-11-06,43.0,18.0,13.0
2019-11-07,56.0,20.0,14.0
2019-11-08,47.0,22.0,14.0
2019-11-09,30.0,11.0,14.0
2019-11-10,29.0,9.0,14.0
2019-11-11,50.0,23.0,14.0
2019-11-12,49.0,20.0,14.0


In [12]:
period = 14

# Start from empty containers so results from another horizon do not carry over.
forecast, conf_int = {}, {}

# One forecast per fitted model; forecast() yields a pair that is split into
# the `forecast` and `conf_int` dictionaries.
warehouses = list(models)
for name in warehouses:
    predicted, interval = models[name].forecast(period)
    forecast[name] = predicted
    conf_int[name] = interval

# Index the result by the `period` days that follow the last row of dfbyday.
future_index = get_index_of_forecast(dfbyday, period)
forecast_dfs['14_day'] = pd.DataFrame(forecast, index=future_index)
forecast_dfs['14_day']

Unnamed: 0,Hà Nội,Bình Dương,Đà Nẵng
2019-11-06,43.0,18.0,13.0
2019-11-07,56.0,20.0,14.0
2019-11-08,47.0,22.0,14.0
2019-11-09,30.0,11.0,14.0
2019-11-10,29.0,9.0,14.0
2019-11-11,50.0,23.0,14.0
2019-11-12,49.0,20.0,14.0
2019-11-13,43.0,17.0,14.0
2019-11-14,50.0,25.0,14.0
2019-11-15,45.0,13.0,14.0


In [13]:
period = 30

# Start from empty containers so results from another horizon do not carry over.
forecast, conf_int = {}, {}

# One forecast per fitted model; forecast() yields a pair that is split into
# the `forecast` and `conf_int` dictionaries.
warehouses = list(models)
for name in warehouses:
    predicted, interval = models[name].forecast(period)
    forecast[name] = predicted
    conf_int[name] = interval

# Index the result by the `period` days that follow the last row of dfbyday.
future_index = get_index_of_forecast(dfbyday, period)
forecast_dfs['30_day'] = pd.DataFrame(forecast, index=future_index)
forecast_dfs['30_day']

Unnamed: 0,Hà Nội,Bình Dương,Đà Nẵng
2019-11-06,43.0,18.0,13.0
2019-11-07,56.0,20.0,14.0
2019-11-08,47.0,22.0,14.0
2019-11-09,30.0,11.0,14.0
2019-11-10,29.0,9.0,14.0
2019-11-11,50.0,23.0,14.0
2019-11-12,49.0,20.0,14.0
2019-11-13,43.0,17.0,14.0
2019-11-14,50.0,25.0,14.0
2019-11-15,45.0,13.0,14.0


In [14]:
# Write every forecast frame to ../data/forecast_<key>.csv, one file per
# horizon label. The file name depends only on the key, so no per-frame date
# strings are computed here.
for key, df in forecast_dfs.items():
    file_name = f'forecast_{key}.csv'
    df.to_csv(f'../data/{file_name}')