In [1]:
from _element import feature_control as ft_c

In [2]:
from _usecase import algorithm_prophet as prpt
from fbprophet.diagnostics import cross_validation as CV

In [3]:
from _element import calculations as calc

In [4]:
import pandas as pd
from pandas.plotting import autocorrelation_plot
import matplotlib.pyplot as plt
import numpy as np

import copy

In [5]:
from datetime import datetime

In [6]:
from _element import varr

In [7]:
# File name of the raw Excel workbook that is loaded below; it is also printed
# as the header line of the metrics report.
INPUT_FILENAME= 'KPP일별_반납회수(13_17)_raw.xlsx'

In [8]:
# Open the workbook named by INPUT_FILENAME from the private_return data folder
# under the project directory (varr.PJ_DIR).
# NOTE(review): xlsx_opener presumably returns a pandas DataFrame - later cells use it as one.
df_raw= ft_c.xlsx_opener(varr.PJ_DIR+'./_element/data/private_return/', INPUT_FILENAME)

In [9]:
# Open the merged temperature/rain workbook from the configured temp-data folder.
df_temp= ft_c.xlsx_opener(varr.PJ_DIR+varr.TEMP_DATA_DIR, 'temp_data_merged.xlsx')

In [10]:
# Start from every column name of the raw frame as a candidate target column.
y_col= df_raw.columns.values.tolist()

In [11]:
# The date column is not a forecasting target: take it out of the list when it
# is there, and leave the list untouched when it is not.
if 'ds' in y_col:
    y_col.remove('ds')

In [12]:
# Show the target column names that remain.
y_col

['y_sum']

In [13]:
# Feature column names: every column of the temperature/rain frame, 'ds' included.
x_col= df_temp.columns.values.tolist()

In [14]:
# Show the feature column names.
x_col

['ds', 'temp_max', 'temp_min', 'rain_amount']

In [15]:
# Parse the date column into datetimes using the strict 'YYYY/MM/DD' format.
# The `box` keyword is no longer passed: it was deprecated in pandas 0.25 and
# removed in 1.0 (passing it raises TypeError there). For Series input the
# result is a Series with or without it, so behaviour on older pandas is unchanged.
df_raw['ds']= pd.to_datetime(df_raw['ds'], format= '%Y/%m/%d', exact=True)

In [16]:
# Parse the date column into datetimes using the strict 'YYYY/MM/DD' format.
# The `box` keyword is no longer passed: it was deprecated in pandas 0.25 and
# removed in 1.0 (passing it raises TypeError there). For Series input the
# result is a Series with or without it, so behaviour on older pandas is unchanged.
df_temp['ds']= pd.to_datetime(df_temp['ds'], format= '%Y/%m/%d', exact=True)

In [17]:
# Inner-join the raw frame with the temperature/rain frame on 'ds', keeping
# only dates present in both.
# NOTE(review): this rebinds df_raw, so re-running the cell merges the
# already-merged frame with df_temp a second time.
df_raw= pd.merge(df_raw, df_temp, how='inner', on='ds')

In [18]:
# Preview the first rows of the merged frame.
df_raw.head()

Unnamed: 0,ds,y_sum,temp_max,temp_min,rain_amount
0,2013-01-01,17399,-0.4,-12.9,3.0
1,2013-01-02,65748,-9.4,-16.200001,0.0
2,2013-01-03,73139,-9.6,-15.3,0.0
3,2013-01-04,78096,-6.2,-13.7,0.0
4,2013-01-05,64780,-2.2,-10.9,0.0


In [19]:
# Day of week for every row (Monday=0 ... Sunday=6), computed once with the
# vectorized datetime accessor instead of three row-wise .apply calls.
# Requires df_raw['ds'] to already hold datetimes.
day_of_week= df_raw['ds'].dt.weekday

# Split the merged frame into Monday-Friday, Saturday and Sunday subsets.
df_weekday= df_raw.loc[day_of_week<=4]
df_sat= df_raw.loc[day_of_week==5]
df_sun= df_raw.loc[day_of_week==6]

In [20]:
# For each target column, build one train/test sample per day-type subset
# (weekday, Saturday, Sunday), keyed first by target and then by subset name.
traintest_dict= {}
for y in y_col:
    traintest_dict[y]= {
        segment: ft_c.train_test_sample(frame, y, x_col)
        for segment, frame in (('weekday', df_weekday),
                               ('sat', df_sat),
                               ('sun', df_sun))
    }

In [23]:
# Run prpt.Bayseian2 on each day-type sample of each target column; the second
# argument is the literal 'week' for all three subsets.
result_dict= {}
for y in y_col:
    result_dict[y]= {
        segment: prpt.Bayseian2(traintest_dict[y][segment], 'week')
        for segment in ('weekday', 'sat', 'sun')
    }

In [24]:
# Stitch the three per-day-type results back together: for each target column
# and each result table, stack the weekday/Saturday/Sunday frames row-wise and
# order them by date.
merged_dict={}
for y in y_col:
    merged_dict[y]= {
        table: pd.concat(
            [result_dict[y][segment][table] for segment in ('weekday', 'sat', 'sun')],
            axis= 0,
        ).sort_values(by='ds', axis=0)
        for table in ('future', 'forecastProphetTable')
    }

In [25]:
# Build the final forecast table for every target column and print its error metrics.
forecast_dict={}
print(INPUT_FILENAME)
for y in y_col:
    future_df= merged_dict[y]['future']
    forecast_df= merged_dict[y]['forecastProphetTable']

    # Normalise the 'ds' column of both merged tables to datetimes (assigned in
    # place on the frames held by merged_dict). The `box` keyword is no longer
    # passed: it was deprecated in pandas 0.25 and removed in 1.0, and for
    # Series input the result is a Series with or without it.
    future_df['ds']= pd.to_datetime(future_df['ds'], format= '%Y/%m/%d', exact=True)
    forecast_df['ds']= pd.to_datetime(forecast_df['ds'], format= '%Y/%m/%d', exact=True)

    forecast_dict[y]= prpt.extract_info_from(future_df, forecast_df, varr.FORECASTDAY)

    # Attach the observed target values so predictions can be scored; the inner
    # join keeps only forecast dates that also exist in df_raw.
    forecast_dict[y]['result_df']= pd.merge(forecast_dict[y]['result_df'], df_raw[['ds', y]], how='inner', on='ds')
    result_df= forecast_dict[y]['result_df']
    actual, predicted= result_df[y], result_df['yhat']

    print('RMSE with segment '+str(y)+': '+str(calc.rms_error(actual, predicted)))
    print('MAPE with segment '+str(y)+': '+str(calc.map_error(actual, predicted)))
    print('MAPE_with_std with segment '+str(y)+': '+str(calc.map_error_with_std(actual, predicted)))
    print('MAPE_div_std with segment '+str(y)+': '+str(calc.map_error_div_std(actual, predicted)))
    # NOTE(review): this label says "company code" while the others say "segment".
    print('sMAPE of company code '+str(y)+': '+str(calc.smap_error(actual, predicted)))
    print(result_df.head())
#     ft_c.save_as_xlsx(varr.PJ_DIR+varr.DF_DIR, forecast_dict[y]['result_df'], INPUT_FILENAME, '_seg'+str(y)+'withweekday')
    print('\n')

KPP일별_반납회수(13_17)_raw.xlsx
RMSE with segment y_sum: 23173.3535189
MAPE with segment y_sum: 9.91780854541
MAPE_with_std with segment y_sum: 31.0692472818
MAPE_div_std with segment y_sum: 49.1091697147
sMAPE of company code y_sum: 11.0108679219
          ds  temp_max  temp_min  rain_amount           yhat   y_sum
0 2017-11-24       2.9      -2.5          0.1  112054.016914  106479
1 2017-11-25       6.4       0.8         15.7   71960.254605   68246
2 2017-11-26       6.4      -2.4          0.0    1415.725708    1165
3 2017-11-27       8.1       1.8          0.0  111624.406190  112203
4 2017-11-28       9.8       4.2          1.2  114950.595302  115725




In [39]:
# Show the scored forecast table (predictions joined with observed values) for 'y_sum'.
forecast_dict['y_sum']['result_df']

Unnamed: 0,ds,temp_max,temp_min,rain_amount,yhat,y_sum
0,2017-11-24,2.9,-2.5,0.1,112054.016914,106479
1,2017-11-25,6.4,0.8,15.7,71960.254605,68246
2,2017-11-26,6.4,-2.4,0.0,1415.725708,1165
3,2017-11-27,8.1,1.8,0.0,111624.40619,112203
4,2017-11-28,9.8,4.2,1.2,114950.595302,115725
5,2017-11-29,5.7,-3.2,0.0,118929.428045,121990
6,2017-11-30,1.8,-7.1,0.0,120640.235185,181499


In [None]:
# Plot the weekday-subset forecast with the fitted model's own plot method.
# NOTE(review): `y` is whatever value the last `for y in y_col` loop left bound.
result_dict[y]['weekday']['model'].plot(result_dict[y]['weekday']['forecastProphetTable'])

In [None]:
# Plot the forecast components of the weekday-subset model.
# NOTE(review): `y` is whatever value the last `for y in y_col` loop left bound.
result_dict[y]['weekday']['model'].plot_components(result_dict[y]['weekday']['forecastProphetTable'])

In [None]:
# Preview the merged forecast table. merged_dict is keyed by the target column
# names in y_col (['y_sum']), so the key is 'y_sum'; 'y' raised KeyError.
merged_dict['y_sum']['forecastProphetTable'].head(20)

In [None]:
# weekly_seasonality= merged_dict['y_sum']['forecastProphetTable']['yhat']
# weekly_seasonality= weekly_seasonality.reset_index(drop= True)
# df_season= df_raw
# df_season['y_sum']= df_raw['y_sum']- weekly_seasonality

In [None]:
# autocorrelation_plot(df_season['y_sum'])
# plt.show()

In [None]:
# df_season[-150:].y_sum.autocorr(lag=7)

In [None]:
# Independent copy of the merged predictions, re-indexed 0..n-1 in date order.
# merged_dict is keyed by the target column names in y_col (['y_sum']), so the
# key is 'y_sum'; 'y' raised KeyError.
y_hat_series= copy.deepcopy(merged_dict['y_sum']['forecastProphetTable']['yhat'].reset_index(drop= True))

In [None]:
# Observed target values. The merged frame's target column is 'y_sum'; it has
# no 'y' column, so the previous `df_raw.y` raised AttributeError.
y_series= df_raw['y_sum']

In [None]:
# Standard deviation of the forecast error (prediction minus observation).
# NOTE(review): the subtraction aligns on index labels - confirm both series
# cover the same dates in the same order.
np.std(y_hat_series- y_series)

In [None]:
# Per-row forecast error: prediction minus observation.
# NOTE(review): the subtraction aligns on index labels - confirm both series
# cover the same dates in the same order.
err_series= y_hat_series- y_series

In [None]:
# Independent copies of the upper/lower prediction bounds, re-indexed 0..n-1.
# merged_dict is keyed by the target column names in y_col (['y_sum']), so the
# key is 'y_sum'; 'y' raised KeyError.
yhat_upper_series= copy.deepcopy(merged_dict['y_sum']['forecastProphetTable']['yhat_upper'].reset_index(drop= True))
yhat_lower_series= copy.deepcopy(merged_dict['y_sum']['forecastProphetTable']['yhat_lower'].reset_index(drop= True))

In [None]:
# Rows whose observed value sits on or outside the prediction interval:
# at or above the upper bound, or at or below the lower bound.
at_or_above_upper= (y_series-yhat_upper_series)>=0
at_or_below_lower= (y_series-yhat_lower_series)<=0
df_err= df_raw.loc[at_or_above_upper|at_or_below_lower, :]

In [None]:
# Attach the error and the prediction to the merged frame as new columns.
# NOTE(review): this modifies df_raw in place, so cells that displayed df_raw
# earlier no longer reflect its current columns.
df_raw['err']= err_series
df_raw['yhat']= y_hat_series

In [None]:
# Show the last 31 rows of the frame ordered by error, ascending.
df_raw.iloc[-31:, :].sort_values('err')

In [None]:
# df_cv= CV(result_dict['y']['weekday']['model'], horizon= '7 days')

In [None]:
# ft_c.train_test

In [None]:
# Show the (rows, columns) size of the merged frame.
df_raw.shape

In [None]:
# print('RMSE with segment '+str(y)+': '+str(calc.rms_error(df_cv['y'], df_cv['yhat'])))
# print('MAPE with segment '+str(y)+': '+str(calc.map_error(df_cv['y'], df_cv['yhat'])))
# print('MAPE_with_std with segment '+str(y)+': '+str(calc.map_error_with_std(df_cv['y'], df_cv['yhat'])))
# print('MAPE_div_std with segment '+str(y)+': '+str(calc.map_error_div_std(df_cv['y'], df_cv['yhat'])))
# print('sMAPE of company code '+str(y)+': '+str(calc.smap_error(df_cv['y'], df_cv['yhat'])))