In [47]:
import pandas as pd
import numpy as np
from collections import OrderedDict

In [2]:
import os, sys

# Put the current working directory on the module search path, presumably so
# that the project packages imported below (_element, _alg, _evaluation) can
# be resolved. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path on every execution.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

In [3]:
from _element import feature_control as ft_c
from _element import varr
from _element import calculations as calc
from _element.result_control import Result

from _alg.prophet import Prophet_timeseries

from _evaluation.cross_validation import Cross_Validation
from _evaluation import compare

In [4]:
# File name of the Excel workbook that is opened as `df_raw` in the next cell.
INPUT_FILENAME = 'KPP일별투입(10_17)_raw.xlsx'

In [5]:
# Open both Excel workbooks through the project's xlsx_opener helper.
# Each result is indexed with the key 'raw' further down to reach a DataFrame.
df_raw = ft_c.xlsx_opener(
    varr.PJ_DIR + varr.DF_DIR,
    INPUT_FILENAME,
)
df_temp = ft_c.xlsx_opener(
    varr.PJ_DIR + varr.TEMP_DATA_DIR,
    'temp_data_merged.xlsx',
)

In [6]:
# Column names of the raw series frame, as a plain Python list.
y_col = df_raw['raw'].columns.tolist()

In [7]:
# 'ds' is the date column (it is parsed with pd.to_datetime below); take it out
# of the list when it is there and leave the list untouched otherwise.
if 'ds' in y_col:
    y_col.remove('ds')

In [8]:
# Column names of the second workbook's frame; passed to cv.slice() later on.
# Captured before the merge, so it lists only df_temp's own columns.
x_col = df_temp['raw'].columns.tolist()

In [9]:
# Parse the 'ds' key of both frames to datetimes so the join below compares
# like with like.
# The former `box=True` argument is dropped: it was the default, was deprecated
# in pandas 0.25 and removed in pandas 1.0, where passing it raises TypeError.
# For Series input the result is a Series either way.
df_raw['raw']['ds'] = pd.to_datetime(df_raw['raw']['ds'], format='%Y/%m/%d', exact=True)
df_temp['raw']['ds'] = pd.to_datetime(df_temp['raw']['ds'], format='%Y/%m/%d', exact=True)

# Keep only the dates present in both frames.
# NOTE(review): this overwrites df_raw['raw'] in place, so running the cell a
# second time merges the already-merged frame again; re-run from the loading
# cell instead.
df_raw['raw'] = pd.merge(df_raw['raw'], df_temp['raw'], how='inner', on='ds')

In [11]:
# Project helper applied to the 'raw' frame with period 7 and start_num=3.
# Presumably adds the 'seasonality_7,3' column used by the next cell (a 0-6
# cycle index, judging by the recorded outputs) — TODO confirm in feature_control.
ft_c.add_division(df_raw, 'raw', 7, start_num= 3)

In [12]:
# Project helper keyed on the 'seasonality_7,3' column of the 'raw' frame.
# Presumably splits df_raw into one frame per column value: the recorded
# display below shows a key (0,) whose rows all have seasonality_7,3 == 0 —
# TODO confirm in feature_control.
ft_c.divide_by(df_raw, 'raw', 'seasonality_7,3')

In [13]:
# Show the df_raw container as it stands after the divide_by call.
# NOTE(review): this renders every frame in full; showing a few rows per key
# would keep the notebook output small.
df_raw

{(0,):              ds       y   temp_max   temp_min  rain_amount  seasonality_7,3
 4    2010-07-05   65049  30.600000  21.799999          0.0                0
 11   2010-07-12   67680  28.299999  22.200001          0.0                0
 18   2010-07-19   73816  29.299999  24.200001          0.0                0
 25   2010-07-26   75731  27.600000  23.400000          6.0                0
 32   2010-08-02   67290  31.600000  24.000000          1.0                0
 39   2010-08-09   67691  32.799999  26.400000          0.0                0
 46   2010-08-16   70947  26.500000  21.000000          0.0                0
 53   2010-08-23   71422  28.900000  23.500000         13.0                0
 60   2010-08-30   68075  29.799999  22.299999          0.0                0
 67   2010-09-06   74425  29.700001  23.000000          0.0                0
 74   2010-09-13   75663  27.400000  21.100000          0.0                0
 81   2010-09-20   56183  25.400000  19.400000          4.0           

In [14]:
# Project Result object; its forecast_dict is handed to forecast() and its
# merge_result / err_rate / avg_result / print_err_rate methods are used below.
r= Result()

In [18]:
# Build the project's cross-validation helper from the df_raw container.
cv= Cross_Validation(df_raw)

In [19]:
# Slice with 'y' as the target column and x_col as the feature columns.
# The horizon is FORECASTDAY // 7, presumably because each segment holds one
# row per week after the split (dates in the recorded output are 7 days apart)
# — TODO confirm.
cv.slice('y', x_col, forecastday= varr.FORECASTDAY//7)

In [20]:
# Inspect the slices: per the recorded output this is a dict keyed like
# '(0,)_0', each entry holding 'test', 'testX', 'testY' and 'train'.
# NOTE(review): _data is an underscore-prefixed attribute of Cross_Validation
# that is read directly here and again in the fit() call.
cv._data

{'(0,)_0': {'test':              ds       y  temp_max  temp_min  rain_amount  seasonality_7,3
  2706 2017-11-27  117480       8.1       1.8          0.0                0,
  'testX':              ds  temp_max  temp_min  rain_amount
  2706 2017-11-27       8.1       1.8          0.0,
  'testY': 2706    117480
  Name: y, dtype: int64,
  'train':              ds       y   temp_max   temp_min  rain_amount  seasonality_7,3
  4    2010-07-05   65049  30.600000  21.799999          0.0                0
  11   2010-07-12   67680  28.299999  22.200001          0.0                0
  18   2010-07-19   73816  29.299999  24.200001          0.0                0
  25   2010-07-26   75731  27.600000  23.400000          6.0                0
  32   2010-08-02   67290  31.600000  24.000000          1.0                0
  39   2010-08-09   67691  32.799999  26.400000          0.0                0
  46   2010-08-16   70947  26.500000  21.000000          0.0                0
  53   2010-08-23   71422  28.900

In [21]:
# Instantiate the project's Prophet_timeseries wrapper with cv=True.
prpt_event_revised= Prophet_timeseries(cv= True)

In [22]:
# Register a model under the name 'event_revise' with the 'week' setting, the
# holiday configuration from varr.HOLYDAYBETA_revised and no extra regressor.
prpt_event_revised.add_model('event_revise', 'week', holidaybeta= varr.HOLYDAYBETA_revised, regressor= None)

In [23]:
# Fit the 'event_revise' model on the train/test slices held by the
# cross-validation helper.
prpt_event_revised.fit('event_revise', txs_traintest= cv._data)

In [24]:
# Run the forecast for 'event_revise', passing r.forecast_dict (presumably
# filled in place — the call's return value is not used here).
# NOTE(review): the recorded run emits a pandas SettingWithCopyWarning for an
# assignment involving result_forecast['ds']; that code is not in this
# notebook, presumably in the project's forecasting module — worth fixing there.
prpt_event_revised.forecast('event_revise', forecast_dict= r.forecast_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  result_forecast['ds'], box=True, format= '%Y/%m/%d', exact=True


In [29]:
# Show the accumulated forecasts (an OrderedDict per the recorded output).
# NOTE(review): this cell carries execution count 29 although it sits before
# cells 26 and 27, and its recorded output already contains '..._merged'
# entries with error metrics. Those presumably come from the merge_result /
# err_rate cells below, so a fresh top-to-bottom run will likely show
# different content here.
r.forecast_dict

OrderedDict([('weekday__0_merged',
              {'MAPE': 45.713163163531796,
               'MAPE_div_std': 73.606459015732284,
               'MAPE_with_std': 20.22663478899732,
               'RMSE': 5794.6743253523309,
               'forecast':           ds       y  temp_max  temp_min  rain_amount  seasonality_7,3  \
               0 2017-11-24  120647       2.9      -2.5          0.1                4   
               0 2017-11-25   67756       6.4       0.8         15.7                5   
               0 2017-11-26     100       6.4      -2.4          0.0                6   
               0 2017-11-27  117480       8.1       1.8          0.0                0   
               0 2017-11-28  122192       9.8       4.2          1.2                1   
               0 2017-11-29  120685       5.7      -3.2          0.0                2   
               0 2017-11-30  108386       1.8      -7.1          0.0                3   
               
                           yhat  
   

In [26]:
# Merge the results whose names carry the suffixes '_0' .. '_4' under the
# title 'event_revise' (the suffixes presumably match the slice keys such as
# '(0,)_0' — TODO confirm in result_control).
r.merge_result(m_name= ['_0', '_1', '_2', '_3', '_4'], m_title= 'event_revise')

{'forecast':           ds       y  temp_max  temp_min  rain_amount  seasonality_7,3  \
0 2017-11-24  120647       2.9      -2.5          0.1                4   
0 2017-11-25   67756       6.4       0.8         15.7                5   
0 2017-11-26     100       6.4      -2.4          0.0                6   
0 2017-11-27  117480       8.1       1.8          0.0                0   
0 2017-11-28  122192       9.8       4.2          1.2                1   
0 2017-11-29  120685       5.7      -3.2          0.0                2   
0 2017-11-30  108386       1.8      -7.1          0.0                3   

            yhat  
0  124592.326772  
0   62672.072598  
0    -191.516617  
0  113686.716103  
0  121378.489147  
0  122630.675957  
0  121603.957221  }
{'forecast':           ds       y   temp_max   temp_min  rain_amount  seasonality_7,3  \
0 2017-09-15  146743  27.299999  17.200001          0.0                4   
0 2017-09-16   78827  26.100000  19.200001          0.0                5   


In [27]:
# Compute the error metrics for 'event_revise', then print them. The recorded
# output lists RMSE, MAPE, MAPE_with_std, MAPE_div_std and sMAPE per segment.
r.err_rate(m_name= 'event_revise')
r.print_err_rate()

RMSE with segment weekday__0_merged: 5794.6743
MAPE with segment weekday__0_merged: 45.7132
MAPE_with_std with segment weekday__0_merged: 20.2266
MAPE_div_std with segment weekday__0_merged: 73.6065
sMAPE with segment weekday__0_merged: 4.3957


RMSE with segment weekday__1_merged: 10462.4891
MAPE with segment weekday__1_merged: 21.5721
MAPE_with_std with segment weekday__1_merged: 24.4708
MAPE_div_std with segment weekday__1_merged: 235.6025
sMAPE with segment weekday__1_merged: 9.2321


RMSE with segment weekday__2_merged: 6768.3937
MAPE with segment weekday__2_merged: 43.0813
MAPE_with_std with segment weekday__2_merged: 20.8105
MAPE_div_std with segment weekday__2_merged: 82.9035
sMAPE with segment weekday__2_merged: 4.8596


RMSE with segment weekday__3_merged: 25615.0623
MAPE with segment weekday__3_merged: 37.9641
MAPE_with_std with segment weekday__3_merged: 37.1187
MAPE_div_std with segment weekday__3_merged: 82.7611
sMAPE with segment weekday__3_merged: 22.9499


RMSE with se

In [30]:
# Combine the per-segment results for 'event_revise'; the print_err_rate()
# call in the next cell then reports a single 'weekday_result' segment.
r.avg_result(m_name= 'event_revise')

{'forecast': [          ds       y  temp_max  temp_min  rain_amount  seasonality_7,3  \
0 2017-11-24  120647       2.9      -2.5          0.1                4   
0 2017-11-25   67756       6.4       0.8         15.7                5   
0 2017-11-26     100       6.4      -2.4          0.0                6   
0 2017-11-27  117480       8.1       1.8          0.0                0   
0 2017-11-28  122192       9.8       4.2          1.2                1   
0 2017-11-29  120685       5.7      -3.2          0.0                2   
0 2017-11-30  108386       1.8      -7.1          0.0                3   

            yhat  
0  124592.326772  
0   62672.072598  
0    -191.516617  
0  113686.716103  
0  121378.489147  
0  122630.675957  
0  121603.957221  ,           ds       y   temp_max   temp_min  rain_amount  seasonality_7,3  \
0 2017-09-15  146743  27.299999  17.200001          0.0                4   
0 2017-09-16   78827  26.100000  19.200001          0.0                5   
0 2017-09-17

In [31]:
# Print the error metrics again; after avg_result the recorded output shows
# them for the 'weekday_result' segment.
r.print_err_rate()

RMSE with segment weekday_result: 13547.0090
MAPE with segment weekday_result: 36.0416
MAPE_with_std with segment weekday_result: 25.3174
MAPE_div_std with segment weekday_result: 140.7680
sMAPE with segment weekday_result: 11.8902


