In [None]:
# Core data / plotting stack.
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
# Use the 'malgun gothic' font family for all figure text
# (presumably so Korean labels render — confirm the font is installed on the host).
plt.rc('font', family='malgun gothic')
# Draw minus signs as plain ASCII hyphens instead of the Unicode minus glyph.
plt.rc('axes', unicode_minus=False)
import seaborn as sns
import plotly.express as px
import os
import missingno as msno
import pickle
from glob import glob
from tqdm import tqdm
import warnings
# NOTE(review): this silences every warning, including library deprecation notices.
warnings.filterwarnings("ignore")
import matplotlib
import random


# Evaluation metrics and scaler.
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import RobustScaler

# Forecasting model.
from fbprophet import Prophet

In [None]:
# Load the raw order and delivery tables from CSV files in the working directory.
orders, deliveries = (pd.read_csv(path) for path in ('orders.csv', 'deliveries.csv'))

def beg_end_month(x):
    """Bucket a day-of-month number into an early / mid / late-month label.

    Returns '월초' (early) for x <= 10, '월중' (mid) for 10 < x <= 20,
    '월말' (late) for 20 < x <= 31, and None for any other value
    (for example x > 31 or NaN).
    """
    # Upper bounds are checked in ascending order, so the first match is the bucket.
    for upper_bound, label in ((10, '월초'), (20, '월중'), (31, '월말')):
        if x <= upper_bound:
            return label
    return None
# Parse the booking / insert dates (YYYY-MM-DD) and the booking timestamp into datetime columns.
orders['BKG_DATE'] = pd.to_datetime(orders['BKG_DATE'], format='%Y-%m-%d')
orders['INS_DATE'] = pd.to_datetime(orders['INS_DATE'], format='%Y-%m-%d')
orders['BKG_TIME'] = pd.to_datetime(orders['BKG_TIME'], format='%Y-%m-%d %H:%M:%S')
# ISO week number of the booking date. `Series.dt.week` is deprecated and removed in pandas 2.0;
# `dt.isocalendar().week` yields the same week numbers (as a nullable UInt32 column).
orders['BKG_WEEK'] = orders['BKG_DATE'].dt.isocalendar().week
# Early / mid / late-month label derived from the booking day-of-month.
orders['BKG_MONTH2'] = orders['BKG_DATE'].dt.day.map(beg_end_month)

In [None]:
# Hourly total ITEM_QTY for booking type 7 of corporation 'KX007'.
# Both conditions go into a single mask: chaining `[mask_a][mask_b]` applies a mask built on the
# full frame to an already-filtered frame, which pandas has to re-align (and warns about).
is_target = (orders['BKG_TYP'] == 7) & (orders['CORP_ID'] == 'KX007')
data = orders[is_target].groupby(['BKG_DATE', 'BKG_HOUR'])['ITEM_QTY'].sum().reset_index()

# Add explicit zero-quantity rows for 2021-06-28 at hours 4 and 5.
# NOTE(review): presumably these are the only hours missing from the evaluation window — confirm.
# `DataFrame.append` is deprecated and removed in pandas 2.0, so the rows are added with `pd.concat`.
filler = pd.DataFrame({
    'BKG_DATE': pd.to_datetime(['2021-06-28', '2021-06-28']),
    'BKG_HOUR': [4, 5],
    'ITEM_QTY': [0, 0],
})
data = pd.concat([data, filler], ignore_index=True).sort_values(['BKG_DATE', 'BKG_HOUR'])

# Combine calendar date and hour into one timestamp per row (vectorized instead of formatting and
# parsing a string for every row). `normalize()` keeps only the date part, as the original did.
data['DATE'] = data['BKG_DATE'].dt.normalize() + pd.to_timedelta(data['BKG_HOUR'], unit='h')
data = data.set_index('DATE').drop(columns=['BKG_DATE', 'BKG_HOUR']).rename(columns={'ITEM_QTY':'TARGET'})

## Prophet without holidays

In [None]:
# Seed the stdlib and NumPy global RNGs before fitting.
random.seed(1234)
np.random.seed(1234)

# Rolling-origin evaluation: one Prophet model per split date (2021-05-31 plus 0..29 days).
first_split = pd.to_datetime('2021-05-31')
prophet_cols = {'DATE':'ds', 'TARGET':'y'}  # the frame passed to Prophet must have ds and y columns

pred_list = []
for i in tqdm(range(30)):
    split_date = first_split + pd.Timedelta(days=i)
    horizon_end = split_date + pd.Timedelta(days=2)
    # Train on rows strictly before the split; predict the rows from the split through
    # split + 2 days (label-based slice, so both end points are included).
    train_set = data.loc[data.index < split_date].copy()
    test_set = data.loc[split_date:horizon_end].copy()
    model = Prophet(yearly_seasonality=False)
    model.fit(train_set.reset_index().rename(columns=prophet_cols))
    prediction = model.predict(test_set.reset_index().rename(columns=prophet_cols))
    # Keep only the last 25 predicted rows of this window.
    pred_list.append(prediction.iloc[-25:])

In [None]:
# Stack the per-split forecasts; when two windows predicted the same timestamp,
# keep the row that came from the later split.
preds = (
    pd.concat(pred_list)
      .drop_duplicates(subset='ds', keep='last')
)

In [None]:
# Prophet's built-in forecast plot of the stacked predictions, drawn through `model`
# (the model fitted in the final loop iteration).
fig, ax = plt.subplots(figsize=(15,5))
model.plot(preds, ax=ax)
plt.show()

In [None]:
# Overlay the rolling forecasts on the actual hourly quantities.
fig, ax = plt.subplots(figsize=(10,4))
# Pick the actuals by the forecast's own time span instead of assuming that the last 720 rows
# of `data` line up with `preds`.
actual = data.loc[preds['ds'].min():preds['ds'].max(), 'TARGET']
ax.plot(preds['ds'], preds['yhat'], label='preds', color='k')
# Plot the Series, not the DataFrame: DataFrame.plot labels lines by column name, so the
# `label='real'` argument would be ignored and the legend would read 'TARGET'.
actual.plot(ax=ax, label='real', color='pink')
ax.legend()
plt.show()

In [None]:
# Forecast vs. actuals, split into two panels of 15 days (15 * 24 hourly rows) each.
half = 15 * 24
fig, ax = plt.subplots(2,1, figsize=(10,10))
# The two panels previously carried the same title; the day range now tells them apart.
panels = (
    (ax[0], preds.iloc[:half], 'Validation Set (days 1-15)'),
    (ax[1], preds.iloc[half:], 'Validation Set (days 16-30)'),
)
for axis, chunk, title in panels:
    # Select actuals by the chunk's own time span rather than by hard-coded row offsets.
    actual = data.loc[chunk['ds'].min():chunk['ds'].max(), 'TARGET']
    axis.plot(chunk['ds'], chunk['yhat'], label='preds', color='k')
    # Series.plot honours `label`; DataFrame.plot would label the line 'TARGET' instead.
    actual.plot(ax=axis, label='real', color='pink')
    axis.set_title(title)
    axis.legend()
plt.show()

In [None]:
# RMSE of the rolling forecasts over the whole forecast window.
# Actuals are looked up by the predicted timestamps rather than by position (last 720 rows), and
# the root is taken explicitly because `squared=` is deprecated in recent scikit-learn releases.
actual = data.loc[pd.DatetimeIndex(preds['ds']), 'TARGET']
np.sqrt(mean_squared_error(actual.to_numpy(), preds['yhat'].to_numpy()))

In [None]:
# RMSE restricted to the last 15 days (15 * 24 hourly forecasts).
# Actuals are matched to the forecasts by timestamp; the root is taken explicitly because
# `squared=` is deprecated in recent scikit-learn releases.
recent = preds.iloc[-15*24:]
actual = data.loc[pd.DatetimeIndex(recent['ds']), 'TARGET']
np.sqrt(mean_squared_error(actual.to_numpy(), recent['yhat'].to_numpy()))

## Prophet with holidays

In [None]:
# Event dates per campaign group (group meanings are not documented in this notebook).
nestle = ['2021-03-22', '2021-04-15', '2021-05-17', '2021-06-17']
nut_one = ['2021-04-25', '2021-04-30','2021-05-16','2021-05-31',
           '2021-06-13', '2021-06-27']

# Zero-pad the day so every string is a canonical YYYY-MM-DD date.
lg3 = [f'2021-03-{i:02d}' for i in range(22,29)]
lg6 = [f'2021-06-{i:02d}' for i in range(7,16)]
lg_brand = ['2021-04-20','2021-05-17']
lg = lg3 + lg_brand + lg6

events = nestle + nut_one + lg

# Build the Prophet holidays frame (columns ds, holiday) straight from the event dates.
# Looking the dates up in `data` required a midnight row to exist for every event date and
# repeated the dates that appear in more than one group; neither is needed here.
holiday_df = (
    pd.DataFrame({'ds': pd.to_datetime(events)})
      .drop_duplicates('ds')
      .reset_index(drop=True)
)
holiday_df['holiday'] = 'eventday'
holiday_df.head()

In [None]:
# Seed the stdlib and NumPy global RNGs before fitting.
random.seed(1234)
np.random.seed(1234)

# Rolling-origin evaluation with the event-day holiday frame: one Prophet model per split date
# (2021-05-31 plus 0..29 days).
first_split = pd.to_datetime('2021-05-31')
prophet_cols = {'DATE':'ds', 'TARGET':'y'}  # the frame passed to Prophet must have ds and y columns

pred_list = []
for i in tqdm(range(30)):
    split_date = first_split + pd.Timedelta(days=i)
    horizon_end = split_date + pd.Timedelta(days=2)
    # Train on rows strictly before the split; predict the rows from the split through
    # split + 2 days (label-based slice, so both end points are included).
    train_set = data.loc[data.index < split_date].copy()
    test_set = data.loc[split_date:horizon_end].copy()
    model = Prophet(yearly_seasonality=False, holidays=holiday_df)
    model.fit(train_set.reset_index().rename(columns=prophet_cols))
    prediction = model.predict(test_set.reset_index().rename(columns=prophet_cols))
    # Keep only the last 25 predicted rows of this window.
    pred_list.append(prediction.iloc[-25:])

In [None]:
# Stack the per-split forecasts; when two windows predicted the same timestamp,
# keep the row that came from the later split.
preds = (
    pd.concat(pred_list)
      .drop_duplicates(subset='ds', keep='last')
)

In [None]:
# Overlay the holiday-aware rolling forecasts on the actual hourly quantities.
fig, ax = plt.subplots(figsize=(10,4))
# Pick the actuals by the forecast's own time span instead of assuming that the last 720 rows
# of `data` line up with `preds`.
actual = data.loc[preds['ds'].min():preds['ds'].max(), 'TARGET']
ax.plot(preds['ds'], preds['yhat'], label='preds', color='k')
# Plot the Series, not the DataFrame: DataFrame.plot labels lines by column name, so the
# `label='real'` argument would be ignored and the legend would read 'TARGET'.
actual.plot(ax=ax, label='real', color='pink')
ax.set_title('Prophet with Holidays')
ax.legend()
plt.show()

In [None]:
# Holiday-aware forecast vs. actuals, split into two panels of 15 days (15 * 24 hourly rows) each.
half = 15 * 24
fig, ax = plt.subplots(2,1, figsize=(10,10))
# The two panels previously carried the same title; the day range now tells them apart.
panels = (
    (ax[0], preds.iloc[:half], 'Validation Set (days 1-15)'),
    (ax[1], preds.iloc[half:], 'Validation Set (days 16-30)'),
)
for axis, chunk, title in panels:
    # Select actuals by the chunk's own time span rather than by hard-coded row offsets.
    actual = data.loc[chunk['ds'].min():chunk['ds'].max(), 'TARGET']
    axis.plot(chunk['ds'], chunk['yhat'], label='preds', color='k')
    # Series.plot honours `label`; DataFrame.plot would label the line 'TARGET' instead.
    actual.plot(ax=axis, label='real', color='pink')
    axis.set_title(title)
    axis.legend()
plt.show()

In [None]:
# RMSE of the holiday-aware rolling forecasts over the whole forecast window.
# Actuals are looked up by the predicted timestamps rather than by position (last 720 rows), and
# the root is taken explicitly because `squared=` is deprecated in recent scikit-learn releases.
actual = data.loc[pd.DatetimeIndex(preds['ds']), 'TARGET']
np.sqrt(mean_squared_error(actual.to_numpy(), preds['yhat'].to_numpy()))

In [None]:
# RMSE of the holiday-aware forecasts restricted to the last 15 days (15 * 24 hourly forecasts).
# Actuals are matched to the forecasts by timestamp; the root is taken explicitly because
# `squared=` is deprecated in recent scikit-learn releases.
recent = preds.iloc[-15*24:]
actual = data.loc[pd.DatetimeIndex(recent['ds']), 'TARGET']
np.sqrt(mean_squared_error(actual.to_numpy(), recent['yhat'].to_numpy()))

In [None]:
# Prophet component plots of the stacked predictions, drawn through `model`
# (the model fitted in the final loop iteration).
model.plot_components(preds)
# `plt.plot()` with no arguments draws nothing and only echoes an empty list;
# `plt.show()` is the call that renders the figure.
plt.show()

In [None]:
# Export the timestamp and point-forecast columns of the predictions to CSV (no index column).
# NOTE(review): the file name is spelled 'restults' — kept unchanged in case other code reads this path.
preds.to_csv('restults_prophet_hol.csv', columns=['ds','yhat'], index=False)