# Prophet 시계열 예측
---

In [2]:
import pandas as pd
import prophet
import matplotlib.pyplot as plt

## [1] 데이터 로딩
---

In [3]:
feature = pd.read_csv("../../Data/use-data/data_tomato.csv", index_col=0)
target = pd.read_csv("../../Data/use-data/target.csv", index_col=0)

data = pd.concat([feature, target], axis=1)

In [5]:
data.columns

Index(['TCL_AT', 'TCL_LT', 'TCL_HT', 'TEMP_EF_CAP', 'TEMP_EF_DONGNAM',
       'TEMP_EF_DAEGYUNG', 'TEMP_EF_CHOONG', 'TEMP_EF_HONAM', 'TEMP_EF_MEAN',
       'POWER_WATER', 'POWER_FIRE', 'POWER_NUCLEAR', 'POWER_NAEYEON',
       'POWER_GROUP', 'POWER_TRANS', 'POWER_GAS', 'POWER_JUNG', 'POWER_MOO',
       'POWER_YOU', 'POWER_LNG', 'ELEC_ALL', 'ELEC_FAMILY', 'ELEC_PUBLIC',
       'ELEC_SERVICE', 'ELEC_CIVIL_SUM', 'ELEC_ARGRI_FISHING', 'ELEC_MINING',
       'ELEC_MANUFACT', 'ELEC_INDSUM', 'RV', 'GAS_PRICE_IND', 'OIL_PRICE',
       'QVA', 'CIVIL', 'IND'],
      dtype='object')

In [15]:
temp_cols = [col for col in data.columns if "TEMP" in col]
tcl_cols = [col for col in data.columns if "TCL" in col]
power_cols = [col for col in data.columns if "POWER" in col]
elec_cols = [col for col in data.columns if "ELEC" in col]

data_temp = data[temp_cols].reset_index()
data_tcl = data[tcl_cols].reset_index()
data_power = data[power_cols].reset_index()
data_elec = data[elec_cols].reset_index()

data_qva = data["QVA"].reset_index()
data_rv = data["RV"].reset_index()
data_civil = data["CIVIL"].reset_index()
data_ind = data["IND"].reset_index()

In [19]:
data_temp

Unnamed: 0,DATE,TEMP_EF_CAP,TEMP_EF_DONGNAM,TEMP_EF_DAEGYUNG,TEMP_EF_CHOONG,TEMP_EF_HONAM,TEMP_EF_MEAN
0,1996-01-01,0.84711,5.91778,6.73765,3.33479,1.88651,3.744768
1,1996-02-01,0.81430,5.90875,6.73291,3.32638,1.87883,3.732234
2,1996-03-01,0.60731,5.81529,6.67634,3.22000,1.78740,3.621268
3,1996-04-01,0.24132,5.71003,6.61264,3.07795,1.66054,3.460496
4,1996-05-01,-0.38328,5.67433,6.58956,3.02586,1.62083,3.305460
...,...,...,...,...,...,...,...
295,2020-08-01,-0.52038,5.55479,6.50341,2.91789,1.55535,3.202212
296,2020-09-01,-0.48046,5.62159,6.57738,2.98076,1.59811,3.259476
297,2020-10-01,-0.14824,5.67245,6.61732,3.06704,1.64228,3.370170
298,2020-11-01,0.23138,5.73286,6.65465,3.15240,1.70088,3.494434


## [2] prophet 모델
---

In [17]:
def prophet_model(data:pd.DataFrame, col:str, params:dict, periods:int, dtype='int'):
    """시계열 예측 결과값 반환해주는 모델
    Args:
        data: 데이터프레임
        col: 피쳐 컬럼 이름
        params: prophets 파라미터
        periods: 예측 일수
    
    Returns:
        pd.DataFrame: 예측 피쳐 데이터프레임형태 반환
    """
    data_c = data.copy()
    # rename cols
    data_c = data_c.rename(columns={col: 'y', "DATE": "ds"})
    
    # add params
    m = prophet.Prophet(**params)
    # m.add_country_holidays(country_name="KOR")
    m.add_seasonality(name="monthly", period=30.5, fourier_order=3, prior_scale=0.01, mode="multiplicative")
    
    # fit model
    m.fit(data_c)
    
    # future data
    future = m.make_future_dataframe(periods=periods, freq='MS')
    forecast = m.predict(future)
    
    # plot forecast
    fig = m.plot(forecast)
    a = prophet.plot.add_changepoints_to_plot(fig.gca(), m, forecast)
    
    # plot components
    m.plot_components(forecast)
    
    # return forecast feature
    pred = forecast["yhat"].astype(dtype)[-periods:]
    date = future.iloc[:,0][-periods:]
    result = pd.concat([date, pred], axis=1)
    
    return result


In [18]:
# default scale = 0.05
# default seasonality = 10

params = {
    "changepoint_prior_scale": 0.035,
    "changepoint_range": 0.8,
    "seasonality_prior_scale": 5,
    "weekly_seasonality": True,
    "yearly_seasonality": True,
    "daily_seasonality": False,
    "seasonality_mode": "multiplicative",
    # "holidays_prior_scale": 1,
    "interval_width": 0.8
}

pred = prophet_model(data, 'ELEC_INDSUM', params=params, periods=12*15)

for col in data_power.colums:
    prophet_model(data_power, col, params, 12*15)

ValueError: Dataframe must have columns "ds" and "y" with the dates and values respectively.