- Base Code: https://dacon.io/competitions/official/236176/codeshare/9170?page=1&dtype=recent

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

from prophet import Prophet
import holidays

In [5]:
# Seed Python's and NumPy's global random generators with one shared value
# so that anything drawing from them repeats across runs.
seed_num = 42
np.random.seed(seed_num)
random.seed(seed_num)

In [2]:
# Input tables, read from the local `dataset/` directory.
train = pd.read_csv('dataset/train.csv')
test = pd.read_csv('dataset/test.csv')
# Template frame; its `answer` column is filled in before the result is saved.
submission_df = pd.read_csv('dataset/sample_submission.csv')

In [3]:
# Reshape the training table into the `ds` / `y` layout used for fitting below:
# rename the date and price columns, keep only the three needed columns, and
# strip the trailing `_YYYYMMDD`-style 8-digit suffix so `ID` holds the series code alone.
prophet_data = (
    train
    .rename(columns={'timestamp': 'ds', 'price(원/kg)': 'y'})
    .loc[:, ['ID', 'ds', 'y']]
    .assign(ID=lambda frame: frame['ID'].str.replace(r'_\d{8}$', '', regex=True))
)

In [12]:
# Peek at the date column; it is still stored as strings (dtype object) at this point.
prophet_data.loc[:, 'ds']

0        2019-01-01
1        2019-01-02
2        2019-01-03
3        2019-01-04
4        2019-01-05
            ...    
59392    2023-02-27
59393    2023-02-28
59394    2023-03-01
59395    2023-03-02
59396    2023-03-03
Name: ds, Length: 59397, dtype: object

In [None]:
# United States holidays for 2018-2020 as a two-column table (`ds`, `holiday`).
# NOTE(review): this cell has no execution count and the following cell rebinds
# `holiday` to a Korean calendar, so it looks like a leftover — confirm it is still wanted.
#
# The dates are collected first and the frame is built once: `DataFrame.append`
# was removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
us_holiday_dates = sorted(holidays.UnitedStates(years=[2018, 2019, 2020]))
holiday = pd.DataFrame({
    'ds': pd.to_datetime(us_holiday_dates),  # date objects -> datetime64
    'holiday': 'US-Holidays',                # one shared label for every row
})

In [14]:
# Korean holidays for 2019-2023 as a two-column table (`ds`, `holiday`);
# this frame is later passed to Prophet through its `holidays=` argument.
#
# Dates are gathered year by year (sorted within each year) and the frame is
# built in a single call instead of concatenating one-row frames in a loop.
korean_holiday_dates = [
    day
    for year in range(2019, 2024)
    for day in sorted(holidays.Korea(years=year))
]
holiday = pd.DataFrame({
    'ds': pd.to_datetime(korean_holiday_dates),  # date objects -> datetime64
    'holiday': 'Korean-Holidays',                # one shared label for every row
})
print(holiday)

           ds          holiday
0  2019-01-01  Korean-Holidays
1  2019-02-04  Korean-Holidays
2  2019-02-05  Korean-Holidays
3  2019-02-06  Korean-Holidays
4  2019-03-01  Korean-Holidays
..        ...              ...
84 2023-09-29  Korean-Holidays
85 2023-09-30  Korean-Holidays
86 2023-10-03  Korean-Holidays
87 2023-10-09  Korean-Holidays
88 2023-12-25  Korean-Holidays

[89 rows x 2 columns]


In [15]:
from sklearn.model_selection import ParameterGrid

# Candidate Prophet settings; the grid enumerates every combination of them.
params_grid = {
    'seasonality_mode': ('multiplicative', 'additive'),
    'changepoint_prior_scale': [0.1, 0.2, 0.3, 0.4, 0.5],
    'holidays_prior_scale': [0.1, 0.2, 0.3, 0.4, 0.5],
    'n_changepoints': [100, 150, 200],
}
grid = ParameterGrid(params_grid)

# The grid knows its own size, so no counting loop is needed.
cnt = len(grid)
print('Total Possible Models', cnt)

Total Possible Models 150


In [20]:
# Fit one Prophet model per series code and collect a 28-day daily forecast
# (starting 2023-03-04) for each; the point forecasts end up in `pred`.
# NOTE(review): the settings below are hard-coded; `grid` defined earlier is not used here.
pred_list = []
for code in prophet_data['ID'].unique():
    print(code)

    # Rows for this code only, sorted by date. `reset_index()` without `drop`
    # keeps the old row index as an extra `index` column.
    d = (
        prophet_data.loc[prophet_data['ID'] == code]
        .reset_index()
        .drop(columns=['ID'])
        .sort_values('ds')
    )

    model = Prophet(
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.3,
        holidays_prior_scale=0.3,
        n_changepoints=150,
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
        holidays=holiday,
        interval_width=0.95,
    )
    # NOTE(review): `holiday` already holds Korean holiday dates, and this call adds
    # a Korean country calendar as well — confirm the overlap is intended.
    model.add_country_holidays(country_name='Korea')
    model.fit(d)

    # Forecast horizon: 28 consecutive days.
    future = pd.DataFrame({'ds': pd.date_range(start='2023-03-04', periods=28, freq='D')})
    forecast = model.predict(future)

    # Only the point forecast (`yhat`) is kept, tagged with the series code.
    pred_list.extend(
        {'ID': str(code), 'y': y_val} for y_val in forecast['yhat'].values
    )

pred = pd.DataFrame(pred_list)

TG_A_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_A_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_B_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_B_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_C_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_C_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_D_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_D_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_E_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


TG_E_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_A_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_B_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_C_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_D_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_D_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_E_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CR_E_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CB_A_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CB_A_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CB_D_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CB_E_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_A_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_A_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_C_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_D_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_D_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_E_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_E_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_A_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_A_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_B_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_B_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_C_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_C_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_D_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_E_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


BC_E_S


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


CB_F_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


RD_F_J


INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


In [21]:
# Copy the forecasts into the submission template and save it.
# `pred['ID']` holds only the series code, so the two frames share no row key and
# are matched purely by position.
# NOTE(review): assumes the template rows follow the same code-then-date order
# in which the forecasts were produced — confirm against sample_submission.csv.
if len(pred) != len(submission_df):
    # Index-aligned assignment would otherwise write NaN answers without any error.
    raise ValueError(
        f"prediction rows ({len(pred)}) do not match submission rows ({len(submission_df)})"
    )
submission_df['answer'] = pred['y'].to_numpy()  # explicit positional assignment
submission_df.to_csv('prophet_with_holiday.csv', index=False)