In [14]:
import os
import numpy
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.image as mpimg
import optuna
from itertools import cycle
import seaborn as sns
import statsmodels.api as sm 
from scipy.interpolate import interp1d
from prophet import Prophet

pd.set_option('display.max_columns', 50)
plt.style.use('bmh')
color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

In [15]:
%%time
oj = os.path.join
INPUT_DIR = '../input/m5-forecasting-accuracy/'
train = pd.read_csv(oj(INPUT_DIR, 'trainset.csv'))
price = pd.read_csv(oj(INPUT_DIR, 'sell_prices.csv'))
calendar = pd.read_csv(oj(INPUT_DIR, 'calendar.csv'))

sample_submit = pd.read_csv(oj(INPUT_DIR, 'submit.csv'))

CPU times: total: 3.14 s
Wall time: 8.02 s


In [36]:
print(calendar['event_type_1'].unique())

[nan 'Sporting' 'Cultural' 'National' 'Religious']


In [40]:
print(calendar['event_name_2'].unique())

[nan 'Easter' 'Cinco De Mayo' 'OrthodoxEaster' "Father's day"]


In [39]:
print(calendar['event_type_2'].unique())

[nan 'Cultural' 'Religious']


In [38]:
print(price.head())
print(price.shape)

  store_id        item_id  wm_yr_wk  sell_price
0     CA_1  HOBBIES_1_001     11325        9.58
1     CA_1  HOBBIES_1_001     11326        9.58
2     CA_1  HOBBIES_1_001     11327        8.26
3     CA_1  HOBBIES_1_001     11328        8.26
4     CA_1  HOBBIES_1_001     11329        8.26
(6841121, 4)


In [37]:
print(train.shape)

(30490, 1933)


In [72]:
holidays = calendar[['date', 'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2']]
holidays = holidays.rename(columns={'date': 'ds'})
holidays['lower_window'] = 0
holidays['upper_window'] = 1
holidays1 = holidays.dropna(subset = ['event_name_1'])
holidays2 = holidays.dropna(subset = ['event_name_2'])
holidays1 = holidays1[['ds', 'lower_window', 'upper_window', 'event_name_1']]
holidays1 = holidays1.rename(columns={'event_name_1': 'holiday'})
holidays2 = holidays2[['ds', 'lower_window', 'upper_window', 'event_name_2']]
holidays2 = holidays2.rename(columns={'event_name_2': 'holiday'})
holidays = pd.concat((holidays1, holidays2))
holidays = holidays.reset_index(drop=True)
print(holidays.head())


           ds  lower_window  upper_window        holiday
0  2011-02-06             0             1      SuperBowl
1  2011-02-14             0             1  ValentinesDay
2  2011-02-21             0             1  PresidentsDay
3  2011-03-09             0             1      LentStart
4  2011-03-16             0             1      LentWeek2


In [73]:
submit_df = pd.DataFrame()
for i in range(10):
    df = pd.DataFrame(calendar[['date', 'd', 'wm_yr_wk', 'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2']])
    df_train =  df.loc[0:1926]
    df_test = df.loc[1927:]

    #学習用df編集，sell_price追加
    df_train['y'] = train.loc[i,'d_1':].values
    df_train = pd.merge(df_train, price[(price['store_id'] == train.loc[i,'store_id']) \
                & (price['item_id'] == train.loc[0,'item_id'])]\
                [['wm_yr_wk', 'sell_price']], how='left', on=['wm_yr_wk'])
    
    df_train = df_train.rename(columns={'date': 'ds'})
    df_train = df_train.fillna({"sell_price":9999999999})
  
    #model定義
    model = Prophet(weekly_seasonality=True, yearly_seasonality=True, holidays=holidays)
    # model = Prophet(weekly_seasonality=True, yearly_seasonality=True)
    
    #周期生追加
    model.add_seasonality(name="monthly", period=30.5, fourier_order=7)
    model.add_seasonality(name="yearly", period=365.25, fourier_order=10)
    
    #説明変数sell_price追加
    model.add_regressor('sell_price')
    
    model.fit(df_train)
    future = model.make_future_dataframe(periods=14)
    future["wm_yr_wk"] = calendar[:len(future)]["wm_yr_wk"]
  
    # futureにsell_price追加
    future = pd.merge(future, price[(price['store_id'] == train.loc[i,'store_id'])\
                    & (price['item_id'] == train.loc[0,'item_id'])] \
                    [['wm_yr_wk',"sell_price"]], how='left', on=['wm_yr_wk'])
    future = future.fillna({"sell_price":9999999999})

    # 学習
    forecast = model.predict(future)
    
    model.plot(forecast)
    plt.show()
    #df_train.to_csv('df_train' + str(i) + '.csv', index=False)
    yhat_tenchi = forecast[['yhat']].tail(14).T.round(0)
    submit_df = pd.concat([submit_df, yhat_tenchi.reset_index(drop=True)], ignore_index=True)
for i, row in enumerate(submit_df.itertuples(index=False), start=0):
    # start=2 はsample_submitの2行目から始まるため
    # 7列目から始めて、行の各要素を置換する
    for j, value in enumerate(row, start=6):
        sample_submit.iat[i, j] = value

sample_submit.to_csv('submit_edited.csv', index=False)


ValueError: Length of values (2) does not match length of index (1927)

In [70]:
for i in range(10):
    df = pd.DataFrame(calendar[['date', 'd', 'wm_yr_wk', 'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2']])
    df_train =  df.loc[0:1926]
    df_test = df.loc[1927:]

    #学習用df編集，sell_price追加
    df_train['y'] = train.loc[i,'d_1':].values
    df_train = pd.merge(df_train, price[(price['store_id'] == train.loc[i,'store_id']) \
                & (price['item_id'] == train.loc[0,'item_id'])]\
                [['wm_yr_wk', 'sell_price']], how='left', on=['wm_yr_wk'])
    
    df_train = df_train.rename(columns={'date': 'ds'})
    df_train = df_train.fillna({"sell_price":9999999999})

ValueError: Length of values (2) does not match length of index (1927)