In [None]:
from multiprocessing import Pool
import pandas as pd
from fbprophet import Prophet

In [None]:
# Calendar table with its `date` column parsed as datetimes; `read_sales`
# uses `calendar.date` to relabel the sales day columns with real dates.
calendar = pd.read_csv('../input/m5-forecasting-uncertainty/calendar.csv', parse_dates=['date'])

In [None]:
# Grouping keys for every aggregation level that `read_sales` builds.
# The empty list is the grand total; the other entries are passed to
# `DataFrame.groupby` as-is.
AGGREGATION_LEVELS = [
    [],
    ['state_id'],
    ['store_id'],
    ['cat_id'],
    ['dept_id'],
    ['state_id', 'cat_id'],
    ['state_id', 'dept_id'],
    ['store_id', 'cat_id'],
    ['store_id', 'dept_id'],
    ['item_id'],
    ['state_id', 'item_id'],
    ['item_id', 'store_id']
]
# Central prediction-interval widths. `fit_model` turns each width w into the
# quantile pair (1 - w) / 2 and 1 - (1 - w) / 2, so these widths map to
# 0.005/0.995, 0.025/0.975, 0.165/0.835 and 0.250/0.750 -- the quantile levels
# of the M5 uncertainty submission. A width of 0.75 would instead produce
# 0.125/0.875, which are not valid submission ids; the 67% interval is the
# one that is required.
INTERVALS = [0.99, 0.95, 0.67, 0.5]

In [None]:
def _aggregate_level(sales, level):
    """Sum the numeric columns of ``sales`` per group of ``level`` and add an ``id`` column.

    An empty ``level`` yields a single grand-total row with id ``Total_X``.
    A single key yields ids of the form ``<key>_X``; several keys are joined
    with ``_`` in the order given.
    """
    if not level:
        # No grouping keys: one row holding the column-wise total.
        total = sales.sum(numeric_only=True).to_frame().T
        return total.assign(id='Total_X')

    grouped = sales.groupby(level).sum(numeric_only=True).reset_index()
    if len(level) == 1:
        ids = grouped[level[0]] + '_X'
    else:
        # Join every key, not just the first two, so that deeper levels
        # cannot end up with colliding ids.
        ids = grouped[level[0]]
        for key in level[1:]:
            ids = ids + '_' + grouped[key]
    return grouped.drop(columns=level).assign(id=ids)


def read_sales(filename):
    """Read a sales CSV and aggregate it at every level in ``AGGREGATION_LEVELS``.

    Parameters
    ----------
    filename : str
        Path of the CSV to read. It must contain the grouping columns named
        in ``AGGREGATION_LEVELS``; every remaining numeric column is summed.
        (Presumably these are the per-day sales columns -- confirm against
        the input file.)

    Returns
    -------
    pandas.DataFrame
        One row per aggregated series, indexed by ``id``. The columns are
        relabelled with the leading entries of the module-level
        ``calendar.date``, one date per summed column.
    """
    raw = pd.read_csv(filename)
    frames = [_aggregate_level(raw, level) for level in AGGREGATION_LEVELS]
    sales = pd.concat(frames).set_index('id')
    # Replace the positional day columns with the matching calendar dates.
    sales.columns = calendar.date[:len(sales.columns)]
    return sales

In [None]:
def fit_model(params):
    """Forecast 28 periods ahead for one series and return its quantile rows.

    Parameters
    ----------
    params : tuple
        ``(data, prefix, suffix)``. ``data`` is the series to forecast: after
        ``reset_index`` its index becomes Prophet's ``ds`` column and its
        values become ``y``. ``prefix`` and ``suffix`` are the leading and
        trailing parts of every output row label. A single tuple is used so
        the function can be passed straight to ``Pool.map``.

    Returns
    -------
    pandas.DataFrame
        One row per quantile, labelled ``{prefix}_{q:.3f}_{suffix}``, and one
        column per forecast date. For each width in ``INTERVALS`` there is a
        lower and an upper row (from ``yhat_lower`` / ``yhat_upper``), followed
        by a final ``0.500`` row holding Prophet's point forecast ``yhat``.
    """
    data, prefix, suffix = params
    horizon = 28

    history = data.T.reset_index()
    history.columns = ['ds', 'y']

    # The fit does not depend on the interval width, so fit once and reuse the
    # model for every interval instead of refitting inside the loop.
    model = Prophet()
    model.fit(history)
    future = model.make_future_dataframe(periods=horizon)

    def as_rows(frame, labels):
        # Keep only the forecast horizon and pivot it to one row per label.
        rows = frame.tail(horizon).copy()
        rows.columns = ['date', *labels]
        rows = rows.set_index('date').T
        rows.index.name = 'id'
        return rows

    quantiles = []
    prediction = None
    for interval in INTERVALS:
        # Prophet reads `interval_width` when predicting, so changing it on
        # the fitted model is enough to get the bounds for this width.
        model.interval_width = interval
        prediction = model.predict(future)
        lower = (1 - interval) / 2
        upper = 1 - lower
        quantiles.append(as_rows(
            prediction[['ds', 'yhat_lower', 'yhat_upper']],
            [f'{prefix}_{lower:.3f}_{suffix}', f'{prefix}_{upper:.3f}_{suffix}'],
        ))

    if prediction is None:
        # No intervals configured: a prediction is still needed for the
        # point-forecast row below.
        prediction = model.predict(future)

    # `yhat` is the point forecast and does not depend on the interval width.
    quantiles.append(as_rows(prediction[['ds', 'yhat']], [f'{prefix}_0.500_{suffix}']))
    return pd.concat(quantiles)

In [None]:
def forecast(sales, suffix='validation', limit=8, processes=4):
    """Run ``fit_model`` on the rows of ``sales`` in parallel and stack the results.

    Parameters
    ----------
    sales : pandas.DataFrame
        One series per row; each row's index label is passed to ``fit_model``
        as the id prefix.
    suffix : str
        Trailing part of every output row label.
    limit : int or None
        Number of leading rows to forecast. The default of 8 keeps the run
        short enough for the Kaggle environment; pass ``None`` to forecast
        every row.
    processes : int
        Number of worker processes in the pool.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ``fit_model``, concatenated in row order.
    """
    rows = sales if limit is None else sales.head(limit)
    tasks = [(row, row.name, suffix) for _, row in rows.iterrows()]
    # The context manager shuts the workers down once `map` has returned, so
    # repeated calls do not leave idle worker processes behind.
    with Pool(processes) as pool:
        result = pool.map(fit_model, tasks)
    return pd.concat(result)

In [None]:
%%time
# Aggregate the validation-period sales file at every level in AGGREGATION_LEVELS.
sales_valid = read_sales('../input/m5-forecasting-uncertainty/sales_train_validation.csv')

In [None]:
%%time
# Same aggregation for the evaluation-period sales file.
sales_eval = read_sales('../input/m5-forecasting-uncertainty/sales_train_evaluation.csv')

In [None]:
def forecast_one(index=0):
    """Forecast a single aggregated series for visual inspection.

    The series at row ``index`` is fitted twice, once on ``sales_valid`` and
    once on ``sales_eval``, with the label suffix ``'plot'``. The two forecast
    frames are joined column-wise, and the last 28 columns of the
    ``sales_eval`` row are stacked on top of them.

    Returns
    -------
    pandas.DataFrame
        The ``sales_eval`` row followed by the quantile rows.
    """
    def predict_from(frame):
        # Fit on the selected row, using its index label as the id prefix.
        series = frame.iloc[index]
        return fit_model((series, series.name, 'plot'))

    predictions = pd.concat(
        [predict_from(sales_valid), predict_from(sales_eval)],
        axis=1,
    )
    recent_actuals = sales_eval.iloc[index:index+1, -28:]
    return pd.concat([recent_actuals, predictions])

In [None]:
%%time
# Default index 0 is the first aggregated row, i.e. the grand total
# ('Total_X'), because the first aggregation level is the empty one.
result = forecast_one()

In [None]:
# Transpose so that dates form the x-axis and every row of `result`
# (the actual sales plus each quantile series) is drawn as its own line.
result.T.plot(figsize=(16,9))

In [None]:
%%time
# Quantile forecasts for the validation horizon (default suffix 'validation').
# NOTE: `forecast` currently processes only the first 8 aggregated series.
sub_valid = forecast(sales_valid)

In [None]:
%%time
# Quantile forecasts for the evaluation horizon, labelled with the 'evaluation' suffix.
sub_eval = forecast(sales_eval, suffix='evaluation')

In [None]:
# Both forecast frames have one column per forecast date; relabel them
# F1..F28 so the two blocks share column names and stack row-wise.
sub_valid.columns = sub_eval.columns = [f'F{day}' for day in range(1, 29)]
submission = pd.concat((sub_valid, sub_eval))
submission.to_csv('submission.csv', float_format='%.5g')

In [None]:
# Show the combined submission frame as the cell output.
submission