In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from fbprophet import Prophet
import multiprocessing
from joblib import Parallel, delayed

In [None]:
# Read the competition CSV files into DataFrames.
# The four reads are independent of one another, so their order does not matter.
Calendar = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/calendar.csv')
SellPrices = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sell_prices.csv')
SalesTrain = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv')
SampleSubmission = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sample_submission.csv')

In [None]:
# Aggregate sales to one row per (department, store) by summing over items.
# numeric_only=True is explicit so that the result holds only the two key
# columns followed by the summed numeric columns on every pandas version.
# Without it, pandas >= 2.0 also "sums" (concatenates) the string id columns
# per group, which is very expensive and changes the column layout.
SalesDepStore = SalesTrain.groupby(['dept_id', 'store_id'], as_index=False).sum(numeric_only=True)

In [None]:
# Compute each item's share of its department's sales within each store.
#
# The averaging window is defined ONCE, by column label, from the item-level
# frame. Slicing both frames with the same positional range does not select
# the same days, because SalesTrain and SalesDepStore carry a different number
# of leading id columns; numerator and denominator would then be averaged
# over different windows.
# NOTE(review): assumes positions 1550:1947 of SalesTrain are all daily sales
# columns and that SalesDepStore keeps those column labels - confirm.
_window_cols = SalesTrain.columns[1550:1947]

# .copy() makes these independent frames, so adding a column below neither
# raises SettingWithCopyWarning nor risks writing through to the source frame.
SalesTrainSub = SalesTrain[['id', 'dept_id', 'store_id']].copy()
SalesDepStoreSub = SalesDepStore[['dept_id', 'store_id']].copy()

SalesTrainSub['ItematStore1YrMean'] = SalesTrain[_window_cols].mean(axis=1)
SalesDepStoreSub['StoreDep1YrMean'] = SalesDepStore[_window_cols].mean(axis=1)

# Attach the department/store mean to every item row, then take the ratio.
SalesTrainDepStore = pd.merge(SalesTrainSub, SalesDepStoreSub, on=['dept_id', 'store_id'])
SalesTrainDepStore['MeanFraction1Yr'] = (
    SalesTrainDepStore['ItematStore1YrMean'] / SalesTrainDepStore['StoreDep1YrMean']
)

In [None]:
# Add prices: attach each item's dept_id to the price table by joining on
# (item_id, store_id).
# Note: SellPrices is reassigned from itself, so re-running this cell merges
# into the already-merged frame.
Train_sub = SalesTrain[['item_id', 'dept_id', 'store_id']]
SellPrices = Train_sub.merge(SellPrices, on=['item_id', 'store_id'])

In [None]:
# Average sell price per department, store and week (wm_yr_wk).
_price_keys = ['dept_id', 'store_id', 'wm_yr_wk']
_price_view = SellPrices[_price_keys + ['sell_price']]
SellPricesDepStore = _price_view.groupby(_price_keys, as_index=False).mean()

In [None]:
# Translate the weekly key "wm_yr_wk" into day labels "d" using the calendar:
# each weekly price row is repeated for every calendar day of that week.
CalendarSub = Calendar[['wm_yr_wk', 'd']]
SellPricesDepStore = SellPricesDepStore.merge(CalendarSub, on='wm_yr_wk')

In [None]:
# Reshape the prices for forecasting: one column per day label "d", then
# repeat the department/store row for every item row of SalesTrainDepStore.
SalesTrainDepStoreIndSub = SalesTrainDepStore[['dept_id', 'store_id']]
SellPricesRegs = (
    SellPricesDepStore
    .pivot_table(index=['dept_id', 'store_id'], columns='d', values='sell_price')
    .merge(SalesTrainDepStoreIndSub, on=['dept_id', 'store_id'])
    .reset_index(drop=True)
)

In [None]:
# Preview the first rows of the reshaped price table.
SellPricesRegs.head(n=5)

In [None]:
# Build the holidays table passed to Prophet: one (holiday, ds) row per
# calendar event, labelled once by event name and once by event type, for
# both event slots of the calendar.
_event_window = Calendar.iloc[858:1969]


def _event_rows(flag_col, label_col):
    """Return window rows where `flag_col` is set, as columns (holiday, ds) labelled by `label_col`."""
    flagged = _event_window.loc[_event_window[flag_col].notnull()]
    return flagged[[label_col, 'date']].rename(columns={label_col: 'holiday', 'date': 'ds'})


HolidayDF1 = _event_rows('event_name_1', 'event_name_1')
HolidayDF2 = _event_rows('event_name_1', 'event_type_1')
HolidayDF3 = _event_rows('event_name_2', 'event_name_2')
HolidayDF4 = _event_rows('event_name_2', 'event_type_2')
holidays = pd.concat((HolidayDF1, HolidayDF2, HolidayDF3, HolidayDF4))

In [None]:
# Preview the first rows of the holidays table.
holidays.head(n=5)

In [None]:
# Forecasting function built on the Prophet library
def ProphetFC(i):
    """Fit Prophet on one department/store sales series and forecast 28 days.

    Reads the module-level ``SalesDepStore``, ``Calendar`` and ``holidays``
    frames.

    Parameters
    ----------
    i : int
        Positional row of ``SalesDepStore`` identifying the department/store
        series to forecast.

    Returns
    -------
    pandas.DataFrame
        The forecast rows that follow the training window, with columns
        ``yhat``, ``dept_id`` and ``store_id``. The index is inherited from
        Prophet's forecast frame, i.e. the row positions after the training
        rows.
    """
    # Training window (calendar row positions) and forecast horizon in days.
    first_row, last_row, horizon = 858, 1941, 28
    n_train = last_row - first_row
    extra_regressors = ['wday', 'month', 'year']

    # Slice the calendar once: training rows plus horizon, re-indexed from 0
    # so it lines up row-for-row with the frames handed to Prophet.
    cal = Calendar.iloc[first_row:last_row + horizon].reset_index(drop=True)
    cal_train = cal.iloc[:n_train]

    # Pick the sales values by day LABEL rather than by column position, so
    # the series stays aligned with the calendar dates whatever the number of
    # leading non-day columns in SalesDepStore.
    # NOTE(review): assumes SalesDepStore's daily columns are named with the
    # values of Calendar['d'] - confirm against the input data.
    series_row = SalesDepStore.iloc[i]
    day_labels = cal_train['d'].tolist()

    m = Prophet(yearly_seasonality=20, holidays=holidays)
    m.add_seasonality(name='monthly', period=28, fourier_order=10)
    m.add_seasonality(name='weekly', period=7, fourier_order=5)
    for name in extra_regressors:
        m.add_regressor(name)

    tsdf = pd.DataFrame({
        'ds': pd.to_datetime(cal_train['date']),
        'y': series_row[day_labels].reset_index(drop=True),
    })
    for name in extra_regressors:
        tsdf[name] = cal_train[name]
    m.fit(tsdf)

    # The future frame holds the training dates followed by `horizon` new
    # dates; the regressors for all of those rows come from the calendar.
    future = m.make_future_dataframe(periods=horizon)
    for name in extra_regressors:
        future[name] = cal[name]
    fcst = m.predict(future)
    print("Iteration ", i, "Completed")

    # Keep only the rows after the training window (the actual forecast).
    FCAST = fcst[['yhat']].iloc[n_train:n_train + horizon].copy()
    FCAST['dept_id'] = series_row['dept_id']
    FCAST['store_id'] = series_row['store_id']
    return FCAST

In [None]:
# Run the forecasts in parallel: one ProphetFC job per row of SalesDepStore,
# using as many workers as there are CPU cores.
num_cores = multiprocessing.cpu_count()
if __name__ == "__main__":
    _forecast_jobs = (delayed(ProphetFC)(row) for row in range(len(SalesDepStore)))
    processed_FC = Parallel(n_jobs=num_cores)(_forecast_jobs)

In [None]:
# Combine the per-series forecasts into one long table.
# Only the first 70 results (at most) are kept.
FCAST = pd.concat(processed_FC[:70])
# Turn the inherited forecast-row index into a period number by subtracting
# the fixed offset 1082.
FCAST['Period'] = FCAST.index - 1082

In [None]:
# Reshape the forecast into the submission layout.
# One row per (dept_id, store_id), one column per forecast period.
FCASTPivot = FCAST.pivot_table(index=['dept_id', 'store_id'], columns='Period', values='yhat')
Submission = pd.merge(SalesTrainDepStore, FCASTPivot, on=['dept_id', 'store_id'])

# Scale each department/store forecast by the item's share of sales.
# The period columns are labelled 1..28 and sit right after the six columns
# of SalesTrainDepStore, so addressing them by label touches the same columns
# as positions 6..33.
_period_cols = list(range(1, 29))
Submission[_period_cols] = Submission[_period_cols].mul(Submission['MeanFraction1Yr'], axis=0)

# Keep the id plus the 28 period columns and adopt the sample file's header.
# This reads SampleSubmission.columns, so it must run while SampleSubmission
# still holds all of its original columns.
Submission_valid = Submission[['id'] + _period_cols]
Submission_valid.columns = SampleSubmission.columns

In [None]:
# Produce the submission file.
# SampleSubmission is reduced to its id column (note: this overwrites the
# loaded frame) and used as the left side of the join, so every expected id
# appears once; ids without a forecast are filled with 0.
SampleSubmission = SampleSubmission[['id']]
SubmissionFinal = (
    SampleSubmission
    .merge(Submission_valid, on='id', how='left')
    .fillna(0)
)
SubmissionFinal.to_csv('submission.csv', index=False)