In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import RegressorChain
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
import ipywidgets as widgets

from sklearn.multioutput import RegressorChain

from numpy import random
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score

import warnings
warnings.simplefilter("ignore")

The next cell reproduces the plot styling that the `from learntools.time_series.style import *` command would provide from the Kaggle learntools components, since it was very difficult to actually install learntools on a local computer.

In [2]:
# Global plot styling (mirrors the learntools time-series style module).
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so choose whichever spelling this
# installation provides and fall back to the default style if neither exists.
_WHITEGRID_STYLES = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
plt.style.use(
    next(
        (name for name in _WHITEGRID_STYLES if name in plt.style.available),
        "default",
    )
)

# Figure-level defaults: tight layout, wide aspect ratio, bold titles.
plt.rc(
    "figure",
    autolayout=True,
    figsize=(11, 4),
    titlesize=18,
    titleweight='bold',
)
# Axes-level defaults: bold labels and titles with a little padding.
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
# Keyword arguments intended to be splatted into pandas ``.plot`` calls.
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)

# Ask the inline backend for high-resolution ("retina") figures.
get_ipython().config.InlineBackend.figure_format = 'retina'

The next cell brings in the necessary helper functions from the Kaggle learntools files.

In [3]:
def create_multistep_example(n, steps, lags, lead_time=1):
    """Build a small colour-coded demo table of multistep targets and lag features.

    Parameters
    ----------
    n : int
        Number of yearly observations; the series values are ``0 .. n-1``.
    steps : int
        Number of forecast steps, forwarded to ``make_multistep_target``.
    lags : int
        Number of lag columns, forwarded to ``make_lags``.
    lead_time : int, default 1
        Smallest lag offset, forwarded to ``make_lags``.

    Returns
    -------
    pandas Styler
        The concatenated ``Targets`` / ``Features`` frame with a background
        colour applied to each column group.
    """
    ts = pd.Series(
        np.arange(n),
        # NOTE(review): recent pandas releases deprecate the 'A' alias in
        # favour of 'Y' -- confirm against the installed version before changing.
        index=pd.period_range(start='2010', freq='A', periods=n, name='Year'),
        # Pass a dtype *instance*; handing pandas the bare ``pd.Int8Dtype``
        # class makes newer versions warn about implicit instantiation.
        dtype=pd.Int8Dtype(),
    )
    X = make_lags(ts, lags, lead_time)
    # Targets are built in reverse so the furthest step is the left-most column.
    y = make_multistep_target(ts, steps, reverse=True)
    data = pd.concat({'Targets': y, 'Features': X}, axis=1)
    data = (
        data.style
        .set_properties(subset=['Targets'], **{'background-color': 'LavenderBlush'})
        .set_properties(subset=['Features'], **{'background-color': 'Lavender'})
    )
    return data

def load_multistep_data():
    """Return three styled example tables built by ``create_multistep_example``.

    Each table uses ten observations; they differ in the number of forecast
    steps, the number of lags and the lead time.
    """
    example_settings = (
        dict(steps=1, lags=3, lead_time=1),
        dict(steps=3, lags=4, lead_time=2),
        dict(steps=3, lags=4, lead_time=1),
    )
    return [create_multistep_example(10, **settings) for settings in example_settings]

def plot_multistep(y, every=1, ax=None, palette_kwargs=None):
    """Draw each row of ``y`` as its own forecast line and return the axes.

    Parameters
    ----------
    y : DataFrame
        One row per forecast origin; the row's values are plotted in order.
    every : int, default 1
        Plot only every ``every``-th row.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.
    palette_kwargs : dict, optional
        Overrides for the default ``sns.color_palette`` arguments.
    """
    # NOTE(review): ``sns`` is not imported in the visible import cell --
    # confirm that ``import seaborn as sns`` runs before this function is called.
    palette_options = {'palette': 'husl', 'n_colors': 16, 'desat': None}
    if palette_kwargs is not None:
        palette_options.update(palette_kwargs)
    colours = sns.color_palette(**palette_options)

    if ax is None:
        _, ax = plt.subplots()
    ax.set_prop_cycle(plt.cycler('color', colours))

    for origin, forecast in y[::every].iterrows():
        # Re-index the row so its values run forward in time from the origin.
        forecast.index = pd.period_range(start=origin, periods=len(forecast))
        forecast.plot(ax=ax)
    return ax

def make_lags(ts, lags, lead_time=1, name='y'):
    """Return a frame of lagged copies of ``ts``.

    Columns are named ``{name}_lag_{i}`` for ``i`` running from ``lead_time``
    up to ``lead_time + lags - 1``; each column is ``ts.shift(i)``.
    """
    lagged = {}
    for offset in range(lead_time, lead_time + lags):
        lagged[f'{name}_lag_{offset}'] = ts.shift(offset)
    return pd.concat(lagged, axis=1)

def make_multistep_target(ts, steps, reverse=False):
    """Return a frame whose column ``y_step_{k}`` is ``ts`` shifted back by ``k - 1``.

    With ``reverse=True`` the columns are emitted from the furthest step down
    to step 1; otherwise from step 1 upward.
    """
    offsets = list(range(steps))
    if reverse:
        offsets.reverse()
    targets = {}
    for offset in offsets:
        targets[f'y_step_{offset + 1}'] = ts.shift(-offset)
    return pd.concat(targets, axis=1)

In [4]:
# Read the raw CSV tables from the working directory with pandas' default
# parsing options, one DataFrame per file.
train, holidays, oil, stores, transactions, submission = (
    pd.read_csv(csv_name)
    for csv_name in (
        'train_features.csv',
        'holidays_events.csv',
        'oil.csv',
        'stores.csv',
        'transactions.csv',
        'sample_submission.csv',
    )
)

In [5]:
# Load the test features with compact dtypes and a parsed ``date`` column.
# ``infer_datetime_format`` is intentionally not passed: pandas 2.0 deprecated
# it (consistent format inference is now the default), so the keyword only
# produces a warning there.
test = pd.read_csv(
    'test_features.csv',
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
)
# Represent each date as a daily Period rather than a timestamp.
test['date'] = test['date'].dt.to_period('D')
# Index by (store, family, date) and sort the index.
test = test.set_index(['store_nbr', 'family', 'date']).sort_index()

In [6]:
# Display the indexed test frame as this cell's output.
test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,onpromotion,day_of_week,weekend,year,month,day_of_month,dcoilwtico,city,state,type,cluster,date_str,national_holiday,national_event,national_workday,local_holiday,regional_holiday
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,AUTOMOTIVE,2017-08-16,3000888,0,3,0,2017,8,16,46.800000,Quito,Pichincha,D,13,2017-08-16,0,0,0,0,0
1,AUTOMOTIVE,2017-08-17,3002670,0,4,0,2017,8,17,47.070000,Quito,Pichincha,D,13,2017-08-17,0,0,0,0,0
1,AUTOMOTIVE,2017-08-18,3004452,0,5,0,2017,8,18,48.590000,Quito,Pichincha,D,13,2017-08-18,0,0,0,0,0
1,AUTOMOTIVE,2017-08-19,3006234,0,6,1,2017,8,19,48.190000,Quito,Pichincha,D,13,2017-08-19,0,0,0,0,0
1,AUTOMOTIVE,2017-08-20,3008016,0,7,1,2017,8,20,47.790000,Quito,Pichincha,D,13,2017-08-20,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,SEAFOOD,2017-08-27,3022271,0,7,1,2017,8,27,46.816667,Quito,Pichincha,B,6,2017-08-27,0,0,0,0,0
9,SEAFOOD,2017-08-28,3024053,0,1,0,2017,8,28,46.400000,Quito,Pichincha,B,6,2017-08-28,0,0,0,0,0
9,SEAFOOD,2017-08-29,3025835,0,2,0,2017,8,29,46.460000,Quito,Pichincha,B,6,2017-08-29,0,0,0,0,0
9,SEAFOOD,2017-08-30,3027617,0,3,0,2017,8,30,45.960000,Quito,Pichincha,B,6,2017-08-30,0,0,0,0,0


In [7]:
# Display the sample submission frame (columns: id, sales).
submission

Unnamed: 0,id,sales
0,3000888,0.0
1,3000889,0.0
2,3000890,0.0
3,3000891,0.0
4,3000892,0.0
...,...,...
28507,3029395,0.0
28508,3029396,0.0
28509,3029397,0.0
28510,3029398,0.0


The next cell loads the sales by store number for all years, not just 2017, indexed by store number, product family and date.

In [8]:
# Load only the columns needed for modelling, with compact dtypes and a parsed
# ``date`` column. ``infer_datetime_format`` is intentionally not passed:
# pandas 2.0 deprecated it (consistent format inference is now the default),
# so the keyword only produces a warning there.
store_sales = pd.read_csv(
    'train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
)
# Represent each date as a daily Period rather than a timestamp.
store_sales['date'] = store_sales['date'].dt.to_period('D')
# Index by (store, family, date) and sort the index.
store_sales = store_sales.set_index(['store_nbr', 'family', 'date']).sort_index()

In [9]:
# Display the indexed training sales frame as this cell's output.
store_sales

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sales,onpromotion
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2013-01-01,0.000000,0
1,AUTOMOTIVE,2013-01-02,2.000000,0
1,AUTOMOTIVE,2013-01-03,3.000000,0
1,AUTOMOTIVE,2013-01-04,3.000000,0
1,AUTOMOTIVE,2013-01-05,5.000000,0
...,...,...,...,...
9,SEAFOOD,2017-08-11,23.830999,0
9,SEAFOOD,2017-08-12,16.859001,4
9,SEAFOOD,2017-08-13,20.000000,0
9,SEAFOOD,2017-08-14,17.000000,0


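The next cell creates a DataFrame of average daily sales per product family for 2017, together with the average `onpromotion` value per family. I'm not sure what the promotions by family signify; presumably the averaged value is the mean `onpromotion` figure across stores for each family and date.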

In [10]:
# Mean of every column for each (family, date) pair.
family_daily_mean = store_sales.groupby(['family', 'date']).mean()
# Move the families into the columns, then keep only the 2017 rows.
family_sales = family_daily_mean.unstack('family').loc['2017']

In [11]:
# Display the 2017 per-family averages (sales and onpromotion column groups).
family_sales

Unnamed: 0_level_0,sales,sales,sales,sales,sales,sales,sales,sales,sales,sales,...,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion,onpromotion
family,AUTOMOTIVE,BABY CARE,BEAUTY,BEVERAGES,BOOKS,BREAD/BAKERY,CELEBRATION,CLEANING,DAIRY,DELI,...,MAGAZINES,MEATS,PERSONAL CARE,PET SUPPLIES,PLAYERS AND ELECTRONICS,POULTRY,PREPARED FOODS,PRODUCE,SCHOOL AND OFFICE SUPPLIES,SEAFOOD
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-01,0.092593,0.037037,0.055556,74.222221,0.000000,9.084685,0.129630,7.500000,11.518518,3.629167,...,0.0,0.018519,0.111111,0.018519,0.000000,0.000000,0.037037,0.129630,0.000000,0.000000
2017-01-02,11.481482,0.259259,11.648149,6208.055664,0.481481,844.836304,14.203704,2233.648193,1545.000000,539.114807,...,0.0,0.462963,10.592593,0.537037,0.000000,0.259259,1.166667,5.629630,0.000000,0.407407
2017-01-03,8.296296,0.296296,7.185185,4507.814941,0.814815,665.124084,10.629630,1711.907349,1204.203735,404.300079,...,0.0,0.481481,9.722222,0.444444,0.000000,0.388889,1.351852,56.296296,0.000000,0.407407
2017-01-04,6.833333,0.333333,6.888889,3911.833252,0.759259,594.160583,11.185185,1508.036987,1107.796265,309.397675,...,0.0,0.370370,12.037037,0.444444,0.000000,0.296296,5.444444,101.277778,0.000000,0.333333
2017-01-05,6.333333,0.351852,5.925926,3258.796387,0.407407,495.511597,12.444445,1241.833374,829.277771,260.776489,...,0.0,8.981481,5.666667,0.000000,0.000000,0.296296,0.907407,5.018519,0.000000,0.444444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-11,8.166667,0.129630,6.500000,3502.055664,0.000000,522.305298,16.111111,1031.148193,831.648132,345.934967,...,0.0,22.925926,9.000000,0.000000,0.000000,27.407407,0.537037,6.000000,2.666667,2.537037
2017-08-12,7.462963,0.055556,6.833333,3376.259277,0.000000,507.473114,15.722222,1072.870361,793.592590,290.553589,...,0.0,0.000000,8.703704,0.000000,0.018519,0.111111,0.277778,6.000000,2.759259,0.759259
2017-08-13,8.907408,0.166667,8.018518,3747.296387,0.018519,615.976990,10.074074,1143.648193,928.000000,325.801361,...,0.0,0.000000,9.240741,0.000000,0.000000,0.185185,0.222222,5.907407,2.777778,0.018519
2017-08-14,5.407407,0.166667,6.240741,3237.629639,0.000000,518.564026,11.000000,1019.111084,797.222229,271.532227,...,0.0,0.000000,8.722222,0.000000,0.000000,0.129630,0.148148,6.018519,2.851852,0.000000


In [12]:
datasets = load_multistep_data()

# One Output widget per example table, grouped into a tabbed container.
tab_outputs = [widgets.Output() for _ in datasets]
data_tabs = widgets.Tab(tab_outputs)
for tab_number, (styled_table, tab_output) in enumerate(zip(datasets, tab_outputs), start=1):
    data_tabs.set_title(tab_number - 1, f'Dataset {tab_number}')
    # Anything displayed inside the context is captured by that tab's Output.
    with tab_output:
        display(styled_table)

display(data_tabs)

Tab(children=(Output(), Output(), Output()), _titles={'0': 'Dataset 1', '1': 'Dataset 2', '2': 'Dataset 3'})

In [13]:
# Print each frame under a heading underlined to the heading's own width,
# with a blank-line separator between the two sections.
sections = (("Training Data", store_sales), ("Test Data", test))
for position, (heading, frame) in enumerate(sections):
    if position:
        print("\n")
    print(heading, "\n" + "-" * len(heading) + "\n", frame)

Training Data 
-------------
                                      sales  onpromotion
store_nbr family     date                              
1         AUTOMOTIVE 2013-01-01   0.000000            0
                     2013-01-02   2.000000            0
                     2013-01-03   3.000000            0
                     2013-01-04   3.000000            0
                     2013-01-05   5.000000            0
...                                    ...          ...
9         SEAFOOD    2017-08-11  23.830999            0
                     2017-08-12  16.859001            4
                     2017-08-13  20.000000            0
                     2017-08-14  17.000000            0
                     2017-08-15  16.000000            0

[3000888 rows x 2 columns]


Test Data 
---------
                                       id  onpromotion  day_of_week  weekend  \
store_nbr family     date                                                     
1         AUTOMOTIVE 2017-08-16 

In [18]:
# Per-family 2017 sales: the source series for both features and targets.
sales_by_family = family_sales['sales']

# Four lag features per family; rows with incomplete history are dropped.
lag_features = make_lags(sales_by_family, lags=4).dropna()

# Sixteen-step-ahead targets; rows without a full horizon are dropped.
multistep_targets = make_multistep_target(sales_by_family, steps=16).dropna()

# Keep only the dates present in both frames.
y, X = multistep_targets.align(lag_features, join='inner', axis=0)



In [19]:
# Display the aligned lag-feature frame (wide: one column per lag and family).
X

Unnamed: 0_level_0,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,y_lag_1,...,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4,y_lag_4
family,AUTOMOTIVE,BABY CARE,BEAUTY,BEVERAGES,BOOKS,BREAD/BAKERY,CELEBRATION,CLEANING,DAIRY,DELI,...,MAGAZINES,MEATS,PERSONAL CARE,PET SUPPLIES,PLAYERS AND ELECTRONICS,POULTRY,PREPARED FOODS,PRODUCE,SCHOOL AND OFFICE SUPPLIES,SEAFOOD
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-05,6.833333,0.333333,6.888889,3911.833252,0.759259,594.160583,11.185185,1508.036987,1107.796265,309.397675,...,0.074074,5.013166,2.629630,0.074074,0.222222,2.855537,1.040722,31.163778,0.000000,0.259259
2017-01-06,6.333333,0.351852,5.925926,3258.796387,0.407407,495.511597,12.444445,1241.833374,829.277771,260.776489,...,7.777778,488.522339,714.370361,12.814815,22.777779,581.413147,153.555542,3409.500488,2.925926,33.581944
2017-01-07,6.018518,0.277778,6.518518,3507.277832,0.537037,513.873291,18.333334,1257.611084,864.333313,317.622406,...,3.500000,427.520905,488.333344,8.870370,13.555555,462.013672,125.820534,3394.559814,2.018518,27.135260
2017-01-08,10.259259,0.259259,10.037037,4848.518555,0.481481,649.424133,21.444445,1761.351807,1173.463013,381.874756,...,3.500000,369.121490,426.574066,8.129630,12.092592,431.276398,116.740814,3467.447998,1.722222,22.034130
2017-01-09,9.388889,0.240741,11.611111,5503.647949,0.722222,776.717834,10.925926,1784.018555,1323.314819,429.561798,...,3.203704,368.945679,360.925934,7.629630,10.333333,345.673492,101.434570,2069.316650,1.425926,20.615334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-27,5.481482,0.166667,4.537037,2892.148193,0.000000,468.401306,10.148149,1062.425903,795.666687,243.997375,...,8.777778,414.680817,414.537048,11.148149,15.185185,481.465210,103.208595,2707.897461,6.592593,24.742908
2017-07-28,6.277778,0.240741,3.981482,2623.814697,0.037037,411.163452,19.222221,1395.314819,623.944458,213.962723,...,6.370370,349.225952,315.592590,8.407408,9.574074,360.456696,89.320778,2168.835205,3.555556,18.431982
2017-07-29,6.370370,0.166667,4.092593,3554.111084,0.000000,493.264771,16.666666,1548.685181,764.481506,335.682251,...,5.425926,308.749786,257.685181,6.518518,8.481482,310.549103,76.463348,2138.104736,3.222222,15.594278
2017-07-30,10.018518,0.203704,7.185185,4612.685059,0.000000,596.895386,19.500000,1906.611084,983.037048,354.486481,...,5.111111,307.780975,267.981476,6.722222,9.296296,306.457153,82.526276,2702.183594,3.074074,15.549167


In [16]:
# Display the aligned multistep target frame (wide: one column per step and family).
y

Unnamed: 0_level_0,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,y_step_1,...,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16,y_step_16
family,AUTOMOTIVE,BABY CARE,BEAUTY,BEVERAGES,BOOKS,BREAD/BAKERY,CELEBRATION,CLEANING,DAIRY,DELI,...,MAGAZINES,MEATS,PERSONAL CARE,PET SUPPLIES,PLAYERS AND ELECTRONICS,POULTRY,PREPARED FOODS,PRODUCE,SCHOOL AND OFFICE SUPPLIES,SEAFOOD
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-05,6.333333,0.351852,5.925926,3258.796387,0.407407,495.511597,12.444445,1241.833374,829.277771,260.776489,...,6.537037,512.165527,233.611115,6.444445,8.648149,513.483887,106.494164,2006.322021,0.944444,25.899963
2017-01-06,6.018518,0.277778,6.518518,3507.277832,0.537037,513.873291,18.333334,1257.611084,864.333313,317.622406,...,10.444445,402.395264,420.944458,11.314815,16.148148,489.485840,143.066299,2849.252930,1.944444,26.443592
2017-01-07,10.259259,0.259259,10.037037,4848.518555,0.481481,649.424133,21.444445,1761.351807,1173.463013,381.874756,...,8.648149,413.076538,462.462952,12.462963,18.351852,525.847107,141.927856,3101.657715,3.555556,29.259481
2017-01-08,9.388889,0.240741,11.611111,5503.647949,0.722222,776.717834,10.925926,1784.018555,1323.314819,429.561798,...,7.037037,310.008514,311.888885,6.962963,11.518518,370.790070,97.506241,2168.730225,1.740741,20.695278
2017-01-09,5.944445,0.444444,5.648148,3448.203613,0.500000,535.816040,9.240741,1208.018555,883.685181,284.767456,...,5.259259,300.459290,323.500000,5.814815,9.370370,326.745636,97.317223,2247.617920,1.629630,17.374334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-27,6.277778,0.240741,3.981482,2623.814697,0.037037,411.163452,19.222221,1395.314819,623.944458,213.962723,...,9.259259,498.964661,295.888885,10.870370,12.111111,468.857361,96.286926,2198.854492,65.240738,23.566963
2017-07-28,6.370370,0.166667,4.092593,3554.111084,0.000000,493.264771,16.666666,1548.685181,764.481506,335.682251,...,8.944445,329.178711,294.500000,9.407408,13.185185,354.342773,84.693817,2070.154785,67.481483,19.037592
2017-07-29,10.018518,0.203704,7.185185,4612.685059,0.000000,596.895386,19.500000,1906.611084,983.037048,354.486481,...,8.685185,345.055817,336.814819,10.018518,13.722222,379.801208,91.509422,2331.922363,68.851852,20.704575
2017-07-30,9.796296,0.259259,8.388889,5074.629395,0.018519,701.206299,10.833333,2107.722168,1100.592651,395.133301,...,8.462963,314.364563,279.203705,7.722222,9.259259,344.398285,86.062500,2134.399902,52.333332,17.975555


In [20]:
le = LabelEncoder()

# Wide -> long: one row per (date, family), then expose ``family`` as a column.
X_long = X.stack('family').reset_index('family')
# Replace the family names with integer codes.
X_long['family'] = le.fit_transform(X_long['family'])
X = X_long

# Same wide -> long reshape for the targets; ``family`` stays in the index.
y = y.stack('family')

display(y)

Unnamed: 0_level_0,Unnamed: 1_level_0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8,y_step_9,y_step_10,y_step_11,y_step_12,y_step_13,y_step_14,y_step_15,y_step_16
date,family,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-01-05,AUTOMOTIVE,6.333333,6.018518,10.259259,9.388889,5.944445,4.777778,6.314815,5.388889,5.240741,8.500000,10.259259,6.407407,5.685185,5.703704,4.777778,5.148148
2017-01-05,BABY CARE,0.351852,0.277778,0.259259,0.240741,0.444444,0.240741,0.277778,0.296296,0.296296,0.388889,0.425926,0.314815,0.166667,0.222222,0.129630,0.166667
2017-01-05,BEAUTY,5.925926,6.518518,10.037037,11.611111,5.648148,6.500000,5.277778,4.370370,4.703704,7.777778,9.037037,5.648148,5.351852,4.740741,3.981482,4.592593
2017-01-05,BEVERAGES,3258.796387,3507.277832,4848.518555,5503.647949,3448.203613,3171.740723,3046.870361,2693.722168,3226.037109,4667.296387,5580.611328,3700.370361,3409.796387,3263.462891,2676.573975,3003.555664
2017-01-05,BOOKS,0.407407,0.537037,0.481481,0.722222,0.500000,0.518519,0.481481,0.388889,0.444444,0.574074,0.555556,0.388889,0.500000,0.407407,0.277778,0.351852
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-31,POULTRY,364.955658,403.601349,377.313965,316.436096,533.497070,416.454010,464.596558,344.051727,313.780884,305.270203,278.819855,468.857361,354.342773,379.801208,344.398285,325.679810
2017-07-31,PREPARED FOODS,84.698647,87.836800,88.735962,77.172997,91.886757,100.384964,102.248146,86.627441,77.344131,84.796539,78.791443,96.286926,84.693817,91.509422,86.062500,85.954132
2017-07-31,PRODUCE,2257.140625,2609.180176,3122.895752,1792.220947,2079.319580,2418.970215,2675.105713,2111.133545,2168.535400,2663.076172,1670.264893,2198.854492,2070.154785,2331.922363,2134.399902,2316.832764
2017-07-31,SCHOOL AND OFFICE SUPPLIES,30.111111,49.333332,57.481480,51.907406,63.222221,85.203705,100.277779,64.407410,59.759258,53.740742,42.962963,65.240738,67.481483,68.851852,52.333332,46.851852


In [21]:
# Display the long-format features: encoded family plus the four lag columns.
X

Unnamed: 0_level_0,family,y_lag_1,y_lag_2,y_lag_3,y_lag_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-05,0,6.833333,8.296296,11.481482,0.092593
2017-01-05,1,0.333333,0.296296,0.259259,0.037037
2017-01-05,2,6.888889,7.185185,11.648149,0.055556
2017-01-05,3,3911.833252,4507.814941,6208.055664,74.222221
2017-01-05,4,0.759259,0.814815,0.481481,0.000000
...,...,...,...,...,...
2017-07-31,28,464.615662,416.242065,528.171875,269.486877
2017-07-31,29,101.991165,100.165146,87.455833,77.199738
2017-07-31,30,2704.551758,2444.234375,2073.127686,1675.579346
2017-07-31,31,37.537037,24.907408,10.500000,4.518518


In [22]:
# Wrap an XGBoost regressor (default hyper-parameters) in a RegressorChain
# so it can be fitted against the multi-column target.
base_regressor = XGBRegressor()
model = RegressorChain(base_regressor)

In [23]:
model.fit(X, y)

# Predictions are made on the same X the model was just fitted on, so these
# are in-sample fits rather than held-out forecasts.
in_sample_predictions = model.predict(X)
# Re-attach the target's labels and floor the values at zero.
y_pred = pd.DataFrame(
    in_sample_predictions,
    index=y.index,
    columns=y.columns,
).clip(lower=0.0)

In [24]:
# Display the clipped prediction frame (same index and columns as y).
y_pred

Unnamed: 0_level_0,Unnamed: 1_level_0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8,y_step_9,y_step_10,y_step_11,y_step_12,y_step_13,y_step_14,y_step_15,y_step_16
date,family,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-01-05,AUTOMOTIVE,6.706484,6.588345,7.489037,6.631713,7.686126,6.872079,6.600883,6.618326,6.796880,6.139919,6.621857,6.541267,6.464437,6.452060,6.037592,6.704325
2017-01-05,BABY CARE,0.945634,1.420450,1.106641,1.412934,0.911818,1.030322,0.990770,1.474051,1.755131,1.427863,1.460183,1.483901,1.466740,1.270841,1.036739,1.223531
2017-01-05,BEAUTY,6.706484,6.588345,7.489037,7.386412,7.686126,6.872079,6.600883,6.800453,6.796880,6.139919,6.621857,6.541267,6.464437,6.391368,6.287976,6.704325
2017-01-05,BEVERAGES,3262.328613,3507.129395,4851.775879,5490.772949,3439.650879,3164.806641,3051.604980,2790.089600,3221.736084,4674.974121,5587.095215,3782.173096,3405.334473,3322.945557,2718.560547,2982.668457
2017-01-05,BOOKS,1.168675,1.420450,1.106641,1.412934,0.911818,1.030322,1.109941,1.474051,1.755131,1.427863,1.460183,1.483901,1.466740,1.270841,1.036739,1.223531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-31,POULTRY,345.968353,339.211426,352.909332,319.742065,546.124817,442.962830,468.727417,347.168243,320.476807,322.663940,290.080231,538.398560,418.802734,465.849976,325.088257,348.642792
2017-07-31,PREPARED FOODS,98.439781,107.795624,104.425468,84.917015,83.274704,99.409241,101.837982,101.290146,103.413254,101.597092,83.801720,83.925331,99.480164,102.894066,99.903076,104.674080
2017-07-31,PRODUCE,2222.561035,2357.666748,3117.094238,1702.226318,2024.906860,2297.686768,2894.276611,2089.364746,2049.405518,2727.199951,1735.701050,2222.351807,2312.296387,2676.415527,2192.415527,2239.247070
2017-07-31,SCHOOL AND OFFICE SUPPLIES,30.678667,27.590700,25.955845,20.388109,14.614511,21.536602,32.804363,29.191860,26.184706,26.345705,23.312809,17.005836,20.016520,28.905056,29.366232,26.189816


In [25]:
# Mean squared log error between the targets and the predictions. y_pred was
# produced from the same rows the model was fitted on, so this is a
# training-set score.
mean_squared_log_error(y_true=y, y_pred=y_pred)

0.10934690294361368

In [26]:
# Check the dtype of each prediction column.
y_pred.dtypes

y_step_1     float64
y_step_2     float64
y_step_3     float64
y_step_4     float64
y_step_5     float64
y_step_6     float64
y_step_7     float64
y_step_8     float64
y_step_9     float64
y_step_10    float64
y_step_11    float64
y_step_12    float64
y_step_13    float64
y_step_14    float64
y_step_15    float64
y_step_16    float64
dtype: object

In [27]:
# Row-wise average over the prediction columns, stored as an extra column
# on y_pred itself.
row_mean = y_pred.mean(axis='columns')
y_pred['y_step_mean'] = row_mean

In [28]:
# Display the predictions again, now including the y_step_mean column.
y_pred

Unnamed: 0_level_0,Unnamed: 1_level_0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8,y_step_9,y_step_10,y_step_11,y_step_12,y_step_13,y_step_14,y_step_15,y_step_16,y_step_mean
date,family,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-05,AUTOMOTIVE,6.706484,6.588345,7.489037,6.631713,7.686126,6.872079,6.600883,6.618326,6.796880,6.139919,6.621857,6.541267,6.464437,6.452060,6.037592,6.704325,6.684458
2017-01-05,BABY CARE,0.945634,1.420450,1.106641,1.412934,0.911818,1.030322,0.990770,1.474051,1.755131,1.427863,1.460183,1.483901,1.466740,1.270841,1.036739,1.223531,1.276097
2017-01-05,BEAUTY,6.706484,6.588345,7.489037,7.386412,7.686126,6.872079,6.600883,6.800453,6.796880,6.139919,6.621857,6.541267,6.464437,6.391368,6.287976,6.704325,6.754865
2017-01-05,BEVERAGES,3262.328613,3507.129395,4851.775879,5490.772949,3439.650879,3164.806641,3051.604980,2790.089600,3221.736084,4674.974121,5587.095215,3782.173096,3405.334473,3322.945557,2718.560547,2982.668457,3703.352905
2017-01-05,BOOKS,1.168675,1.420450,1.106641,1.412934,0.911818,1.030322,1.109941,1.474051,1.755131,1.427863,1.460183,1.483901,1.466740,1.270841,1.036739,1.223531,1.297485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-07-31,POULTRY,345.968353,339.211426,352.909332,319.742065,546.124817,442.962830,468.727417,347.168243,320.476807,322.663940,290.080231,538.398560,418.802734,465.849976,325.088257,348.642792,387.051111
2017-07-31,PREPARED FOODS,98.439781,107.795624,104.425468,84.917015,83.274704,99.409241,101.837982,101.290146,103.413254,101.597092,83.801720,83.925331,99.480164,102.894066,99.903076,104.674080,97.567421
2017-07-31,PRODUCE,2222.561035,2357.666748,3117.094238,1702.226318,2024.906860,2297.686768,2894.276611,2089.364746,2049.405518,2727.199951,1735.701050,2222.351807,2312.296387,2676.415527,2192.415527,2239.247070,2303.801010
2017-07-31,SCHOOL AND OFFICE SUPPLIES,30.678667,27.590700,25.955845,20.388109,14.614511,21.536602,32.804363,29.191860,26.184706,26.345705,23.312809,17.005836,20.016520,28.905056,29.366232,26.189816,25.005459


In [35]:
# Select the rows dated 2017-07-31 (the final date visible in the y_pred
# preview), leaving one row per family.
y_pred_end = y_pred.loc['2017-07-31']

In [36]:
# Display the per-family predictions selected for 2017-07-31.
y_pred_end

Unnamed: 0_level_0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8,y_step_9,y_step_10,y_step_11,y_step_12,y_step_13,y_step_14,y_step_15,y_step_16,y_step_mean
family,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
AUTOMOTIVE,8.741525,8.230907,8.059281,7.697957,7.454085,9.406362,9.780702,8.112112,7.454203,7.083698,6.170094,6.452662,9.075361,10.201166,7.744903,7.86375,8.095548
BABY CARE,0.855913,1.42045,1.106641,1.412934,0.911818,1.030322,0.99077,1.474051,1.755131,1.427863,1.460183,1.483901,1.46674,1.270841,1.036739,1.223531,1.270489
BEAUTY,8.635438,8.717885,8.059281,7.697957,7.454085,7.114341,6.600883,7.028884,6.79688,6.139919,6.170094,6.261571,6.464437,6.692132,6.543715,6.704325,7.067614
BEVERAGES,3611.853516,3928.347412,3840.582764,2973.912842,3384.211914,4105.192871,4566.334961,4195.241699,3475.625488,3417.067871,3113.175781,3223.417236,3612.671143,4340.069336,4399.397461,3581.656982,3735.547455
BOOKS,0.855913,1.42045,1.106641,1.412934,0.469668,0.841225,0.99077,1.474051,1.755131,1.427863,1.155044,1.40287,1.46674,1.270841,1.036739,1.231372,1.207391
BREAD/BAKERY,586.560425,516.589966,498.105652,408.901581,494.932709,626.371704,684.713989,587.664307,506.959564,510.319183,426.836029,505.177582,610.982788,716.494385,548.619202,506.182922,545.963249
CELEBRATION,12.658911,12.713011,15.190883,20.056824,14.978575,18.998369,13.581182,10.665528,12.111331,13.092239,17.932217,14.534848,17.802601,16.17823,13.391266,11.832344,14.732397
CLEANING,1732.140503,1918.35791,1724.987183,1466.72522,1319.848145,1352.76355,1733.613525,1491.804565,1204.711792,1328.758545,1345.869751,1331.442261,1260.329102,1444.0271,1537.790161,1226.888916,1463.753639
DAIRY,875.870422,819.906372,900.825745,730.427917,773.668396,1029.103271,1253.124634,876.319641,791.488708,849.281616,711.476257,817.938782,985.022461,1081.532471,926.651001,836.935608,891.223331
DELI,298.098053,284.974762,253.456253,236.885254,333.632202,386.086365,434.600739,318.737061,283.775452,262.433075,243.2099,343.99295,370.568207,439.181793,300.556122,277.529175,316.732335
