In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# This notebook reviews what I learned from Kaggle's "Time Series" course and applies it to make forecasts for the "Store Sales" competition.

# 1. Importing Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression
from pathlib import Path

comp_dir = Path('/kaggle/input/store-sales-time-series-forecasting')

# Holiday calendar, indexed by daily period.
# NOTE: `infer_datetime_format` is deprecated since pandas 2.0 (the stricter
# inference it used to enable is now the default behaviour of `parse_dates`),
# so it is no longer passed to `read_csv`.
holidays_events = pd.read_csv(
    comp_dir / "holidays_events.csv",
    dtype={
        'type': 'category',
        'locale': 'category',
        'locale_name': 'category',
        'description': 'category',
        'transferred': 'bool',
    },
    parse_dates=['date'],
)
holidays_events = holidays_events.set_index('date').to_period('D')

# Training sales, indexed by (store_nbr, family, date) with daily periods.
store_sales = pd.read_csv(
    comp_dir / 'train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
    },
    parse_dates=['date'],
)
store_sales['date'] = store_sales.date.dt.to_period('D')
store_sales = store_sales.set_index(['store_nbr', 'family', 'date']).sort_index()

# Mean of `sales` per date over every store and family; `squeeze()` turns the
# resulting single-column frame into a Series.
average_sales = (
    store_sales
    .groupby('date').mean()
    .squeeze()
)

In [None]:
from pathlib import Path
from warnings import simplefilter

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from xgboost import XGBRegressor


simplefilter("ignore")

# Set Matplotlib defaults.
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# Matplotlib 3.8 removed the old names, so use whichever spelling this
# installation actually provides instead of hard-coding one of them.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
plt.rc(
    "figure",
    autolayout=True,
    figsize=(11, 4),
    titlesize=18,
    titleweight='bold',
)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
# Shared keyword arguments for plotting the observed series.
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
)

# 2. Trend

Let's estimate the trend using the daily mean of sales.

In [None]:
# Work on a copy so the original average-sales series stays untouched.
y = average_sales.copy()

# Create trend features
dp_1 = DeterministicProcess(
    index=y.index,  # dates from the training data
    constant=True,  # the intercept
    order=2,        # quadratic trend
    drop=True,      # drop terms to avoid collinearity
)
X = dp_1.in_sample()  # features for the training data

# Hold out the final 12 observations; shuffle=False keeps the split chronological.
idx_train, idx_test = train_test_split(
    y.index, test_size=12, shuffle=False,
)
X_train, X_test = X.loc[idx_train, :], X.loc[idx_test, :]
y_train, y_test = y.loc[idx_train], y.loc[idx_test]

# Fit trend model
# (fit_intercept=False: dp_1 was built with constant=True, so no separate intercept is fitted)
model = LinearRegression(fit_intercept=False)
model.fit(X_train, y_train)

# Make predictions
y_fit = pd.DataFrame(
    model.predict(X_train),
    index=y_train.index,
)
y_pred = pd.DataFrame(
    model.predict(X_test),
    index=y_test.index,
)

# Plot
# Observed values are drawn in dark grey, the in-sample fit in C0 and the
# held-out prediction in C3, all on the same axes.
axs = y_train.plot(color='0.25', subplots=True, sharex=True)
axs = y_test.plot(color='0.25', subplots=True, sharex=True, ax=axs)
axs = y_fit.plot(color='C0', subplots=True, sharex=True, ax=axs)
axs = y_pred.plot(color='C3', subplots=True, sharex=True, ax=axs)
for ax in axs: ax.legend([])
_ = plt.suptitle("Trends")

The fitted curve shows that average sales trend upward over the years.

# 3. Seasonality

In [None]:
def plot_periodogram(ts, detrend='linear', ax=None):
    """Plot the periodogram of a daily series with frequencies in cycles per year.

    Parameters
    ----------
    ts : array-like
        Observations, assumed to be sampled once per day.
    detrend : str, callable or False, default 'linear'
        Forwarded unchanged to ``scipy.signal.periodogram``.
    ax : Axes-like, optional
        Axes to draw on. A new figure and axes are created when omitted.

    Returns
    -------
    The axes object that was drawn on.
    """
    from scipy.signal import periodogram

    # Number of daily samples per year. This is the value that
    # ``pd.Timedelta("1Y") / pd.Timedelta("1D")`` used to evaluate to; the "Y"
    # unit is rejected by pandas >= 2.0, so the constant is written out.
    fs = 365.2425
    frequencies, spectrum = periodogram(
        ts,
        fs=fs,
        detrend=detrend,
        window="boxcar",
        scaling='spectrum',
    )
    if ax is None:
        _, ax = plt.subplots()
    ax.step(frequencies, spectrum, color="purple")
    ax.set_xscale("log")
    # Tick positions are cycles per year; the labels name the matching period.
    ax.set_xticks([1, 2, 4, 6, 12, 26, 52, 104])
    ax.set_xticklabels(
        [
            "Annual (1)",
            "Semiannual (2)",
            "Quarterly (4)",
            "Bimonthly (6)",
            "Monthly (12)",
            "Biweekly (26)",
            "Weekly (52)",
            "Semiweekly (104)",
        ],
        rotation=30,
    )
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
    ax.set_ylabel("Variance")
    ax.set_title("Periodogram")
    return ax

In [None]:
plot_periodogram(y);

Based on the periodogram above, there appears to be a weekly seasonality.

In [None]:
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess

# Fourier terms of order 6 on a weekly ("W") calendar period.
fourier_2 = CalendarFourier(freq="W", order=6)  

dp_2 = DeterministicProcess(
    index=y.index,
    constant=True,               # dummy feature for bias (y-intercept)
    order=1,                     # trend
    seasonal=True,               # weekly seasonality (indicators)
    additional_terms=[fourier_2],  # weekly Fourier terms (freq="W"), not annual
    drop=True,                   # drop terms to avoid collinearity
)

# In-sample design matrix covering every date in y.index.
X2 = dp_2.in_sample()

In [None]:
y_season = y.copy()

# dp_2 was built with constant=True, so no separate intercept is fitted here.
model_2 = LinearRegression(fit_intercept=False)
model_2.fit(X2, y_season)

# In-sample fit, followed by a 16-step forecast past the end of the index.
# NOTE: this rebinds `y_pred`, which the trend section used for a DataFrame.
y_pred = pd.Series(model_2.predict(X2), index=y_season.index)
X_fore = dp_2.out_of_sample(steps=16)
y_fore = pd.Series(model_2.predict(X_fore), index=X_fore.index)

# Observations as dots, with the fitted and forecast curves overlaid.
ax = y_season.plot(color='0.25', style='.', title="Sales - Seasonal Forecast")
ax = y_pred.plot(ax=ax, label="Seasonal")
ax = y_fore.plot(ax=ax, label="Seasonal Forecast", color='C3')
ax.legend()

In [None]:
y_deseason = y - y_pred

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))
ax1 = plot_periodogram(y, ax=ax1)
ax1.set_title("Product Sales Frequency Components")
ax2 = plot_periodogram(y_deseason, ax=ax2);
ax2.set_title("Deseasonalized");

The graphs above show the seasonality captured by the model and what remains once it is subtracted from the original series.

# 4. Holiday features

Let's try to include holiday features.

In [None]:
# Keep the national and regional holidays dated from 2012 through 2017-08-15,
# retain only the `description` column, and drop category levels that no
# longer occur after the filtering.
holidays = (
    holidays_events
    .query("locale in ['National', 'Regional']")
    .loc['2012':'2017-08-15', ['description']]
    .assign(description=lambda x: x.description.cat.remove_unused_categories())
)

#display(holidays)

In [None]:
#ax = y_deseason.plot(**plot_params)
#plt.plot_date(holidays.index, y_deseason[holidays.index], color='C3')
#ax.set_title('National and Regional Holidays');

In [None]:
# One indicator column per holiday description, collapsed to one row per date.
# The de-duplication has to happen on the date index: `drop_duplicates()` on the
# frame compares the description values only, which throws away every
# recurrence of a holiday after its first year and still leaves several rows
# for a date that carries more than one holiday. Taking the per-date maximum
# keeps every holiday flag and guarantees the left join cannot add rows to X2.
X_holidays = (
    pd.get_dummies(holidays, dtype=float)
    .groupby(level=0)
    .max()
)
# Dates without any holiday get 0.0 in every indicator column.
X4 = X2.join(X_holidays, on='date', how='left').fillna(0.0)

In [None]:
# Fit on the seasonal features extended with the holiday indicators (X4).
model_4 = LinearRegression().fit(X4, y)

y_pred_4 = pd.Series(model_4.predict(X4), index=X4.index)
ax = y.plot(**plot_params, alpha=0.5, title="Average Sales")
# This curve comes from the model that includes holidays, so label it as such
# rather than reusing the "Seasonal" label of the previous section.
ax = y_pred_4.plot(ax=ax, label="Seasonal + Holidays")
ax.legend();

# 5. Lag Embedding

In [None]:
# `infer_datetime_format` is deprecated since pandas 2.0 and is not needed for
# `parse_dates`, so it is no longer passed.
store_sales_5 = pd.read_csv(
    comp_dir / 'train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
)

# Daily means of the numeric columns only. `store_nbr` and `family` are
# categoricals; pandas >= 2.0 raises a TypeError when asked for their mean,
# whereas older versions silently dropped them, so select the columns
# explicitly to get the same result on every version.
daily_means_5 = (
    store_sales_5
    .groupby('date')[['sales', 'onpromotion']]
    .mean()
)

average_sales_5 = daily_means_5.squeeze()

# Average sales per day, restricted to 2017.
y_5 = daily_means_5.loc['2017', 'sales'].squeeze()

In [None]:
# Deterministic features for y_5: constant, linear trend, seasonal indicators
# and order-6 Fourier terms on a weekly ("W") calendar period.
fourier_5 = CalendarFourier(freq='W', order=6)
dp_5 = DeterministicProcess(
    constant=True,
    index=y_5.index,
    order=1,
    seasonal=True,
    drop=True,
    additional_terms=[fourier_5],
)
X_time_5 = dp_5.in_sample()
# Indicator that is True only on 1 January.
X_time_5['NewYearsDay'] = (X_time_5.index.dayofyear == 1)

model_5 = LinearRegression(fit_intercept=False)
model_5.fit(X_time_5, y_5)

# Residual left after subtracting the fitted deterministic component.
y_deseason_5 = y_5 - model_5.predict(X_time_5)
y_deseason_5.name = 'sales_deseasoned'

ax = y_deseason_5.plot()
ax.set_title("Averaged Sales (deseasonalized)");

In [None]:
y_ma = y_5.rolling(7,center=True).mean()

ax = y_ma.plot()
ax.set_title("Seven-Day Moving Average");

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

def lagplot(x, y=None, lag=1, standardize=False, ax=None, **kwargs):
    """Scatter a series against a lagged copy of ``x`` with a LOWESS curve.

    ``x`` is shifted by ``lag`` and plotted on the horizontal axis. The vertical
    axis shows ``y`` when given, otherwise ``x`` itself. With ``standardize``
    the lagged ``x`` (and ``y``, when given) are centred and scaled by their
    standard deviation. The correlation between the two plotted series is
    written in a box in the upper-left corner. Extra keyword arguments are
    forwarded to ``sns.regplot``. Returns the axes drawn on.
    """
    from matplotlib.offsetbox import AnchoredText

    lagged = x.shift(lag)
    if standardize:
        lagged = (lagged - lagged.mean()) / lagged.std()

    # Pick the series for the vertical axis.
    if y is None:
        target = x
    elif standardize:
        target = (y - y.mean()) / y.std()
    else:
        target = y

    correlation = target.corr(lagged)

    if ax is None:
        fig, ax = plt.subplots()

    ax = sns.regplot(
        x=lagged,
        y=target,
        scatter_kws={'alpha': 0.75, 's': 3},
        line_kws={'color': 'C3'},
        lowess=True,
        ax=ax,
        **kwargs,
    )

    # Correlation annotation.
    badge = AnchoredText(
        f"{correlation:.2f}",
        prop={'size': "large"},
        frameon=True,
        loc="upper left",
    )
    badge.patch.set_boxstyle("square, pad=0.0")
    ax.add_artist(badge)

    ax.set(title=f"Lag {lag}", xlabel=lagged.name, ylabel=target.name)
    return ax


def plot_lags(x, y=None, lags=6, nrows=1, lagplot_kwargs=None, **kwargs):
    """Draw a grid of lag plots for lags 1 through ``lags``.

    Parameters
    ----------
    x : Series
        Series to be lagged.
    y : Series, optional
        Series plotted against the lagged ``x``; ``x`` itself when omitted.
    lags : int, default 6
        Number of lags to plot.
    nrows : int, default 1
        Number of rows in the grid; columns default to ``ceil(lags / nrows)``.
    lagplot_kwargs : dict, optional
        Extra keyword arguments forwarded to each ``lagplot`` call.
    **kwargs
        Forwarded to ``plt.subplots`` (e.g. ``ncols``, ``figsize``).

    Returns
    -------
    The created figure.
    """
    import math

    # A dict literal as a default would be shared between calls; build a fresh
    # one per call instead.
    if lagplot_kwargs is None:
        lagplot_kwargs = {}

    kwargs.setdefault('nrows', nrows)
    kwargs.setdefault('ncols', math.ceil(lags / nrows))
    kwargs.setdefault('figsize', (kwargs['ncols'] * 2, nrows * 2 + 0.5))
    fig, axs = plt.subplots(sharex=True, sharey=True, squeeze=False, **kwargs)
    for ax, k in zip(fig.get_axes(), range(kwargs['nrows'] * kwargs['ncols'])):
        if k + 1 <= lags:
            ax = lagplot(x, y, lag=k + 1, ax=ax, **lagplot_kwargs)
            ax.set_title(f"Lag {k + 1}", fontdict=dict(fontsize=14))
            # Per-panel labels are cleared; shared labels are set below.
            ax.set(xlabel="", ylabel="")
        else:
            # Grid cells beyond the requested number of lags stay blank.
            ax.axis('off')
    plt.setp(axs[-1, :], xlabel=x.name)
    plt.setp(axs[:, 0], ylabel=y.name if y is not None else x.name)
    fig.tight_layout(w_pad=0.1, h_pad=0.1)
    return fig

In [None]:
plot_pacf(y_deseason_5, lags=12);
plot_lags(y_deseason_5, lags=12, nrows=2);

In [None]:
# Daily means of sales and promotions for 2017. The numeric columns are
# selected before aggregating because `store_nbr` and `family` are
# categoricals, and pandas >= 2.0 raises when asked for their mean.
average_sales_5_2 = (
    store_sales_5
    .groupby('date')[['sales', 'onpromotion']]
    .mean()
    .loc['2017']
)

onpromotion = average_sales_5_2.loc[:, 'onpromotion'].squeeze().rename('onpromotion')
# Lag plots of deseasonalized sales against promotions, restricted to the days
# whose average promotion count exceeds 1.
plot_lags(x=onpromotion.loc[onpromotion > 1], y=y_deseason_5.loc[onpromotion > 1], lags=3, nrows=1);

# 6. Hybrid Model

In [None]:
store_sales_6 = pd.read_csv(
    comp_dir / 'train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
    },
    parse_dates=['date'],
    infer_datetime_format=True,
)
store_sales_6['date'] = store_sales_6.date.dt.to_period('D')
store_sales_6 = store_sales_6.set_index(['store_nbr', 'family', 'date']).sort_index()

family_sales_6 = (
    store_sales_6
    .groupby(['store_nbr','family', 'date'])
    .mean()
    .unstack(['store_nbr','family'])
    .loc['2017']
)

In [None]:
class BoostedHybrid:
    """Container for two estimators that are used together as one model."""

    def __init__(self, model_1, model_2):
        # Target column labels; None until something assigns them.
        self.y_columns = None
        self.model_1, self.model_2 = model_1, model_2

In [None]:
def fit(self, X_1, X_2, y):
    """Fit ``model_1`` on ``(X_1, y)`` and ``model_2`` on its residuals.

    The residuals ``y - model_1.predict(X_1)`` are stacked from wide to long
    form before being passed to ``model_2.fit`` together with ``X_2``. The
    target columns, the first-stage fit and the stacked residuals are stored
    on ``self`` as ``y_columns``, ``y_fit`` and ``y_resid``.
    """
    self.model_1.fit(X_1, y)

    # First-stage fit, shaped like y.
    fitted = pd.DataFrame(self.model_1.predict(X_1), index=X_1.index, columns=y.columns)

    # Residuals in long form (wide to long).
    residuals = (y - fitted).stack().squeeze()

    self.model_2.fit(X_2, residuals)

    self.y_columns, self.y_fit, self.y_resid = y.columns, fitted, residuals

BoostedHybrid.fit = fit

In [None]:
def predict(self, X_1, X_2):
    """Return ``model_1``'s prediction plus ``model_2``'s correction, in wide form.

    ``model_1.predict(X_1)`` is framed with ``X_1``'s index and the stored
    ``y_columns``, stacked to long form, incremented by ``model_2.predict(X_2)``
    and unstacked again.
    """
    base = pd.DataFrame(
        self.model_1.predict(X_1), index=X_1.index, columns=self.y_columns
    )
    long_form = base.stack().squeeze()  # wide to long
    long_form += self.model_2.predict(X_2)
    wide_form = long_form.unstack()  # long to wide
    return wide_form

BoostedHybrid.predict = predict

In [None]:
from sklearn.preprocessing import LabelEncoder

y_6 = family_sales_6.loc[:, 'sales']

dp_6 = DeterministicProcess(index=y_6.index, order=1)
X_6_1 = dp_6.in_sample()

X_6_2 = family_sales_6.drop('sales', axis=1).stack() 
le = LabelEncoder()
X_6_2 = X_6_2.reset_index('family')
X_6_2['family'] = le.fit_transform(X_6_2['family'])
X_6_2["day"] = X_6_2.index.day  # values are day of the month

In [None]:
from xgboost import XGBRegressor

# Create LinearRegression + XGBRegressor hybrid with BoostedHybrid
model_6 = BoostedHybrid(
    model_1=LinearRegression(),
    model_2=XGBRegressor(),
)

model_6.fit(X_6_1, X_6_2, y_6)
y_pred_6 = model_6.predict(X_6_1,X_6_2)
y_pred_6 = y_pred_6.clip(0.0)

In [None]:
y_train_6, y_valid_6 = y_6[:"2017-07-01"], y_6["2017-07-02":]
X_6_1_train, X_6_1_valid = X_6_1[: "2017-07-01"], X_6_1["2017-07-02" :]
X_6_2_train, X_6_2_valid = X_6_2.loc[:"2017-07-01"], X_6_2.loc["2017-07-02":]

model_6.fit(X_6_1_train, X_6_2_train, y_train_6)
y_fit_6 = model_6.predict(X_6_1_train, X_6_2_train).clip(0.0)
y_pred_6 = model_6.predict(X_6_1_valid, X_6_2_valid).clip(0.0)

families_6 = y_6.columns[0:6]
axs = y_6.loc(axis=1)[families_6].plot(
    subplots=True, sharex=True, figsize=(11, 9), **plot_params, alpha=0.5,
)
_ = y_fit_6.loc(axis=1)[families_6].plot(subplots=True, sharex=True, color='C0', ax=axs)
_ = y_pred_6.loc(axis=1)[families_6].plot(subplots=True, sharex=True, color='C3', ax=axs)
for ax, family in zip(axs, families_6):
    ax.legend([])
    ax.set_ylabel(family)

# 7. Hybrid Model - 2

In [None]:
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor

model_7 = BoostedHybrid(
    model_1=Ridge(),
    model_2=KNeighborsRegressor(),
)

model_7.fit(X_6_1, X_6_2, y_6)
y_pred_7 = model_7.predict(X_6_1,X_6_2)
y_pred_7 = y_pred_7.clip(0.0)

In [None]:
# Refit model_7 on the training window only, then predict both windows.
model_7.fit(X_6_1_train, X_6_2_train, y_train_6)
y_fit_7 = model_7.predict(X_6_1_train, X_6_2_train).clip(0.0)
y_pred_7 = model_7.predict(X_6_1_valid, X_6_2_valid).clip(0.0)

# Plot the first six target columns.
families_7 = y_6.columns[0:6]
axs = y_6.loc(axis=1)[families_7].plot(
    subplots=True, sharex=True, figsize=(11, 9), **plot_params, alpha=0.5,
)
# Draw model_7's own fit and forecast. The cell previously re-plotted
# y_fit_6 / y_pred_6, i.e. the results of the other hybrid model.
_ = y_fit_7.loc(axis=1)[families_7].plot(subplots=True, sharex=True, color='C0', ax=axs)
_ = y_pred_7.loc(axis=1)[families_7].plot(subplots=True, sharex=True, color='C3', ax=axs)
for ax, family in zip(axs, families_7):
    ax.legend([])
    ax.set_ylabel(family)

# 8. Multistep Forecasting - DirRec strategy

In [None]:
def make_lags(ts, lags, lead_time=1):
    """Build ``lags`` lagged copies of ``ts`` side by side.

    The shifts run from ``lead_time`` to ``lead_time + lags - 1``; the copy
    shifted by ``i`` is labelled ``y_lag_{i}``.
    """
    shifted = {}
    for offset in range(lead_time, lags + lead_time):
        shifted[f'y_lag_{offset}'] = ts.shift(offset)
    return pd.concat(shifted, axis=1)

def make_multistep_target(ts, steps):
    """Build ``steps`` forward-shifted copies of ``ts`` side by side.

    ``y_step_1`` is ``ts`` itself and ``y_step_{k}`` is ``ts`` shifted back by
    ``k - 1`` positions, i.e. the value ``k - 1`` rows ahead.
    """
    horizons = range(1, steps + 1)
    labels = [f'y_step_{k}' for k in horizons]
    leads = [ts.shift(1 - k) for k in horizons]
    return pd.concat(leads, axis=1, keys=labels)

In [None]:
store_sales_8 = pd.read_csv(
    comp_dir / 'train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
    infer_datetime_format=True,
)
store_sales_8['date'] = store_sales_8.date.dt.to_period('D')
store_sales_8 = store_sales_8.set_index(['store_nbr', 'family', 'date']).sort_index()

family_sales_8 = (
    store_sales_8
    .groupby(['family', 'date'])
    .mean()
    .unstack('family')
    .loc['2017']
)

In [None]:
y_8 = family_sales_8.loc[:, 'sales']
# Features: four lagged copies of the sales; rows with incomplete lags are dropped.
X_8 = make_lags(y_8, lags=4).dropna()
# Targets: a 16-step horizon per row; rows with an incomplete horizon are dropped.
y_8 = make_multistep_target(y_8, steps=16).dropna()
# Keep only the dates present in both the targets and the features.
y_8, X_8 = y_8.align(X_8, join='inner', axis=0)

In [None]:
le_8 = LabelEncoder()
X_8 = (X_8
    .stack('family')  # wide to long
    .reset_index('family')  # convert index to column
    .assign(family=lambda x: le_8.fit_transform(x.family))  # label encode
)
y_8 = y_8.stack('family')  # wide to long

#display(y_8)

In [None]:
from sklearn.multioutput import RegressorChain
model_8 = RegressorChain(base_estimator=XGBRegressor())

In [None]:
model_8.fit(X_8, y_8)

y_pred_8 = pd.DataFrame(
    model_8.predict(X_8),
    index=y_8.index,
    columns=y_8.columns,
).clip(0.0)

In [None]:
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
)

def plot_multistep(y, every=1, ax=None, palette_kwargs=None):
    """Plot every ``every``-th row of ``y`` as its own line.

    Each selected row is re-indexed with a period range that starts at the
    row's label and has one period per value, then drawn on ``ax`` (a new axes
    is created when ``ax`` is omitted). Line colours cycle through a seaborn
    palette whose defaults (``husl``, 16 colours) can be overridden through
    ``palette_kwargs``. Returns the axes drawn on.
    """
    # Defaults first, caller overrides on top.
    options = {'palette': 'husl', 'n_colors': 16, 'desat': None}
    options.update(palette_kwargs if palette_kwargs is not None else {})
    colours = sns.color_palette(**options)

    if ax is None:
        fig, ax = plt.subplots()
    ax.set_prop_cycle(plt.cycler('color', colours))

    for origin, row in y[::every].iterrows():
        row.index = pd.period_range(start=origin, periods=len(row))
        row.plot(ax=ax)
    return ax

In [None]:
FAMILY = 'BEAUTY'
START = '2017-04-01'
EVERY = 16

y_pred_8_ = y_pred_8.xs(FAMILY, level='family', axis=0).loc[START:]
y_8_ = family_sales_8.loc[START:, 'sales'].loc[:, FAMILY]

fig, ax = plt.subplots(1, 1, figsize=(11, 4))
ax = y_8_.plot(**plot_params, ax=ax, alpha=0.5)
ax = plot_multistep(y_pred_8_, ax=ax, every=EVERY)
_ = ax.legend([FAMILY, FAMILY + ' Forecast'])

# Submit for seasonality model

In [None]:
# One column per (store_nbr, family) pair, restricted to 2017.
y_final = store_sales.unstack(['store_nbr', 'family']).loc["2017"]

fourier = CalendarFourier(freq="W", order=6)  # 6 sin/cos pairs on a weekly ("W") calendar period
dp_f = DeterministicProcess(
    index=y_final.index,
    constant=True,               # dummy feature for bias (y-intercept)
    order=1,                     # trend
    seasonal=True,               # weekly seasonality (indicators)
    additional_terms=[fourier],  # weekly Fourier terms (freq="W"), not annual
    drop=True,                   # drop terms to avoid collinearity
)

X_f = dp_f.in_sample()
# Indicator that is True only on 1 January.
X_f['NewYear'] = (X_f.index.dayofyear == 1)

# dp_f was built with constant=True, so no separate intercept is fitted here.
model_f = LinearRegression(fit_intercept=False)
model_f.fit(X_f, y_final)

In [None]:
df_test = pd.read_csv(
    comp_dir / 'test.csv',
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
    infer_datetime_format=True,
)

df_test['date'] = df_test.date.dt.to_period('D')
df_test = df_test.set_index(['store_nbr', 'family', 'date']).sort_index()

# Create features for test set
X_test_final = dp_f.out_of_sample(steps=16)
X_test_final.index.name = 'date'
X_test_final['NewYear'] = (X_test_final.index.dayofyear == 1)


In [None]:
# Predict the forecast horizon and floor the result at zero: a linear model can
# extrapolate below zero, which is not a meaningful sales figure, and the other
# forecasts in this notebook are clipped the same way.
y_submit = pd.DataFrame(
    model_f.predict(X_test_final),
    index=X_test_final.index,
    columns=y_final.columns,
).clip(0.0)
# Wide to long: one row per (date, store_nbr, family).
y_submit = y_submit.stack(['store_nbr', 'family'])
# Attach the test-set row ids and keep only the two submission columns.
y_submit = y_submit.join(df_test.id).reindex(columns=['id', 'sales'])
y_submit.to_csv('submission.csv', index=False)


# Submit for Hybrid model

In [None]:
y_final = store_sales.unstack(['store_nbr', 'family']).loc["2017"]

In [None]:
X_6_1_f = dp_6.out_of_sample(steps=16)

df_test_f = df_test.copy()
X_6_2_f = df_test_f.groupby(['store_nbr','family','date']).mean().unstack(['store_nbr','family'])

X_6_2_f = X_6_2_f.drop('id', axis=1).stack() 
le_f = LabelEncoder()  
X_6_2_f = X_6_2_f.reset_index('family')
X_6_2_f['family'] = le_f.fit_transform(X_6_2_f['family'])

X_6_2_f["day"] = X_6_2_f.index.day


In [None]:
#df_test_f

In [None]:
# change "model_6" or "model_7" by hybrid models
y_pred_6_submit = model_7.predict(X_6_1_f,X_6_2_f)

y_pred_6_submit = y_pred_6_submit.stack(['store_nbr', 'family'])
y_pred_6_submit = y_pred_6_submit.to_frame(name="sales")
y_pred_6_submit.index.names=['date', 'store_nbr','family']

y_pred_6_submit = y_pred_6_submit.join(df_test.id).reindex(columns=['id', 'sales'])
#y_pred_6_submit.to_csv('submission.csv', index=False)

# Submit for DirRec Strategy

In [None]:
#y_8_f = df_test.groupby(['family', 'date']).mean().unstack('family').loc['2017'].loc[:, 'id']
y_8_f = store_sales.unstack(['store_nbr', 'family']).loc["2017"]
#print(y_8_f)
X_8_f = make_lags(y_8_f, lags=4).dropna()
#print(X_8_f)
y_8_f = make_multistep_target(y_8_f, steps=16).dropna()
#print(X_8_f)
#print(y_8_f)
y_8_f, X_8_f = y_8_f.align(X_8_f, join='outer', axis=0)
#print(X_8_f)
#print(y_8_f)

In [None]:
#le_8 = LabelEncoder()
X_8_f = (X_8_f
    .stack('family')  # wide to long
    .reset_index('family')  # convert index to column
    .assign(family=lambda x: le_8.fit_transform(x.family))  # label encode
)
y_8_f = y_8_f.stack('family')  # wide to long

In [None]:
#y_pred_8_submit = model_8.predict(X_8_f)
"""
model_8.fit(X_8_f, y_8_f)

y_pred_8_submit = pd.DataFrame(
    model_8.predict(X_8_f),
    index=y_8_f.index,
    columns=y_8_f.columns,
).clip(0.0)
"""

In [None]:
"""
y_pred_8_submit = y_pred_8_submit.stack(['store_nbr', 'family'])
y_pred_8_submit = y_pred_8_submit.to_frame(name="sales")
y_pred_8_submit.index.names=['date', 'store_nbr','family']

y_pred_8_submit = y_pred_8_submit.join(df_test.id).reindex(columns=['id', 'sales'])
"""