# **Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import describe
pd.options.display.max_columns = 12
pd.options.display.max_rows = 24
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
# use svg for all plots within inline backend
%config InlineBackend.figure_format = 'svg'
from pylab import rcParams
rcParams['figure.figsize'] = 5, 4
import pandas as pd
pd.options.display.max_columns = 12
pd.options.display.max_rows = 24

# plots inside jupyter notebook
%matplotlib inline
import matplotlib.pyplot as plt

import seaborn as sns
sns.set()

# use svg for all plots within inline backend
%config InlineBackend.figure_format = 'svg'

# increase default plot size
from pylab import rcParams
rcParams['figure.figsize'] = 5, 4

# Load data

In [2]:
# Read csv data
df_train = pd.read_csv('/content/train.csv')
df_train.head()
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 913000 entries, 0 to 912999
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   date    913000 non-null  object
 1   store   913000 non-null  int64 
 2   item    913000 non-null  int64 
 3   sales   913000 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 27.9+ MB


# Data Preprocessing
## Convert strings to dates

In [3]:
df_train['date'] = pd.to_datetime(df_train['date'])
df_train.index = pd.DatetimeIndex(df_train['date'])
df_train.drop('date', axis=1, inplace=True)

In [4]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 913000 entries, 2013-01-01 to 2017-12-31
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype
---  ------  --------------   -----
 0   store   913000 non-null  int64
 1   item    913000 non-null  int64
 2   sales   913000 non-null  int64
dtypes: int64(3)
memory usage: 27.9 MB


In [5]:
df_train.head()

Unnamed: 0_level_0,store,item,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01,1,1,13
2013-01-02,1,1,11
2013-01-03,1,1,14
2013-01-04,1,1,13
2013-01-05,1,1,10


## Sales for each storeitem
#### Transforming row values into columns, one column per unique item/store pair

In [6]:
from itertools import product, starmap


def storeitems():
    """Iterate over every (item, store) id pair: items 1-50 crossed with stores 1-10.

    Pairs come out item-major: (1, 1), (1, 2), ..., (1, 10), (2, 1), ..., (50, 10).
    """
    item_ids = range(1, 51)
    store_ids = range(1, 11)
    return product(item_ids, store_ids)


def storeitems_column_names():
    """Return the sales column name of every pair yielded by `storeitems()`, in that order."""
    return [f'item_{item}_store_{store}_sales' for item, store in storeitems()]


def sales_by_storeitem(df):
    """Pivot long-format sales into one column per (item, store) pair.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format frame with `item`, `store` and `sales` columns. The rows
        of each pair are matched to the unique index values by position, so
        they must appear in the same order as `df.index.unique()`.

    Returns
    -------
    pd.DataFrame
        Frame indexed by `df.index.unique()` with one
        `item_{i}_store_{s}_sales` column per pair yielded by `storeitems()`.

    Raises
    ------
    ValueError
        If a pair does not have exactly one row per unique index value.
    """
    # Group once instead of boolean-masking the whole frame for every pair.
    sales_by_pair = {
        pair: sales.values
        for pair, sales in df.groupby(['item', 'store'], sort=False)['sales']
    }
    # Stand-in for pairs that have no rows at all (keeps the sales dtype).
    no_sales = df['sales'].values[:0]
    # Collect every column first and build the frame in a single call;
    # inserting hundreds of columns one at a time fragments the frame.
    columns = {
        f'item_{i}_store_{s}_sales': sales_by_pair.get((i, s), no_sales)
        for i, s in storeitems()
    }
    return pd.DataFrame(columns, index=df.index.unique())

In [7]:
df_train = sales_by_storeitem(df_train)

In [8]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1826 entries, 2013-01-01 to 2017-12-31
Columns: 500 entries, item_1_store_1_sales to item_50_store_10_sales
dtypes: int64(500)
memory usage: 7.0 MB


In [9]:
df_train.head()

Unnamed: 0_level_0,item_1_store_1_sales,item_1_store_2_sales,item_1_store_3_sales,item_1_store_4_sales,item_1_store_5_sales,item_1_store_6_sales,...,item_50_store_5_sales,item_50_store_6_sales,item_50_store_7_sales,item_50_store_8_sales,item_50_store_9_sales,item_50_store_10_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-01-01,13,12,19,10,11,20,...,19,20,21,45,36,33
2013-01-02,11,16,8,12,9,6,...,25,23,30,54,44,37
2013-01-03,14,16,10,8,12,11,...,28,38,20,54,29,46
2013-01-04,13,20,15,15,8,7,...,27,33,27,52,43,51
2013-01-05,10,16,22,19,13,12,...,31,33,18,48,53,41


# Load test data and preprocess
## Test data


In [10]:
# load data
df_test = pd.read_csv('/content/test.csv')
df_test.head()

Unnamed: 0,id,date,store,item
0,0,2018-01-01,1,1
1,1,2018-01-02,1,1
2,2,2018-01-03,1,1
3,3,2018-01-04,1,1
4,4,2018-01-05,1,1


In [11]:
# strings to dates
df_test['date'] = pd.to_datetime(df_test['date'])
df_test.index = pd.DatetimeIndex(df_test['date'])
df_test.drop('date', axis=1, inplace=True)
df_test.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 45000 entries, 2018-01-01 to 2018-03-31
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   id      45000 non-null  int64
 1   store   45000 non-null  int64
 2   item    45000 non-null  int64
dtypes: int64(3)
memory usage: 1.4 MB


In [12]:
# mock sales to use same transformations as in df_train
df_test['sales'] = np.zeros(df_test.shape[0])
df_test = sales_by_storeitem(df_test)
df_test.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 90 entries, 2018-01-01 to 2018-03-31
Columns: 500 entries, item_1_store_1_sales to item_50_store_10_sales
dtypes: float64(500)
memory usage: 352.3 KB


In [13]:
df_test.head()

Unnamed: 0_level_0,item_1_store_1_sales,item_1_store_2_sales,item_1_store_3_sales,item_1_store_4_sales,item_1_store_5_sales,item_1_store_6_sales,...,item_50_store_5_sales,item_50_store_6_sales,item_50_store_7_sales,item_50_store_8_sales,item_50_store_9_sales,item_50_store_10_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-01-05,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0


## Combine test and train datasets

In [14]:
# make sure all column names are the same and in the same order
# (zip() stops at the shorter frame, so the column counts are compared as well)
col_names = list(zip(df_test.columns, df_train.columns))
assert len(df_test.columns) == len(df_train.columns)
assert all(test_col == train_col for test_col, train_col in col_names)

In [15]:
# flag where each row came from, then stack train on top of test
df_train['is_test'] = False
df_test['is_test'] = True
df_total = pd.concat([df_train, df_test])
df_total.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1916 entries, 2013-01-01 to 2018-03-31
Columns: 501 entries, item_1_store_1_sales to is_test
dtypes: bool(1), float64(500)
memory usage: 7.3 MB


## Onehot encoding weekdays and months

In [16]:
weekday_df = pd.get_dummies(df_total.index.weekday, prefix='weekday')
weekday_df.index = df_total.index
weekday_df.head()

Unnamed: 0_level_0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-01-01,0,1,0,0,0,0,0
2013-01-02,0,0,1,0,0,0,0
2013-01-03,0,0,0,1,0,0,0
2013-01-04,0,0,0,0,1,0,0
2013-01-05,0,0,0,0,0,1,0


In [17]:
month_df = pd.get_dummies(df_total.index.month, prefix='month')
month_df.index =  df_total.index
month_df.head()

Unnamed: 0_level_0,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-01-01,1,0,0,0,0,0,0,0,0,0,0,0
2013-01-02,1,0,0,0,0,0,0,0,0,0,0,0
2013-01-03,1,0,0,0,0,0,0,0,0,0,0,0
2013-01-04,1,0,0,0,0,0,0,0,0,0,0,0
2013-01-05,1,0,0,0,0,0,0,0,0,0,0,0


In [18]:
df_total = pd.concat([weekday_df, month_df, df_total], axis=1)
df_total.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1916 entries, 2013-01-01 to 2018-03-31
Columns: 520 entries, weekday_0 to is_test
dtypes: bool(1), float64(500), uint8(19)
memory usage: 7.4 MB


In [19]:
# the combined frame must not contain any missing values
assert not df_total.isna().any().any()

## Shift sales 

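Calculate the first values of the test set.
Shift columns in df with names in series_names by days_delta. Negative days_delta will attach future values to the current date, positive days_delta will attach past values to the current date. (Note that `stack_shifted_sales` negates its own `days_delta` before shifting, so calling it with `days_delta=-1` attaches the previous day's sales.)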

In [20]:
def shift_series(series, days):
    """Shift `series` by `days` positions along its index.

    A positive `days` moves values to later rows (each row receives an
    earlier value); a negative `days` moves them to earlier rows. Rows left
    without a value become NaN.
    """
    # Call Series.shift directly rather than routing it through transform() and a lambda.
    return series.shift(days)


def shift_series_in_df(df, series_names=None, days_delta=90):
    """Return shifted copies of the selected columns under suffixed names.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame; it is not modified.
    series_names : list of str, optional
        Columns of `df` to shift. Defaults to no columns.
    days_delta : int
        Number of rows to shift by. A non-negative value gives each row the
        value from `days_delta` rows earlier (suffix `_past_{n}`); a negative
        value gives the value from later rows (suffix `_future_{n}`).

    Returns
    -------
    pd.DataFrame
        Frame on `df`'s index with one
        `{name}_{past|future}_{abs(days_delta)}` column per requested name.
    """
    # None instead of a shared mutable [] default.
    names = [] if series_names is None else list(series_names)
    direction = 'future' if days_delta < 0 else 'past'
    # Shift all requested columns in one call instead of inserting them into
    # a frame one by one, which fragments it when there are hundreds of them.
    shifted = df[names].shift(days_delta)
    shifted.columns = [f'{name}_{direction}_{abs(days_delta)}' for name in names]
    return shifted

    
def stack_shifted_sales(df, days_delta=90):
    """Append shifted copies of every storeitem sales column to a copy of `df`.

    The sign of `days_delta` is inverted before shifting:
    - `days_delta > 0` adds `_future_1` ... `_future_{days_delta}` columns,
    - `days_delta < 0` adds `_past_{|days_delta|}` ... `_past_1` columns,
    - `days_delta == 0` adds nothing.

    Returns the original columns followed by the shifted ones.
    """
    if days_delta > 0:
        offsets = range(1, days_delta + 1)
    else:
        offsets = range(days_delta, 0)
    sales_names = storeitems_column_names()
    shifted_frames = [
        shift_series_in_df(df, series_names=sales_names, days_delta=-offset)
        for offset in offsets
    ]
    return pd.concat([df.copy(), *shifted_frames], axis=1, copy=False)

In [21]:
df_total = stack_shifted_sales(df_total, days_delta=-1)

In [22]:
df_total = df_total.dropna()  # remove 1st row
df_total.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1915 entries, 2013-01-02 to 2018-03-31
Columns: 1020 entries, weekday_0 to item_50_store_10_sales_past_1
dtypes: bool(1), float64(1000), uint8(19)
memory usage: 14.7 MB


In [23]:
# make sure stacked and standard sales columns appear in the same order:
sales_cols = sorted(col for col in df_total.columns if '_sales' in col and '_sales_' not in col)
stacked_sales_cols = sorted(col for col in df_total.columns if '_sales_' in col)

# build the lookup set once, not once per column inside the comprehension
sales_like_cols = set(sales_cols) | set(stacked_sales_cols)
other_cols = [col for col in df_total.columns if col not in sales_like_cols]

# calendar / flag columns first, then lagged sales (inputs), then current sales (targets)
new_cols = other_cols + stacked_sales_cols + sales_cols

In [24]:
df_total = df_total.reindex(columns=new_cols)

In [25]:
df_total.head()

Unnamed: 0_level_0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,...,item_9_store_4_sales,item_9_store_5_sales,item_9_store_6_sales,item_9_store_7_sales,item_9_store_8_sales,item_9_store_9_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-01-02,0,0,1,0,0,0,...,21.0,20.0,17.0,20.0,28.0,36.0
2013-01-03,0,0,0,1,0,0,...,25.0,15.0,28.0,18.0,31.0,25.0
2013-01-04,0,0,0,0,1,0,...,37.0,20.0,33.0,24.0,46.0,31.0
2013-01-05,0,0,0,0,0,1,...,37.0,23.0,27.0,14.0,35.0,30.0
2013-01-06,0,0,0,0,0,0,...,37.0,29.0,20.0,24.0,34.0,35.0


In [26]:
df_total.tail()

Unnamed: 0_level_0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,...,item_9_store_4_sales,item_9_store_5_sales,item_9_store_6_sales,item_9_store_7_sales,item_9_store_8_sales,item_9_store_9_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-03-27,0,1,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-03-28,0,0,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-03-29,0,0,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-03-30,0,0,0,0,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0
2018-03-31,0,0,0,0,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
df_total.describe()

Unnamed: 0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,...,item_9_store_4_sales,item_9_store_5_sales,item_9_store_6_sales,item_9_store_7_sales,item_9_store_8_sales,item_9_store_9_sales
count,1915.0,1915.0,1915.0,1915.0,1915.0,1915.0,...,1915.0,1915.0,1915.0,1915.0,1915.0,1915.0
mean,0.142559,0.142559,0.143081,0.143081,0.143081,0.143081,...,51.522193,37.02141,37.613055,34.219321,60.241253,51.577023
std,0.349714,0.349714,0.350247,0.350247,0.350247,0.350247,...,18.707446,13.864136,13.968996,12.788732,21.714863,18.706826
min,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,...,41.0,29.0,30.0,27.0,48.0,41.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,...,52.0,38.0,38.0,35.0,61.0,52.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,...,64.0,46.0,46.0,42.0,74.0,63.0
max,1.0,1.0,1.0,1.0,1.0,1.0,...,111.0,84.0,81.0,78.0,134.0,110.0


In [28]:
# shifting and reindexing must not have introduced missing values
assert not df_total.isna().any().any()

## Scaling
With combined datasets and shifted sales, we can now correctly min-max scale all data.

In [29]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [30]:
cols_to_scale = [col for col in df_total.columns if 'weekday' not in col and 'month' not in col]

In [31]:
scaler = MinMaxScaler(feature_range=(0,1))
scaled_cols = scaler.fit_transform(df_total[cols_to_scale])
df_total[cols_to_scale] = scaled_cols
df_total.head()

Unnamed: 0_level_0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,...,item_9_store_4_sales,item_9_store_5_sales,item_9_store_6_sales,item_9_store_7_sales,item_9_store_8_sales,item_9_store_9_sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-01-02,0,0,1,0,0,0,...,0.189189,0.238095,0.209877,0.25641,0.208955,0.327273
2013-01-03,0,0,0,1,0,0,...,0.225225,0.178571,0.345679,0.230769,0.231343,0.227273
2013-01-04,0,0,0,0,1,0,...,0.333333,0.238095,0.407407,0.307692,0.343284,0.281818
2013-01-05,0,0,0,0,0,1,...,0.333333,0.27381,0.333333,0.179487,0.261194,0.272727
2013-01-06,0,0,0,0,0,0,...,0.333333,0.345238,0.246914,0.307692,0.253731,0.318182


In [32]:
df_total.describe()

Unnamed: 0,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,...,item_9_store_4_sales,item_9_store_5_sales,item_9_store_6_sales,item_9_store_7_sales,item_9_store_8_sales,item_9_store_9_sales
count,1915.0,1915.0,1915.0,1915.0,1915.0,1915.0,...,1915.0,1915.0,1915.0,1915.0,1915.0,1915.0
mean,0.142559,0.142559,0.143081,0.143081,0.143081,0.143081,...,0.464164,0.440731,0.464359,0.438709,0.449562,0.468882
std,0.349714,0.349714,0.350247,0.350247,0.350247,0.350247,...,0.168536,0.165049,0.172457,0.163958,0.162051,0.170062
min,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,...,0.369369,0.345238,0.37037,0.346154,0.358209,0.372727
50%,0.0,0.0,0.0,0.0,0.0,0.0,...,0.468468,0.452381,0.469136,0.448718,0.455224,0.472727
75%,0.0,0.0,0.0,0.0,0.0,0.0,...,0.576577,0.547619,0.567901,0.538462,0.552239,0.572727
max,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0


## Split data to train and test set
`df_total` will still be available - it will be needed to reverse the scaling of the output data (sales predictions from the model).

In [33]:
# separate the rows again using the flag set before concatenation, then drop the flag
df_train = df_total.loc[df_total['is_test'].eq(False)].drop(columns='is_test')
df_test = df_total.loc[df_total['is_test'].eq(True)].drop(columns='is_test')

In [34]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1825 entries, 2013-01-02 to 2017-12-31
Columns: 1019 entries, weekday_0 to item_9_store_9_sales
dtypes: float64(1000), uint8(19)
memory usage: 14.0 MB


## Split to X and y (for training)

In [35]:
X_cols_stacked = [col for col in df_train.columns if '_past_' in col]
X_cols_caldata = [col for col in df_train.columns if 'weekday_' in col or 'month_' in col]
X_cols = X_cols_stacked + X_cols_caldata

X = df_train[X_cols]

In [36]:
X_colset = set(X_cols)
y_cols = [col for col in df_train.columns if col not in X_colset]

y = df_train[y_cols]

In [37]:
X.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1825 entries, 2013-01-02 to 2017-12-31
Columns: 519 entries, item_10_store_10_sales_past_1 to month_12
dtypes: float64(500), uint8(19)
memory usage: 7.0 MB


In [38]:
y.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1825 entries, 2013-01-02 to 2017-12-31
Columns: 500 entries, item_10_store_10_sales to item_9_store_9_sales
dtypes: float64(500)
memory usage: 7.0 MB


## Shape for Keras LSTM

In [39]:
# split values to train and test, use np arrays to allow reshaping
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=False)

In [40]:
# reshape inputs to be 3d, as in: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/
X_train_vals = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_valid_vals = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))

## Training LSTM
Using features for all storeitems (stacked sales from the previous day) plus the calendar columns to predict the current day's sales for every storeitem (the model has one output per storeitem).


In [41]:
from keras.models import Sequential, Model
from keras.layers import *

In [42]:
# model alternative 2 - double with conv1ds TODO
# https://arxiv.org/pdf/1709.05206.pdf
def build_model():
    """Build and compile the two-branch LSTM / Conv1D model.

    The input shape (timesteps, features) is read from the global
    `X_train_vals`. The top branch is an LSTM followed by a ReLU dense layer.
    The bottom branch is a dense layer, three Conv1D layers and average
    pooling. Both branches are concatenated, fed to a final LSTM with dropout
    and projected to 500 outputs by a dense layer without activation.

    Returns the model compiled with mean-absolute-error loss, the Adam
    optimizer and MAPE as an additional metric.
    """
    inputs = Input(shape=(X_train_vals.shape[1], X_train_vals.shape[2]))
    # top pipeline
    top_lstm = LSTM(500, return_sequences=True)(inputs)
    top_dense = Dense(500, activation='relu')(top_lstm)
    # bottom pipeline
    bottom_dense = Dense(500)(inputs)
    # NOTE(review): input_shape is passed to a layer that is called on an
    # existing tensor; presumably it has no effect here - confirm and drop.
    bottom_conv1 = Conv1D(
        500, 
        kernel_size=1,
        input_shape=(X_train_vals.shape[1], X_train_vals.shape[2])
    )(bottom_dense)
    # NOTE(review): X_train_vals is reshaped to a single timestep elsewhere in
    # this notebook, so kernel sizes of 50 and 10 with 'same' padding span
    # mostly padding - confirm this is intended.
    bottom_conv2 = Conv1D(
        1000,
        kernel_size=50,
        padding='same',
        activation='relu'
    )(bottom_conv1)
    bottom_conv3 = Conv1D(
        500,
        kernel_size=10,
        padding='same',
        activation='relu'
    )(bottom_conv2)
    bottom_pooling = AvgPool1D(
        pool_size=10, 
        padding='same'
    )(bottom_conv3)
    # disabled experiment, kept for reference:
#     bottom_reshape = Reshape(
#         target_shape=[500]
#     )(bottom_conv3)
    # concat output
    final_concat = Concatenate()([top_dense, bottom_pooling])
    final_lstm = LSTM(1000, dropout=0.2)(final_concat)
    # one output per target column
    final_dense = Dense(500)(final_lstm)
    # compile and return
    model = Model(inputs=inputs, outputs=final_dense)
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mape'])
    return model

model = build_model()

In [6]:
%%timeit
history = model.fit(
      X_train_vals, 
      y_train.values, 
      epochs=130, 
      batch_size=70,
      validation_data=(X_valid_vals, y_valid.values),
      verbose=2,
      shuffle=False
)

NameError: ignored

In [None]:
%%timeit
with tf.device('/device:GPU:0'):
  history = model.fit(
      X_train_vals, 
      y_train.values, 
      epochs=130, 
      batch_size=70,
      validation_data=(X_valid_vals, y_valid.values),
      verbose=2,
      shuffle=False
  )

## Model Evaluation
Calculate SMAPE for the model
SMAPE is calculated on a validation set for the model

In [None]:
def model_eval(model, X_test, y_test, log_all=False):
    """
    Forecast step by step, feeding each prediction back in as the next input.

    Model must have #predict method.
    X_test, y_test - instances of pd.DataFrame

    The first row of X_test is used as is. For every later row the non-sales
    features come from X_test, while the sales features are replaced by the
    prediction made for the previous row. X_test itself is never modified.

    Note that this function assumes that sales columns for previous values appear
    in the same order as sales columns for current values.

    Returns (y_pred, y_real): predictions shaped like y_test, and y_test.values.
    If y_test has no rows, the predictions are an empty array and the model is
    not called.
    """
    # prepare data
    sales_x_idxs = [idx for idx, col in enumerate(X_test.columns) if 'sales' in col]
    sales_y_idxs = [idx for idx, col in enumerate(y_test.columns) if 'sales' in col]
    n_samples = y_test.shape[0]
    n_rows = len(X_test)
    y_pred = np.zeros(y_test.shape)
    if n_samples == 0:
        return y_pred, y_test.values

    def row_features(position):
        # Always a fresh float array: writing predictions into it must never
        # reach back into X_test, whatever the column dtypes are.
        return X_test.iloc[position].to_numpy(dtype=float, copy=True)

    # iterate
    x_next = row_features(0)
    for i in range(n_samples):
        if log_all:
            print('[x]', x_next)
        # model input is (samples, timesteps, features) with one sample, one step
        x_arr = x_next.reshape(1, 1, -1)
        y_pred[i] = model.predict(x_arr)[0]
        # Explicit bounds check instead of catching IndexError, which would
        # also hide genuine indexing mistakes in the assignment below.
        if i + 1 < n_rows:
            x_next = row_features(i + 1)
            x_next[sales_x_idxs] = y_pred[i][sales_y_idxs]
    return y_pred, y_test.values

def vector_smape(y_pred, y_real):
    """Symmetric mean absolute percentage error of two vectors, in percent.

    Each element contributes |pred - real| / ((|pred| + |real|) / 2). An
    element where both values are 0 contributes 0 instead of the undefined
    0/0, so exact zero predictions of zero sales do not turn the score into NaN.
    """
    nom = np.asarray(np.abs(y_pred - y_real), dtype=float)
    denom = np.asarray((np.abs(y_pred) + np.abs(y_real)) / 2, dtype=float)
    # divide only where the denominator is non-zero; elsewhere keep the 0 from `out`
    results = np.divide(nom, denom, out=np.zeros_like(nom), where=denom != 0)
    return 100*np.mean(results)  # in percent, same as at kaggle

In [None]:
def unscale(y_arr, scaler, template_df, toint=False):
    """
    Unscale array y_arr of model predictions back to the original sales units.

    y_arr       - 2d array with one column per entry of the global `y_cols`
                  and one row per row of template_df
    scaler      - fitted scaler whose inverse_transform accepts the columns
                  listed in the global `cols_to_scale`
    template_df - frame containing every column in `cols_to_scale`; it is
                  copied, not modified
    toint       - if True, round the result to the nearest integer and
                  return it with an integer dtype

    Returns a DataFrame with the `y_cols` columns on template_df's index.
    """
    tmp = template_df.copy()
    # label the prediction columns explicitly so each lands in its y column
    tmp[y_cols] = pd.DataFrame(y_arr, index=tmp.index, columns=y_cols)
    tmp[cols_to_scale] = scaler.inverse_transform(tmp[cols_to_scale])
    if toint:
        # round before casting: astype(int) alone truncates, so a value that
        # comes back from the scaler as 12.999999 would become 12
        return tmp[y_cols].round().astype(int)
    return tmp[y_cols]

In [None]:
X_valid, y_valid = X_valid.head(90), y_valid.head(90)

In [None]:
%%timeit
y_pred, y_real = model_eval(model, X_valid, y_valid)

In [None]:
%%timeit
with tf.device('/device:GPU:0'):
  y_pred, y_real = model_eval(model, X_valid, y_valid)

In [None]:
template_df = pd.concat([X_valid, y_valid], axis=1)
template_df['is_test'] = np.repeat(True, template_df.shape[0])

pred = unscale(y_pred, scaler, template_df, toint=True)
real = unscale(y_real, scaler, template_df, toint=True)

Calculate SMAPE for each item.

In [None]:
smapes = [vector_smape(pred[col], real[col]) for col in pred.columns]

In [None]:
sns.distplot(smapes)

In [None]:
describe(smapes)

## Submitting final results
Run the trained model on the test set (already loaded and formatted) and save the results in Kaggle format.

In [None]:
# make sure 1st row has correctly stacked sales
df_test[stacked_sales_cols].head(2)

In [None]:
# split to X and y
X_test, y_test = df_test[X_cols], df_test[y_cols]

In [None]:
# y_test is basically blank, but allows us to use the same function
y_test_pred, _ = model_eval(model, X_test, y_test)

In [None]:
test_template_df = pd.concat([X_test, y_test], axis=1)
test_template_df['is_test'] = np.repeat(True, test_template_df.shape[0])

test_pred = unscale(y_test_pred, scaler, test_template_df, toint=True)

In [None]:
test_pred.head()

In [None]:
plt.plot(test_pred['item_1_store_1_sales'].values)
plt.show()

In [None]:
# One prediction per test row; the builtin `int` replaces the `np.int` alias,
# which no longer exists in current NumPy releases.
result = np.zeros(45000, dtype=int)
for i, s in storeitems():
    # each (item, store) pair owns a block of 90 consecutive ids,
    # ordered by item first and store second
    slice_start_idx = 90*10*(i-1) + 90*(s-1)
    slice_end_idx = slice_start_idx + 90
    col_name = f'item_{i}_store_{s}_sales'
    result[slice_start_idx:slice_end_idx] = test_pred[col_name].values
result = pd.DataFrame(result, columns=['sales'])
result.index.name = 'id'
result.head()

result.to_csv('/content/submission.csv')