In [62]:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD,Adadelta,Adam,RMSprop 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras import optimizers
import itertools
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Dropout
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [55]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    # Input sequence (t-n, ... t-1)
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    # Current timestep (t=0)
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    # Target timestep (t=lag)
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def get_sales():
    # Read data
    df = pd.read_csv(
        '../data/sales_top50groups.csv'
        # 'data/product_salestop50.csv'
    )
    
    df.start_date = pd.to_datetime(df.start_date, format='%m/%d/%Y').dt.date
    df = df.rename(columns={'start_date': 'date'})
    df = df.sort_values(by=['object_id', 'natural_week'], ascending=[True, True])

    df.set_index('date', inplace=True)

    return df

In [56]:
sales = get_sales()


window = 29
lag = 4

series = pd.DataFrame()

for obj in sales.object_id.unique():

    obj_series = series_to_supervised(
        sales[sales.object_id == obj][['quantity']], window=window, lag=lag)
    obj_series['object_id'] = obj
    series = series.append(obj_series)

# series.head()

In [61]:
# Label
labels_col = 'quantity(t+%d)' % lag

epochs = 40
batch = 256
lr = 0.0003
adam = optimizers.Adam(lr)

for obj in series.object_id.unique():

    obj_series = series[series.object_id == obj]
    labels = obj_series[labels_col]
    obj_series = obj_series.drop(labels_col, axis=1)
    x_train, x_valid, y_train, y_valid = train_test_split(obj_series, labels.values, test_size=0.2, random_state=0)
    print('Train set shape', x_train.shape)
    print('Validation set shape', x_valid.shape)

    model_mlp = Sequential()
    model_mlp.add(Dense(100, activation='relu', input_dim=x_train.shape[1]))
    model_mlp.add(Dense(1))
    model_mlp.compile(loss='mse', optimizer=adam)
    model_mlp.summary()

    mlp_history = model_mlp.fit(x_train.values, y_train, validation_data=(x_valid.values, y_valid), epochs=epochs, verbose=2)
    



Train set shape (96, 31)
Validation set shape (25, 31)
Index(['quantity(t-29)', 'quantity(t-28)', 'quantity(t-27)', 'quantity(t-26)',
       'quantity(t-25)', 'quantity(t-24)', 'quantity(t-23)', 'quantity(t-22)',
       'quantity(t-21)', 'quantity(t-20)', 'quantity(t-19)', 'quantity(t-18)',
       'quantity(t-17)', 'quantity(t-16)', 'quantity(t-15)', 'quantity(t-14)',
       'quantity(t-13)', 'quantity(t-12)', 'quantity(t-11)', 'quantity(t-10)',
       'quantity(t-9)', 'quantity(t-8)', 'quantity(t-7)', 'quantity(t-6)',
       'quantity(t-5)', 'quantity(t-4)', 'quantity(t-3)', 'quantity(t-2)',
       'quantity(t-1)', 'quantity(t)', 'object_id'],
      dtype='object')
Train set shape (97, 31)
Validation set shape (25, 31)
Index(['quantity(t-29)', 'quantity(t-28)', 'quantity(t-27)', 'quantity(t-26)',
       'quantity(t-25)', 'quantity(t-24)', 'quantity(t-23)', 'quantity(t-22)',
       'quantity(t-21)', 'quantity(t-20)', 'quantity(t-19)', 'quantity(t-18)',
       'quantity(t-17)', 'quantit

In [54]:
X_train

Unnamed: 0_level_0,quantity(t-29),quantity(t-28),quantity(t-27),quantity(t-26),quantity(t-25),quantity(t-24),quantity(t-23),quantity(t-22),quantity(t-21),quantity(t-20),...,quantity(t-8),quantity(t-7),quantity(t-6),quantity(t-5),quantity(t-4),quantity(t-3),quantity(t-2),quantity(t-1),quantity(t),object_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-06-28,38.0,80.0,119.0,118.0,126.0,142.0,145.0,88.0,85.0,100.0,...,109.0,137.0,81.0,99.0,81.0,91.0,94.0,80.0,71,1_6_2_14
2021-08-22,335.0,327.0,316.0,262.0,296.0,268.0,209.0,204.0,185.0,220.0,...,208.0,191.0,216.0,213.0,230.0,216.0,238.0,208.0,240,2_6_1_9
2019-10-20,73.0,22.0,31.0,11.0,18.0,6.0,36.0,87.0,180.0,200.0,...,37.0,38.0,32.0,34.0,36.0,38.0,21.0,19.0,21,1_3_9_8
2020-01-05,371.0,364.0,421.0,471.0,507.0,554.0,598.0,700.0,695.0,246.0,...,426.0,402.0,380.0,420.0,344.0,324.0,377.0,372.0,518,2_6_2_9
2021-08-29,301.0,267.0,232.0,165.0,103.0,62.0,47.0,53.0,43.0,37.0,...,73.0,76.0,82.0,74.0,96.0,104.0,133.0,161.0,344,4_2_7_12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-22,170.0,196.0,244.0,217.0,255.0,305.0,270.0,238.0,105.0,104.0,...,206.0,173.0,166.0,143.0,130.0,144.0,376.0,346.0,405,3_6_2_12
2021-09-26,299.0,225.0,229.0,289.0,334.0,279.0,289.0,233.0,235.0,248.0,...,245.0,295.0,267.0,241.0,232.0,229.0,217.0,231.0,226,2_2_1_9
2020-02-23,70.0,81.0,58.0,61.0,76.0,70.0,61.0,58.0,100.0,91.0,...,11.0,17.0,4.0,18.0,39.0,74.0,89.0,90.0,86,1_6_1_14
2020-05-24,449.0,339.0,293.0,169.0,256.0,273.0,246.0,216.0,245.0,237.0,...,252.0,229.0,249.0,301.0,328.0,295.0,122.0,91.0,27,2_1_1_9
