In [41]:
import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()
import warnings
warnings.filterwarnings('ignore')
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, LSTM, Dropout, GRU, Conv1D, MaxPooling1D, Input, concatenate
from keras.datasets import imdb
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

## 1. Read in data for one store

In [2]:
# Load the pre-computed feature table for a single store (TX_2).
# NOTE: pickle files can execute arbitrary code when loaded; only read
# files produced by a trusted pipeline.
features = pd.read_pickle('TX_2_features.pkl')
# Discard every row that has at least one missing value.
features = features.dropna()

## 2. Define loss function

In [7]:
from keras import backend as K

def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    Built from Keras backend ops (`K.square`, `K.mean`, `K.sqrt`) so it can be
    passed as the `loss` of a compiled model.

    Args:
        y_true: tensor of ground-truth values.
        y_pred: tensor of predicted values.

    Returns:
        The square root of the mean (over all elements) of the squared
        difference `y_pred - y_true`.
    """
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

## 3. Set up training and validation sets

In [19]:
# Size of the lag window; also used to push the start of the training range forward.
NUM_LAG_DAYS = 28

# Split on the day index `d`:
#   train: 1069 + NUM_LAG_DAYS <= d <= 1863
#   val:   1863 < d < 1914
#   test:  d >= 1914
# NOTE(review): the boundaries 1069 / 1863 / 1914 are hard-coded; presumably
# dataset-specific day numbers -- confirm and consider naming them.
train = features[(features.d >= 1069 + NUM_LAG_DAYS) & (features.d <= 1863)].dropna()
val = features[(features.d < 1914) & (features.d > 1863)].dropna()
test = features[features.d >= 1914]

# Columns excluded from the model inputs: the target, identifier/calendar keys, ...
drop_cols = ['sales', 'id', 'd', 'wm_yr_wk', 'original_id', 'store_id', 'item_id']

# ... the first six lag columns of every 7-day group (lag 7j-6 .. lag 7j-1),
# which leaves lag7, lag14, ... out of the drop list ...
num_weeks = NUM_LAG_DAYS // 7
for j in range(1, num_weeks + 1):
    drop_cols.extend('lag' + str(lag) for lag in range(j * 7 - 6, j * 7))

# ... and the event name/type columns.
drop_cols.extend(['event_name_1', 'event_name_2', 'event_type_1', 'event_type_2'])

# Model inputs (remaining columns) and regression target (`sales`) per split.
x_train = train.drop(columns=drop_cols)
y_train = train['sales'].values
x_val = val.drop(columns=drop_cols)
y_val = val['sales'].values

In [6]:
# Display (number of rows, number of feature columns) of the training inputs.
x_train.shape

(2273370, 48)

## 4. Define training configurations

In [14]:
# Training configuration -- every tunable knob for the model cell lives here.

# Network shape
num_layers = 3      # number of hidden Dense+Dropout stages
num_nodes = 128     # units in each hidden Dense layer
dropout = 0.2       # rate passed to every Dropout layer

# Optimisation
loss_fn = rmse      # loss handed to model.compile
optimizer = 'adam'
batch_size = 128
num_epochs = 100    # upper bound; early stopping may end training sooner

## 5. Train MLP model with early stopping

In [15]:
# Feed-forward regressor: `num_layers` hidden Dense(relu)+Dropout stages
# followed by a single output unit.
mlp_model = Sequential()
mlp_model.add(Dense(num_nodes, input_dim=x_train.shape[1], activation='relu'))
mlp_model.add(Dropout(dropout))
for i in range(num_layers-1):
    mlp_model.add(Dense(num_nodes, activation='relu'))
    mlp_model.add(Dropout(dropout))
# ReLU on the output unit constrains predictions to be non-negative.
mlp_model.add(Dense(1, activation='relu'))

# No 'accuracy' metric: the target is continuous, so classification accuracy
# carries no information here. The loss itself is the quantity of interest.
mlp_model.compile(loss=loss_fn, optimizer=optimizer)

# Stop once val_loss has not improved for 10 epochs, and roll the model back
# to the weights of the best epoch instead of keeping the last (worse) ones.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10,
                   restore_best_weights=True)

# Keep the History object so the loss curves can be inspected afterwards.
history = mlp_model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        validation_data=(x_val, y_val),
                        callbacks=[es],
                        verbose=1)

Train on 2273370 samples, validate on 152450 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 00013: early stopping


<keras.callbacks.callbacks.History at 0x7fbeaa4622e8>