In [42]:
import time
notebookstart= time.time()

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, ThresholdedReLU, MaxPooling2D, Embedding, Dropout
from keras.optimizers import Adam, SGD, RMSprop
from keras import optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
import gc

# Viz
import matplotlib.pyplot as plt
%matplotlib inline

# Utility
def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error computed with Keras backend ops.

    The squared difference is averaged over the last axis only
    (``axis=-1``) before the square root is applied, so the result keeps
    every leading axis of the inputs rather than collapsing to one scalar.

    Args:
        y_true: Backend tensor of target values.
        y_pred: Backend tensor of predictions, compatible with ``y_true``.

    Returns:
        Backend tensor holding the RMSE along the last axis.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared_error)

# Import data
# Paths are relative to the notebook's working directory.
sales = pd.read_csv('../input/sales_train.csv', parse_dates=['date'], infer_datetime_format=True, dayfirst=True)
shops = pd.read_csv('../input/shops.csv')
items = pd.read_csv('../input/items.csv')
cats = pd.read_csv('../input/item_categories.csv')
val = pd.read_csv('../input/test.csv')

# Rescale prices into [0, 1]. The fitted scaler stays available as `scaler`.
# scaler = preprocessing.StandardScaler()
scaler = MinMaxScaler(feature_range=(0,1))
sales["item_price"] = scaler.fit_transform(sales["item_price"].values.reshape(-1,1))
#sales["item_cnt_day"] = scaler.fit_transform(sales["item_cnt_day"].values.reshape(-1,1))
# sales["item_cnt_day"] = sales["item_cnt_day"].astype(int)
# df["item_cnt_day"].clip(0.,20.,inplace=True)

# Remove the items/shops outside of forecast range.
# The left merge starts from `val`, so every (shop_id, item_id) pair in the
# test file is kept; pairs without any sales rows come back as NaN and are
# zero-filled, which also gives them date_block_num == 0.
sales = pd.merge(val,sales,on=['item_id','shop_id'], how='left')
sales = sales.fillna(0)
# Assign the clipped column back instead of `clip(..., inplace=True)` on a
# column selection: that form is chained assignment, which warns on recent
# pandas and silently does nothing once Copy-on-Write is active.
sales["item_cnt_day"] = sales["item_cnt_day"].clip(0., 20.)

# Represents the submission set: month 33 is duplicated and relabelled as 34.
# `.copy()` makes `expand` an independent frame so the relabel below cannot
# trigger SettingWithCopyWarning or write back into `sales`.
expand = sales.loc[sales.date_block_num == 33,:].copy()
expand["date_block_num"] = 34.0
sales = pd.concat([sales,expand])

# Clean
# Sum per (month, shop, item), then unstack/stack so that every shop/item
# column is present for every month, with gaps filled by 0.
df = (sales.drop("date",axis=1).groupby(["date_block_num",'shop_id',"item_id"])[["item_price","item_cnt_day"]].sum()
                .unstack(level=[1,2]).fillna(0)
                .stack([1,2]).fillna(0).reset_index())

# Clip the aggregated counts to the same [0, 20] range (assigned back, see above).
df["item_cnt_day"] = df["item_cnt_day"].clip(0., 20.)

In [43]:
# Merge and Expand Category Variable
# Attach the category name to each item, then split that name on its first
# '-' into two parts; only item_id and the two parts are kept.
items = items.merge(cats, on="item_category_id", how="left")
category_parts = (items["item_category_name"]
                  .str.split('-', n=1, expand=True)
                  .rename(columns={0: "category1", 1: "category2"}))
items = pd.concat([items, category_parts], axis=1)[["item_id", "category1", "category2"]]

# Encode Russian strings into categorical integers.
# The same encoder object is re-fitted for each column, so the integer codes
# of category1 and category2 are independent of each other.
lbl = preprocessing.LabelEncoder()
for col in ("category1", "category2"):
    as_text = items[col].astype(str)
    items[col] = lbl.fit_transform(as_text)

# Merge Df and Items: every df row receives its item's two category codes.
df = df.merge(items, on="item_id", how="left")

# Additional Ideas:
"""
Stochastic Gradient Descent
Batch Normalization
Less Dropout?
"""

In [44]:
# Brand New.. !
# NOTE(review): this cell is not idempotent. It rebinds `df` after dropping
# columns and shifts date_block_num, so re-running it without re-running the
# cells above raises a KeyError on the drop. Run it exactly once per fresh `df`.

# Targets: item_cnt_day of every row whose month is not 0.
y_var = df.loc[df["date_block_num"] != 0, "item_cnt_day"].copy()
# Shift every month label forward by one.
df["date_block_num"] = df["date_block_num"] + 1

# Number of series = number of distinct (shop_id, item_id) pairs actually
# present. The product of the two nunique() values only equals this when the
# shop/item grid is a full cartesian product; counting pairs stays consistent
# with len(df) in either case, which the later reshape relies on.
n_samples = len(df[["shop_id", "item_id"]].drop_duplicates())
df = df.drop(["shop_id","item_id"],axis=1).set_index("date_block_num")

# Add Lag Variable (1 and 2 month lag)
#df = pd.concat([df, df.shift(), df.shift(2)], axis=1).fillna(0)
#df.columns = ["item_cnt_day","item_price","item_cnt_day_t-1","item_price_t-1","item_cnt_day_t-2","item_price_t-2"]

In [None]:
# Preview the first rows of the feature table (5 is head()'s default).
df.head(n=5)

In [37]:
# # 35 time BUILD
# # Matrix
# npdf = df.values.reshape(n_samples,35,df.shape[1])
# print("All Shape: ",npdf.shape)

# # Input Output
# y = npdf[:,-1,0].reshape(n_samples,1)
# X = npdf[:,:-1,2:]#.reshape(n_samples,35,4)
# print("y Shape: ",y.shape)
# print("X Shape: ",X.shape)

# # Validation Set
# y_train = npdf[:,-4,0].reshape(n_samples,1)
# y_valid = npdf[:,-2,0].reshape(n_samples,1)
# X_train = npdf[:,:17,2:]
# X_valid = npdf[:,17:-1,2:]
# print("\ny Train Shape: ",y_train.shape)
# print("X Train Shape: ",X_train.shape)
# print("y Valid Shape: ",y_valid.shape)
# print("X Valid Shape: ",X_valid.shape)

# # Test Set
# testing = npdf[:,1:,2:]#.reshape(n_samples,35,4)
# print("\nX for Submission Shape: ",X.shape)

# # 1 time BUILD
# # Matrix
# npdf = df.values.reshape(n_samples*35,1,df.shape[1])
# print("All Shape: ",npdf.shape)

# # Input Output
# y = npdf[:-n_samples,:,0]
# X = npdf[:-n_samples,:,2:]
# print("y Shape: ",y.shape)
# print("X Shape: ",X.shape)

# # Validation Set
# y_train = y[:-3*n_samples,:]
# y_valid = y[-3*n_samples:,:]
# X_train = X[:-3*n_samples,:,:]
# X_valid = X[-3*n_samples:,:,:]
# print("y Train Shape: ",y_train.shape)
# print("X Train Shape: ",X_train.shape)
# print("y Valid Shape: ",y_valid.shape)
# print("X Valid Shape: ",X_valid.shape)

# # Test Set
# testing = npdf[n_samples:,:,2:]
# print("X for Submission Shape: ",X.shape)

In [38]:
# NEW ! 35 time BUILD
# Matrix
# `df` is ordered month-major: the groupby/unstack/stack that built it sorts
# by date_block_num first, then shop_id, then item_id. Reshaping straight to
# (n_samples, 35, features) would therefore put 35 *different* shop/item
# pairs of the same month into one "sequence". Reshape to
# (time, sample, feature) first and swap the first two axes so that
# npdf[i, t, :] is month t of series i.
N_TIMESTEPS = 35
n_features = df.shape[1]
npdf = df.values.reshape(N_TIMESTEPS, n_samples, n_features).transpose(1, 0, 2)
print("All Shape: ",npdf.shape)

# Input Output
# Target is column 0 at the last time step, the same pattern y_train/y_valid
# use below. `y_var` (every row with date_block_num != 0) has one value per
# row of all remaining months, so it cannot be reshaped to (n_samples, 1) and
# the previous `y_var.values.reshape(n_samples,1)` raised a ValueError.
y = npdf[:,-1,0].reshape(n_samples,1)
# NOTE(review): `2:` skips the first two columns. Given how df is built these
# are presumably item_cnt_day/item_price, leaving only the category codes as
# inputs. TODO confirm this is intended.
X = npdf[:,:-1,2:]#.reshape(n_samples,35,4)
print("y Shape: ",y.shape)
print("X Shape: ",X.shape)

# Validation Set
# NOTE(review): X_train covers time steps 0..16 but y_train is taken at step
# -4, while y_valid (step -2) lies inside the X_valid window. Confirm the
# target offsets are the intended ones.
y_train = npdf[:,-4,0].reshape(n_samples,1)
y_valid = npdf[:,-2,0].reshape(n_samples,1)
X_train = npdf[:,:17,2:]
X_valid = npdf[:,17:-1,2:]
print("\ny Train Shape: ",y_train.shape)
print("X Train Shape: ",X_train.shape)
print("y Valid Shape: ",y_valid.shape)
print("X Valid Shape: ",X_valid.shape)

# Test Set
# Same window length as X, shifted one step forward.
testing = npdf[:,1:,2:]#.reshape(n_samples,35,4)
# Report the submission tensor itself (this previously printed X.shape).
print("\nX for Submission Shape: ",testing.shape)

In [None]:
# VALID=True trains on the train/validation split and reports validation
# loss; VALID=False trains on the full X/y window for the submission run.
VALID = True
if VALID:
    inputshape = (X_train.shape[1], X_train.shape[2])
else:
    inputshape = (X.shape[1], X.shape[2])

# Keyword arguments forwarded to model.fit().
# NOTE: with epochs=4 the EarlyStopping callback below (patience=5) can never
# trigger; raise `epochs` for it to have an effect.
LSTM_PARAM = {"batch_size":128,
              "verbose":2,
              "epochs":4}

print("Modeling Stage")
# Define the model layers: one LSTM layer, dropout, single-unit regression head
model_lstm = Sequential()
model_lstm.add(LSTM(16, input_shape=inputshape))#,return_sequences=True))
model_lstm.add(Dropout(0.5))
# model_lstm.add(LSTM(32))
# model_lstm.add(Dropout(0.5))
model_lstm.add(Dense(1))

# `optimizers` comes from the notebook's import cell; no re-import needed here.
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model_lstm.compile(optimizer=sgd, loss='mse', metrics=["mse"])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model_lstm.summary()

# Train Model
print("\nFit Model")
modelstart = time.time()
if VALID:
    callbacks_list=[EarlyStopping(monitor="val_loss",min_delta=.001, patience=5,mode='auto')]
    hist = model_lstm.fit(X_train, y_train,
                          validation_data=(X_valid, y_valid),
                          callbacks=callbacks_list,
                          **LSTM_PARAM)

    # Model Evaluation: report the epoch with the lowest validation loss
    best = np.argmin(hist.history["val_loss"])
    print("Optimal Epoch: ",best+1)
    print("Train Score: {}, Validation Score: {}".format(hist.history["loss"][best],hist.history["val_loss"][best]))

    plt.plot(hist.history['loss'], label='train')
    plt.plot(hist.history['val_loss'], label='validation')
    plt.xlabel("Epochs")
    plt.ylabel("Mean Square Error")
    plt.legend()
    # Save before show(): showing the figure finalizes it, so a savefig()
    # placed afterwards writes an empty image.
    plt.savefig("Train and Validation MSE Progression.png")
    plt.show()
else:
    hist = model_lstm.fit(X,y,**LSTM_PARAM)

    plt.plot(hist.history['loss'], label='Training Loss')
    plt.xlabel("Epochs")
    plt.ylabel("Mean Square Error")
    plt.legend()
    # Save before show() for the same reason as above.
    plt.savefig("Training Loss Progression.png")
    plt.show()

In [None]:
# Predict
# Runs only in submission mode (VALID is False): score the shifted window,
# clip predictions to [0, 20] and write them to submission.csv.
# NOTE(review): the ID column is just the positional row index of `pred`;
# verify that this order matches the ID order of the test file.
if VALID is False:
    #pred = model_lstm.predict(testing)[-n_samples:]
    pred = model_lstm.predict(testing)

    print("Output Submission")
    clipped_pred = pred.clip(0.,20.)
    submission = pd.DataFrame(clipped_pred, columns=['item_cnt_month'])
    submission.to_csv('submission.csv', index_label='ID')

    print(submission.shape)
    print(submission.head())

    # Elapsed wall-clock time in minutes since training / notebook start.
    model_minutes = (time.time() - modelstart) / 60
    print("\nModel Runtime: %0.2f Minutes"%(model_minutes))
    notebook_minutes = (time.time() - notebookstart) / 60
    print("Notebook Runtime: %0.2f Minutes"%(notebook_minutes))