In [61]:
# please download all the files from here https://www.kaggle.com/c/walmart-recruiting-store-sales-forecasting/data

import pandas as pd
import numpy as np

train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
features_df = pd.read_csv("features.csv")
stores_df = pd.read_csv("stores.csv")

def load_all():
    train_df = pd.read_csv("train.csv")
    features_df = pd.read_csv("features.csv")
    stores_df = pd.read_csv("stores.csv")
    
    return pd.merge(pd.merge(train_df, features_df), stores_df)

print "Total Number of Records", len(train_df)

Total Number of Records 421570


In [33]:
# configuration
SEQ_LEN = 10 # give 9 weeks, predict the sale of next week
FEATURES = ['DayOfYear', 'Weekly_Sales']

In [63]:
from datetime import datetime

def convert_day_of_year(x):
    return datetime.strptime(x, "%Y-%m-%d").timetuple().tm_yday

def sliding(x, seq_len):
    t = np.zeros([x.shape[0] - seq_len + 1, seq_len, x.shape[1]])
    for i in range(t.shape[0]):
        t[i,:,:] = x[i:i+seq_len,:]
    
    return t

# Try to predict the sales for only 1 store and 1 department
store_1_sales = train_df[(train_df.Store == 1) & (train_df.Dept == 1)]
store_1_features = features_df[features_df.Store == 1]
store_1 = pd.merge(store_1_sales, store_1_features)
store_1['DayOfYear'] = store_1['Date'].apply(convert_day_of_year)

store_1 = store_1[FEATURES]
print "Number of store 1 & dep 1 records:", len(store_1)

# sliding
result = sliding(store_1.values, SEQ_LEN)

# also split into train & test
num_train = int(len(result) * 0.9)
store_1_train = result[:num_train]
store_1_test = result[num_train:]

# some normalization
train_mean = store_1_train[0].mean()
train_std = store_1_train[0].std()

store_1_train[0] /= train_std
store_1_test[0] /= train_std

np.random.shuffle(store_1_train)
X_train = store_1_train[:, :SEQ_LEN - 1]
Y_train = store_1_train[:, SEQ_LEN - 1, len(FEATURES) - 1]

X_test = store_1_test[:, :SEQ_LEN - 1]
Y_test = store_1_test[:, SEQ_LEN - 1, len(FEATURES) - 1]

Number of store 1 & dep 1 records: 143


In [64]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Masking
from keras.layers.recurrent import LSTM

def build_model(seq_len, num_features):
    model = Sequential()

    #model.add(Masking(0, input_shape=(seq_len-1, num_features)))
    model.add(LSTM(20, init='lecun_uniform', return_sequences=False, input_shape=(seq_len-1, num_features)))
    #model.add(Dropout(0.2))
    #model.add(LSTM(20, return_sequences=False))
    #model.add(Dropout(0.2))
    model.add(Dense(output_dim=max(1, num_features-1)))
    model.add(Activation('linear'))
    
    model.compile(loss="mse", optimizer="rmsprop")
    return model

In [65]:
model = build_model(SEQ_LEN, len(FEATURES))

In [66]:
def train(model, X_train, Y_train):
    epochs = 1000
    
    try:
        model.fit(
            X_train, Y_train,
            batch_size=512, nb_epoch=epochs, validation_split=0.1,
            verbose=0
        )
    except Exception as ex:
        print ex
    
    return model

In [67]:
model = train(model, X_train, Y_train)

In [69]:
model.predict(X_test[0:10]), Y_test[0:10]

(array([[ 0.81281161],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233],
        [ 6.68711233]]),
 array([  8.52066759e-01,   1.66283100e+04,   1.61199200e+04,
          1.73307000e+04,   1.62864000e+04,   1.66802400e+04,
          1.83223700e+04,   1.96162200e+04,   1.92515000e+04,
          1.89478100e+04]))