In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from __future__ import print_function

In [13]:
from sklearn import cross_validation
import xgboost as xgb

def ToWeight(y):
    w = np.zeros(y.shape, dtype=float)
    ind = y != 0
    w[ind] = 1./(y[ind]**2)
    return w

def rmspe(yhat, y):
    w = ToWeight(y)
    rmspe = np.sqrt(np.mean( w * (y - yhat)**2 ))
    return rmspe

def rmspe_xg(yhat, y):
    # y = y.values
    y = y.get_label()
    y = np.exp(y) - 1
    yhat = np.exp(yhat) - 1
    w = ToWeight(y)
    rmspe = np.sqrt(np.mean(w * (y - yhat)**2))
    return "rmspe", rmspe

def rms(yhat, y):
    if y != 0:
        return ((y - yhat))**2
    else:
        return 0

In [2]:
train = pd.read_csv('train_nnew.csv', low_memory=False)
test = pd.read_csv('test_nnew.csv', low_memory=False)
features = [u'Open', u'Promo', u'SchoolHoliday', u'StateHoliday_0',
       u'StateHoliday_a', u'DayOfWeek_1', u'DayOfWeek_2', u'DayOfWeek_3',
       u'DayOfWeek_4', u'DayOfWeek_5', u'DayOfWeek_6', u'DayOfWeek_7',
       u'CompetitionDistance', u'Promo2', 'year', 'Mean_Sales', 'month', 'day',
       u'StoreType_a', u'StoreType_b', u'StoreType_c', u'StoreType_d',
       u'Assortment_a', u'Assortment_b', u'Assortment_c', u'CompetitionOpen']

In [3]:
train['year'] = train.Date.apply(lambda x: x.split('-')[0])
train['year'] = train['year'].astype(float)
train['month'] = train.Date.apply(lambda x: x.split('-')[1])
train['month'] = train['month'].astype(float)
train['day'] = train.Date.apply(lambda x: x.split('-')[2])
train['day'] = train['day'].astype(float)

In [4]:
test['year'] = test.Date.apply(lambda x: x.split('-')[0])
test['year'] = test['year'].astype(float)
test['month'] = test.Date.apply(lambda x: x.split('-')[1])
test['month'] = test['month'].astype(float)
test['day'] = test.Date.apply(lambda x: x.split('-')[2])
test['day'] = test['day'].astype(float)

In [5]:
features = ['Open', 'Promo', 'SchoolHoliday', 'StateHoliday0',
       'StateHolidaya', 'DayOfWeek1', 'DayOfWeek2', 'DayOfWeek3',
       'DayOfWeek4', 'DayOfWeek5', 'DayOfWeek6', 'DayOfWeek7',
       'CompetitionDistance', 'Promo2', 'year', 'MeanSales', 'month', 'day',
       'StoreTypea', 'StoreTypeb', 'StoreTypec', 'StoreTyped',
       'Assortmenta', 'Assortmentb', 'Assortmentc', 'CompetitionOpen']

In [6]:
test.columns = [u'Id', u'Store', u'DayOfWeek', u'Date', u'Open', u'Promo',
       u'StateHoliday', u'SchoolHoliday', u'StateHolidaya', u'StateHoliday0',
       u'DayOfWeek1', u'DayOfWeek2', u'DayOfWeek3', u'DayOfWeek4',
       u'DayOfWeek5', u'DayOfWeek6', u'DayOfWeek7', u'CompetitionDistance',
       u'Promo2', u'CompetitionOpenSince', u'StoreTypea', u'StoreTypeb',
       u'StoreTypec', u'StoreTyped', u'Assortmenta', u'Assortmentb',
       u'Assortmentc', u'CompetitionOpen', u'MeanSales', u'year', u'month',
       u'day']

In [7]:
train.columns = [u'Store', u'DayOfWeek', u'Date', u'Sales', u'Customers', u'Open',
       u'Promo', u'StateHoliday', u'SchoolHoliday', u'StateHoliday0',
       u'StateHolidaya', u'DayOfWeek1', u'DayOfWeek2', u'DayOfWeek3',
       u'DayOfWeek4', u'DayOfWeek5', u'DayOfWeek6', u'DayOfWeek7',
       u'CompetitionDistance', u'Promo2', u'CompetitionOpenSince',
       u'StoreTypea', u'StoreTypeb', u'StoreTypec', u'StoreTyped',
       u'Assortmenta', u'Assortmentb', u'Assortmentc', u'CompetitionOpen',
       u'MeanSales', u'year', u'month', u'day']

In [8]:
len(features)

26

In [9]:
from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

Using gpu device 0: GeForce GTX 670 (CNMeM is disabled)


In [10]:
import lasagne

In [17]:
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, 26),  # 96x96 input pixels per batch
    hidden_num_units=1000,  # number of units in hidden layer
    output_nonlinearity=lasagne.nonlinearities.identity,  # output layer uses identity function
    output_num_units=1,  # 30 target values
    objective_loss_function = lasagne.objectives.squared_error,
    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # flag to indicate we're dealing with regression problem
    max_epochs=400,  # we want to train this many epochs
    verbose=1,
    )

In [None]:
NeuralNet()

In [46]:
(train[features].values).astype('float32')

array([[ 1.,  1.,  1., ...,  0.,  0.,  1.],
       [ 1.,  1.,  1., ...,  0.,  0.,  1.],
       [ 1.,  1.,  1., ...,  0.,  0.,  1.],
       ..., 
       [ 0.,  0.,  1., ...,  0.,  1.,  1.],
       [ 0.,  0.,  1., ...,  0.,  1.,  1.],
       [ 0.,  0.,  1., ...,  0.,  1.,  1.]], dtype=float32)

NameError: name 'lasagne' is not defined

In [47]:
((np.log(train["Sales"] + 1)).values).astype('float32').shape

(1017209,)

In [18]:
#X_train, X_test = cross_validation.train_test_split(train, test_size=0.05)
net1.fit((train[features].values).astype('float32'), ((np.log(train["Sales"] + 1)).values).astype('float32'))

# Neural Network with 28001 learnable parameters

## Layer information

  #  name      size
---  ------  ------
  0  input       26
  1  hidden    1000
  2  output       1

  epoch    train loss    valid loss    train/val  dur
-------  ------------  ------------  -----------  -----
      1           nan           nan          nan  9.30s
      2           nan           nan          nan  9.00s


NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7fce90163a50>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7fce901639d0>,
     custom_score=None, hidden_num_units=1000, input_shape=(None, 26),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=400, more_params={},
     objective=<function objective at 0x7fce900eb0c8>,
     objective_loss_function=<function squared_error at 0x7fce902bb6e0>,
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7fce7c5cf8c0>],
     on_training_finished=[],
     on_training_started=[<nolearn.lasagne.handlers.PrintLayerInfo instance at 0x7fce7c5cfc20>],
     output_nonlinearity=<function linear at 0x7fce907126e0>,
     output_num_units=1, regression=True,
     train_split=<nolearn.lasagne.base.TrainSpli

In [14]:
X_train, X_val = cross_validation.train_test_split(train, test_size=0.05)

In [15]:
y_train = X_train['Sales'].values.astype('float32')
X_train = X_train[features].values.astype('float32')

In [16]:
y_val = X_val['Sales'].values.astype('float32')
X_val = X_val[features].values.astype('float32')

In [17]:
y_train = y_train.reshape(-1,1)

In [18]:
y_val = y_val.reshape(-1,1)

In [19]:
import theano
import theano.tensor as T

In [34]:
def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5):
    # By default, this creates the same network as `build_mlp`, but it can be
    # customized with respect to the number and size of hidden layers. This
    # mostly showcases how creating a network in Python code can be a lot more
    # flexible than a configuration file. Note that to make the code easier,
    # all the layers are just called `network` -- there is no need to give them
    # different names if all we return is the last one we created anyway; we
    # just used different names above for clarity.

    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 26),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 1, nonlinearity=softmax)
    return network

In [21]:
input_var = T.matrix('inputs', dtype = 'float32')
target_var = T.matrix('targets', dtype = 'float32')

In [35]:
network = build_custom_mlp(input_var)

In [36]:
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(prediction, target_var)
loss = loss.mean()

In [37]:
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)

In [38]:
# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.squared_error(test_prediction,
                                                        target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
#test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
#                  dtype=theano.config.floatX)
test_acc = lasagne.objectives.squared_error(test_prediction,
                                                        target_var)
test_acc = test_acc.mean()

# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

In [39]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


In [40]:
import time
num_epochs = 10
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, 500):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f}".format(val_acc / val_batches * 100))

Epoch 1 of 10 took 12.126s
  training loss:		48135749.312629
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26
Epoch 2 of 10 took 11.887s
  training loss:		48136167.689441
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26
Epoch 3 of 10 took 11.693s
  training loss:		48135908.811594
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26
Epoch 4 of 10 took 11.870s
  training loss:		48137019.753623
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26
Epoch 5 of 10 took 11.803s
  training loss:		48136216.540373
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26
Epoch 6 of 10 took 12.119s
  training loss:		48135410.275362
  validation loss:		48361477.742574
  validation accuracy:		4836147774.26


KeyboardInterrupt: 