In [2]:
import pandas as pd
import theano
import lasagne
import numpy as np
from theano import tensor as T
import time

train = pd.read_hdf("train.h5", "train")
test = pd.read_hdf("test.h5", "test")

labelnumber=5   # 0 to 4

In [30]:
X.shape[1]

79

In [3]:
# convert from dataframe to numpy array
y_train = train.y.as_matrix()

X_train = train.iloc[:,1:101].as_matrix()
X_test= test.as_matrix()
X = np.concatenate((X_train, X_test), 0)

print(X.shape)

# drop feature with too small standard deviation  (<0.005))
drop_feature = np.where(X.std(axis=0)<0.005)
X = np.delete(X, drop_feature, axis=1)

dimension = X.shape[1]
print ("feature selected new dimension is", int(dimension))

#scale the X
from sklearn.preprocessing import scale
X = scale(X, axis=0)
X_train = X [0:X_train.shape[0],:]
X_test  = X [X_train.shape[0]:X.shape[0],:]

# divide into train set and eval set
num_sample = y_train.shape[0]

# take 1/10 for evaluation
num_eval = np.rint(num_sample/10)  
num_train = num_sample-num_eval


X_val = X_train[num_train:num_sample]
X_train = X_train[0:num_train]
y_val = y_train[num_train:num_sample].astype(int) 
y_train = y_train[0:num_train].astype(int) 

print('train set size', X_train.shape, y_train.shape)
print('Eval set size', X_val.shape, y_val.shape)
print('Test set size', X_test.shape)

(53461, 100)
feature selected new dimension is 79
train set size (40792, 79) (40792,)
Eval set size (4532, 79) (4532,)
Test set size (8137, 79)




In [4]:
# network 1: normal mlp(multilayer perceptron)
def build_mlp(input_var=None):
    l_in = lasagne.layers.InputLayer(shape=(None, dimension),
                                     input_var=input_var)
    
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    
    l_hid1 = lasagne.layers.DenseLayer(
        l_in_drop, num_units=400,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1_drop, num_units=500,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    
    l_out = lasagne.layers.DenseLayer(
        l_hid2_drop, num_units=5,
        nonlinearity=lasagne.nonlinearities.softmax)
    
    return l_out

In [5]:
# network 2:
def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5):
    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, dimension),
                                    input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 5, nonlinearity=softmax)
    return network    

In [8]:
# network 3: convolutional neural network
def build_cnn(input_var=None):
    network = lasagne.layers.InputLayer(shape=(None, dimension),
                                        input_var=input_var)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=5,
        nonlinearity=lasagne.nonlinearities.softmax)

    return network

In [10]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

In [21]:
# Prepare Theano variables for inputs and targets
input_var = T.dmatrix('inputs')
target_var = T.ivector('targets')

# Create neural network model

#network = build_mlp(input_var)

network = build_custom_mlp(input_var, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5)


prediction = lasagne.layers.get_output(network)

# define loss function
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()

# update weights
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

test_prediction = lasagne.layers.get_output(network, deterministic=True)


test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                        target_var)
test_loss = test_loss.mean()

test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                  dtype=theano.config.floatX)

train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
predict_fn = theano.function([input_var], T.argmax(prediction, axis=1, keepdims=False))

In [28]:
num_epochs = 100
batchsize = 500
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = np.int64(0)
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
        inputs, targets = batch
        tmp = train_fn(inputs, targets)
        train_err += tmp.astype(np.int32) 
        train_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    
    for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
        inputs, targets = batch
        tmp = val_fn(inputs.astype(np.int32), targets.astype(np.int32))
        err, acc = tmp
        val_err += err
        val_acc += acc
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

Epoch 1 of 100 took 9.049s
  training loss:		0.000000
  validation loss:		0.647460
  validation accuracy:		81.27 %
Epoch 2 of 100 took 6.323s
  training loss:		0.000000
  validation loss:		0.620924
  validation accuracy:		82.53 %
Epoch 3 of 100 took 7.147s
  training loss:		0.000000
  validation loss:		0.613876
  validation accuracy:		82.84 %
Epoch 4 of 100 took 6.291s
  training loss:		0.000000
  validation loss:		0.601132
  validation accuracy:		83.69 %
Epoch 5 of 100 took 7.125s
  training loss:		0.000000
  validation loss:		0.598075
  validation accuracy:		83.49 %
Epoch 6 of 100 took 7.361s
  training loss:		0.000000
  validation loss:		0.593177
  validation accuracy:		84.16 %
Epoch 7 of 100 took 9.794s
  training loss:		0.000000
  validation loss:		0.580798
  validation accuracy:		84.80 %
Epoch 8 of 100 took 8.440s
  training loss:		0.000000
  validation loss:		0.579916
  validation accuracy:		84.42 %
Epoch 9 of 100 took 8.384s
  training loss:		0.000000
  validation loss:		0.5788

KeyboardInterrupt: 

In [None]:
#val_fn(inputs, targets)

In [93]:
y_test = predict_fn(X_test)
print('Prediction tset size', y_test.shape)

('Prediction tset size', (8137L,))


In [94]:
sub = pd.read_csv("sample.csv")
sub['y'] = y_test
sub.head()
sub.to_csv('mlp_800_800_200epoch.csv', index = False)