In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [2]:
def preprocess_data(X, scaler=None):
    '''Standardize features by removing the mean and scaling to unit variance'''
    if not scaler:
        scaler = StandardScaler()
        scaler.fit(X)
    return scaler.transform(X), scaler

def preprocess_labels(y, encoder=None, categorical=True):
    '''Encode labels with value between 0 and n_classes-1.'''
    if not encoder:
        encoder = LabelEncoder()
        encoder.fit(y)
    labels = encoder.transform(y).astype(np.int32)
    if categorical:
        labels = np_utils.to_categorical(labels)
    return labels, encoder

def load_data(path):
    df = pd.read_csv(path)
    data = df.values
    np.random.shuffle(data)
    X = data[:, 1:-1].astype(np.float32)
    y = data[:, -1]
    y, _ = preprocess_labels(y)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
    x_train, scaler = preprocess_data(x_train)
#     we use only the training data to normalize the data.
    x_test, _ = preprocess_data(x_test, scaler)
    x_val, _ = preprocess_data(x_val, scaler)
    return ((x_train, y_train),
            (x_val, y_val),
            (x_test, y_test))

In [3]:
path = "data/train.csv"
(x_train, y_train), (x_val, y_val), (x_test, y_test) = load_data(path=path)

In [4]:
dims = x_train.shape[1]
nb_classes = y_train.shape[1]
print("Number of dimensions =", dims)
print("Number of classes =", nb_classes)

Number of dimensions = 93
Number of classes = 9


In [5]:
model = Sequential()
model.add(Dense(100, input_shape=(dims,)))
# model.add(Dense(100))
# model.add(Dense(100))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.compile(optimizer='sgd', loss='categorical_crossentropy')

path_to_model = 'model/model.h5'
early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1) 
best_model = ModelCheckpoint(path_to_model, verbose=0, save_best_only=True)

model.fit(x_train, y_train,
          validation_data = (x_val, y_val),
          nb_epoch=100,
          batch_size=128,
          verbose = True,
          callbacks=[early_stop, best_model])

model.evaluate(x_val, y_val, verbose=1)

Train on 43314 samples, validate on 9282 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 00066: early stopping

0.65315415743121141

In [6]:
model.metrics_names

['loss']

In [7]:
model.predict(x_val)[0]

array([ 0.23099582,  0.06904652,  0.02361656,  0.00663695,  0.02984776,
        0.15226662,  0.06315273,  0.26238906,  0.16204797], dtype=float32)

In [8]:
y_val[0]

array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])