In [0]:
import tensorflow as tf
from tensorflow import keras
from sklearn import metrics, preprocessing, ensemble

In [0]:
import os
import numpy as np
from typing import Sequence

data_dir = '.'

def load_data(which: str):
    """
    Loads data from a csv file
    :param which: str
        Which data to load, train or test
    """
    assert which in ['train', 'test']
    
    if which == 'train':
        data = np.loadtxt(fname=os.path.join(data_dir, 'train_data.csv'), delimiter=',', skiprows=1)
        labels = np.loadtxt(fname=os.path.join(data_dir, 'train_labels.csv'), delimiter=',', skiprows=1)
        return data, labels
    elif which == 'test':
        data = np.loadtxt(fname=os.path.join(data_dir, 'test_data.csv'), delimiter=',', skiprows=1)
        return data
    
def save_prediction(prediction: Sequence[int], 
                    path: str = 'submission.csv'):
    """
    Saves a sequence of predictions into a csv file with additional index column
    :param prediction: Sequ ence of ints
        Predictions to save
    :param path: str
        Path to a file to save into
    """
    
    pred_with_id = np.stack([np.arange(len(prediction)), prediction], axis=1)
    np.savetxt(fname=path, X=pred_with_id, fmt='%d', delimiter=',', header='id,label', comments='')

In [0]:
X_train, y_train = load_data(which='train')
X_test = load_data(which='test')

In [0]:
normalizer = preprocessing.StandardScaler()
# normalizer = preprocessing.QuantileTransformer(output_distribution='normal')
X_train = normalizer.fit_transform(X_train)
X_test = normalizer.transform(X_test)

In [0]:
X_wo_valid = X_train[:round(0.75*len(X_train))]
y_wo_valid = y_train[:round(0.75*len(y_train))]

In [0]:
X_valid = X_train[round(0.75*len(X_train)):]
y_valid = y_train[round(0.75*len(y_train)):]

In [7]:
from keras.utils import to_categorical
y_train_bin = to_categorical(y_train)
y_wo_valid_bin = to_categorical(y_wo_valid)
y_valid_bin = to_categorical(y_valid)

Using TensorFlow backend.


In [56]:
input_layer = keras.layers.Input(shape=(X_train.shape[1], ))
x = keras.layers.Dropout(0.5)(input_layer)

x = keras.layers.Dense(342)(input_layer)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.PReLU()(x)
x = keras.layers.Dropout(0.5)(x)

x = keras.layers.Dense(342)(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.PReLU()(x)
x = keras.layers.Dropout(0.5)(x)

x = keras.layers.Dense(342)(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.PReLU()(x)
x = keras.layers.Dropout(0.5)(x)

x = keras.layers.Dense(10)(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Softmax()(x)

model = keras.models.Model(inputs=input_layer, outputs=x)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 342)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 342)               117306    
_________________________________________________________________
batch_normalization_v1_10 (B (None, 342)               1368      
_________________________________________________________________
p_re_lu_7 (PReLU)            (None, 342)               342       
_________________________________________________________________
dropout_11 (Dropout)         (None, 342)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 342)               117306    
_________________________________________________________________
batch_normalization_v1_11 (B (None, 342)               1368      
__________

In [0]:
model_directory = '.'
model_name = 'model'

In [0]:
early_stopper = keras.callbacks.EarlyStopping(monitor="val_loss",
                              patience=32,
                              verbose=True,
                              mode="auto")

model.compile(optimizer='adam', loss='categorical_crossentropy')

checkpoint_callback = keras.callbacks.ModelCheckpoint(("%s%s.h5" % (model_directory, model_name)),
                                      monitor="val_loss",
                                      verbose=False,
                                      save_best_only=True,
                                      mode="min")

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=8, min_lr=0.001)

In [59]:
history = model.fit(X_train,
                    y_train_bin,
                    epochs=2048,
                    batch_size=64,
                    validation_split=0.25,
#                     verbose=2,
                    shuffle=True,
                    callbacks=[early_stopper, checkpoint_callback, reduce_lr])

Train on 44685 samples, validate on 14895 samples
Epoch 1/2048
Epoch 2/2048
Epoch 3/2048
Epoch 4/2048
Epoch 5/2048
Epoch 6/2048
Epoch 7/2048
Epoch 8/2048
Epoch 9/2048
Epoch 10/2048
Epoch 11/2048
Epoch 12/2048
Epoch 13/2048
Epoch 14/2048
Epoch 15/2048
Epoch 16/2048
Epoch 17/2048
Epoch 18/2048
Epoch 19/2048
Epoch 20/2048
Epoch 21/2048
Epoch 22/2048
Epoch 23/2048
Epoch 24/2048
Epoch 25/2048
Epoch 26/2048
Epoch 27/2048
Epoch 28/2048
Epoch 29/2048
Epoch 30/2048
Epoch 31/2048
Epoch 32/2048
Epoch 33/2048
Epoch 34/2048
Epoch 35/2048
Epoch 36/2048
Epoch 37/2048
Epoch 38/2048
Epoch 39/2048
Epoch 40/2048
Epoch 41/2048
Epoch 42/2048
Epoch 43/2048
Epoch 44/2048
Epoch 45/2048
Epoch 46/2048
Epoch 47/2048
Epoch 48/2048
Epoch 49/2048
Epoch 50/2048
Epoch 00050: early stopping


In [0]:
model = keras.models.load_model("%s%s.h5" % (model_directory, model_name))

In [0]:
preds_valid = model.predict(X_valid)
preds_valid = np.round(np.argmax(preds_valid, axis=1))

In [64]:
metrics.accuracy_score(y_valid, preds_valid)

0.848069822087949

In [0]:
preds = model.predict(X_test)
preds = np.round(np.round(np.argmax(preds, axis=1)))

In [0]:
random_pred = np.random.randint(0, 10, size=X_test.shape[0])
assert random_pred.shape == preds.shape

In [0]:
save_prediction(prediction=preds)