# Multi-Layer Perceptron

In [1]:
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

%matplotlib inline

In [2]:
def write_predictions(predictions, file_name='pred.csv'):
    """Write predictions to a two-column CSV file.

    The file starts with the header ``ID,Label`` and then holds one
    ``<id>,<label>`` row per prediction, with ids counting up from 1.
    No newline is written after the last row.

    Args:
        predictions: Iterable of predicted labels, in submission order.
        file_name: Path of the CSV file to create or overwrite.
    """
    with open(file_name, 'w') as out:
        out.write('ID,Label')
        # Each row carries its own leading newline, so the file never ends with one.
        out.writelines(
            '\n{0},{1}'.format(row_id, label)
            for row_id, label in enumerate(predictions, start=1)
        )

In [3]:
# Load Data
# Vector CSVs are read without a header row and then transposed; label CSVs are read
# without a header row and reduced to the column named 'label'.
def _read_vectors(csv_path):
    """Read a headerless vector CSV and return the transposed frame."""
    return pd.read_csv(csv_path, header=None).T


def _read_labels(csv_path):
    """Read a headerless label CSV and return its 'label' column."""
    return pd.read_csv(csv_path, header=None, names=['label'])['label']


X_train = _read_vectors('Train/trainVectors.csv')
y_train = _read_labels('Train/trainLbls.csv')
X_validation = _read_vectors('Validation/valVectors.csv')
y_validation = _read_labels('Validation/valLbls.csv')
X_test = _read_vectors('Test/testVectors.csv')

In [4]:
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [5]:
# Network sizing, all derived from the training data.
num_classes = y_train.unique().size      # number of distinct training labels
num_dimensions = len(X_train.columns)    # number of feature columns
# Hidden layer width: midpoint between input and output widths, rounded down.
num_neurons_hidden_layer = (num_dimensions + num_classes) // 2

def baseline_model():
    """Build and compile the baseline network.

    The network has one ReLU hidden layer followed by a softmax output layer.
    Layer widths are taken from the module-level ``num_dimensions``,
    ``num_neurons_hidden_layer`` and ``num_classes``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    hidden_layer = Dense(num_neurons_hidden_layer, input_dim=num_dimensions, activation='relu')
    output_layer = Dense(num_classes, activation='softmax')
    network = Sequential([hidden_layer, output_layer])
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network

In [6]:
# Wrap the Keras model factory in the scikit-learn style classifier wrapper.
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=20,
    batch_size=5,
    verbose=2,
)

In [7]:
# Sanity check: display the (rows, columns) shape of the training matrix.
X_train.shape

(5830, 4096)

In [8]:
# 10-fold splitter; rows are shuffled with a fixed seed.
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [None]:
# Cross-validate the wrapped network on the training set using the splitter defined above.
results = cross_val_score(
    estimator,
    X=X_train,
    y=y_train,
    cv=kfold,
)

Epoch 1/20
 - 135s - loss: 2.2735 - acc: 0.6364
Epoch 2/20
 - 137s - loss: 1.4584 - acc: 0.7831
Epoch 3/20
 - 140s - loss: 1.2653 - acc: 0.8355
Epoch 4/20
 - 132s - loss: 1.2158 - acc: 0.8557
Epoch 5/20
 - 132s - loss: 1.1850 - acc: 0.8757
Epoch 6/20
 - 134s - loss: 1.1872 - acc: 0.8828
Epoch 7/20
 - 142s - loss: 1.0581 - acc: 0.8982
Epoch 8/20
 - 132s - loss: 1.0754 - acc: 0.9017
Epoch 9/20
 - 131s - loss: 1.0588 - acc: 0.9089
Epoch 10/20
 - 132s - loss: 1.0006 - acc: 0.9163
Epoch 11/20
 - 131s - loss: 1.0473 - acc: 0.9152
Epoch 12/20
 - 132s - loss: 1.2772 - acc: 0.8971
Epoch 13/20
 - 133s - loss: 1.0449 - acc: 0.9200
Epoch 14/20
 - 132s - loss: 1.0034 - acc: 0.9226
Epoch 15/20
 - 132s - loss: 0.9833 - acc: 0.9266
Epoch 16/20
 - 132s - loss: 1.0140 - acc: 0.9251
Epoch 17/20
 - 131s - loss: 0.9404 - acc: 0.9304
Epoch 18/20
 - 132s - loss: 1.0176 - acc: 0.9238
Epoch 19/20
 - 131s - loss: 0.9431 - acc: 0.9341
Epoch 20/20
 - 131s - loss: 0.9733 - acc: 0.9304
Epoch 1/20
 - 134s - loss: 2.

In [None]:
# Summarize the cross-validation scores as mean and standard deviation, in percent.
mean_score_pct = results.mean() * 100
std_score_pct = results.std() * 100
print("Baseline: %.2f%% (%.2f%%)" % (mean_score_pct, std_score_pct))

In [None]:
# cross_val_score fits clones of `estimator`, so the object itself is still unfitted at
# this point and `predict` would fail on a fresh kernel. Train it on the training set
# before scoring the held-out validation set.
estimator.fit(X_train, y_train)

# Predict validation labels and print per-class precision / recall / F1.
y_validation_predictions = estimator.predict(X_validation)
print(classification_report(y_validation, y_validation_predictions))

In [8]:
# NOTE(review): left disabled. The recorded run below reaches roughly 0.95 training accuracy
# but only about 0.05 validation accuracy. Keras documents that `validation_split` holds out
# the last fraction of the rows before any shuffling, so this gap presumably means the
# training CSV is ordered by class -- TODO confirm, and prefer scoring against
# X_validation / y_validation instead of a tail split.
#hist = estimator.fit(X_train, y_train, shuffle=True, validation_split=0.2, verbose=2)

Train on 4664 samples, validate on 1166 samples
Epoch 1/20
 - 120s - loss: 3.9904 - acc: 0.5971 - val_loss: 14.2498 - val_acc: 0.0489
Epoch 2/20
 - 119s - loss: 0.6574 - acc: 0.8422 - val_loss: 14.7031 - val_acc: 0.0480
Epoch 3/20
 - 119s - loss: 0.4850 - acc: 0.8885 - val_loss: 14.8774 - val_acc: 0.0515
Epoch 4/20
 - 119s - loss: 0.3432 - acc: 0.9237 - val_loss: 15.0133 - val_acc: 0.0463
Epoch 5/20
 - 122s - loss: 0.3266 - acc: 0.9363 - val_loss: 14.9705 - val_acc: 0.0429
Epoch 6/20
 - 120s - loss: 0.2952 - acc: 0.9470 - val_loss: 15.0774 - val_acc: 0.0549
Epoch 7/20
 - 119s - loss: 0.2562 - acc: 0.9543 - val_loss: 15.2387 - val_acc: 0.0437
Epoch 8/20
 - 120s - loss: 0.2755 - acc: 0.9552 - val_loss: 15.1056 - val_acc: 0.0575
Epoch 9/20
 - 119s - loss: 0.3266 - acc: 0.9539 - val_loss: 15.1964 - val_acc: 0.0497
Epoch 10/20
 - 121s - loss: 0.2394 - acc: 0.9648 - val_loss: 15.3161 - val_acc: 0.0420
Epoch 11/20
 - 120s - loss: 0.2999 - acc: 0.9584 - val_loss: 15.2860 - val_acc: 0.0420
Epoc

In [9]:
# Show layer output shapes and parameter counts of the wrapped Keras model.
# NOTE(review): `.model` presumably only exists after `estimator.fit` has run -- confirm
# that a cell fitting `estimator` is executed before this one.
estimator.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 2062)              8448014   
_________________________________________________________________
dense_2 (Dense)              (None, 29)                59827     
Total params: 8,507,841
Trainable params: 8,507,841
Non-trainable params: 0
_________________________________________________________________


In [12]:
# NOTE(review): left disabled. `to_categorical` sizes its output as max(label) + 1, and the
# classification report in this notebook lists labels starting at 1, so the one-hot targets
# would presumably be one column wider than the network's 29 outputs -- TODO confirm and
# re-index the labels to start at 0 before re-enabling.
#estimator.model.evaluate(X_validation, keras.utils.to_categorical(y_validation), verbose=1)

In [15]:
# Predict a class label for every validation vector.
y_validation_predictions = estimator.predict(X_validation)

In [18]:
# Per-class precision / recall / F1 of the validation predictions.
validation_report = classification_report(y_validation, y_validation_predictions)
print(validation_report)

             precision    recall  f1-score   support

          1       0.97      1.00      0.98       116
          2       0.62      0.58      0.60        96
          3       0.73      0.70      0.72        94
          4       0.84      0.85      0.84        92
          5       0.74      0.83      0.78        88
          6       0.38      0.59      0.46        92
          7       0.57      0.40      0.47        92
          8       0.51      0.60      0.55        88
          9       0.75      0.77      0.76        88
         10       0.48      0.72      0.58        82
         11       0.86      0.92      0.89        86
         12       0.67      0.72      0.70        80
         13       0.66      0.96      0.78        80
         14       0.38      0.80      0.52        82
         15       0.40      0.80      0.54        82
         16       0.49      0.87      0.63        82
         17       0.84      0.90      0.87        80
         18       0.92      0.74      0.82   

  'precision', 'predicted', average, warn_for)
