In [1]:
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers.core import Dense, Activation, Dropout
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE

Using TensorFlow backend.


In [2]:
import pandas as pd
import numpy as np

Read Data

In [3]:
# Load the training CSV; column 0 is used as the class label and every
# remaining column as a feature.
train = pd.read_csv('../input/train.csv')
# Positional `.iloc` replaces `.ix`, which was deprecated in pandas 0.20 and
# removed in pandas 1.0 (the old call raises AttributeError on current pandas).
labels = train.iloc[:, 0].values.astype('int32')
X_train = train.iloc[:, 1:].values.astype('float32')
# The test CSV is used whole as the feature matrix (no column is sliced off).
X_test = pd.read_csv('../input/test.csv').values.astype('float32')

In [4]:
# Show the int32 label vector taken from the first column of `train`.
labels

array([1, 0, 1, ..., 7, 6, 9], dtype=int32)

Convert labels to a one-hot class matrix

In [5]:
# Turn the integer label vector into a 2-D class matrix (one column per class);
# the column count of this matrix is what later sizes the network's output layer.
y_train = np_utils.to_categorical(labels)

Preprocessing (standardization)

In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the TRAINING set only,
# then apply that same transformation to the test set. Fitting a second scaler
# on X_test (as before) scales the two sets with different statistics, so the
# model would see test features on a different scale than it was trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Reduce the standardized features to their first 100 principal components.
# The projection is fitted on the training set and the same fitted projection
# is then applied to the test set, so both live in the same component space.
# random_state pins the result for runs where scikit-learn picks a randomized
# SVD solver, keeping Restart & Run All reproducible.
pca = PCA(n_components=100, random_state=0)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

# t-SNE is deliberately not used as a preprocessing step here: sklearn's TSNE
# only provides fit_transform() and has no transform(), so an embedding fitted
# on X_train cannot be applied to X_test.

In [8]:
# Network sizing: the input width is the number of feature columns in X_train,
# the output width is the number of columns in the class matrix y_train.
input_dim, nb_classes = X_train.shape[1], y_train.shape[1]

Construction of deep dumb MLP (DDMLP)

In [9]:
# MLP with two identical hidden stages (Dense -> ReLU -> Dropout) followed by
# a softmax output layer with one unit per class.
hidden_units = 128
drop_rate = 0.15

model = Sequential([
    # Hidden stage 1 (also declares the input width).
    Dense(hidden_units, input_dim=input_dim),
    Activation('relu'),
    Dropout(drop_rate),
    # Hidden stage 2.
    Dense(hidden_units),
    Activation('relu'),
    Dropout(drop_rate),
    # Output stage.
    Dense(nb_classes),
    Activation('softmax'),
])

In [10]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer, and
# accuracy reported alongside the loss.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Train for 20 epochs in mini-batches of 64, holding out 10% of the training
# rows for validation; verbose=2 prints one summary line per epoch.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    nb_epoch=20,
    validation_split=0.1,
    verbose=2,
)



Train on 37800 samples, validate on 4200 samples
Epoch 1/20
2s - loss: 0.4580 - acc: 0.8650 - val_loss: 0.1929 - val_acc: 0.9431
Epoch 2/20
2s - loss: 0.2198 - acc: 0.9359 - val_loss: 0.1552 - val_acc: 0.9545
Epoch 3/20
2s - loss: 0.1730 - acc: 0.9489 - val_loss: 0.1276 - val_acc: 0.9610
Epoch 4/20
2s - loss: 0.1474 - acc: 0.9565 - val_loss: 0.1197 - val_acc: 0.9657
Epoch 5/20
2s - loss: 0.1316 - acc: 0.9606 - val_loss: 0.1158 - val_acc: 0.9686
Epoch 6/20
2s - loss: 0.1174 - acc: 0.9649 - val_loss: 0.1172 - val_acc: 0.9683
Epoch 7/20
2s - loss: 0.1083 - acc: 0.9680 - val_loss: 0.1191 - val_acc: 0.9710
Epoch 8/20
2s - loss: 0.1029 - acc: 0.9699 - val_loss: 0.1063 - val_acc: 0.9683
Epoch 9/20
2s - loss: 0.0949 - acc: 0.9715 - val_loss: 0.1135 - val_acc: 0.9712
Epoch 10/20
2s - loss: 0.0906 - acc: 0.9724 - val_loss: 0.1072 - val_acc: 0.9736
Epoch 11/20
2s - loss: 0.0861 - acc: 0.9751 - val_loss: 0.1132 - val_acc: 0.9710
Epoch 12/20
2s - loss: 0.0823 - acc: 0.9764 - val_loss: 0.1160 - val_

<keras.callbacks.History at 0x7fa07d8b3dd8>

In [12]:
# Predict one class index per test row with the trained model; verbose=0
# suppresses progress output.
preds = model.predict_classes(X_test,verbose=0)

In [13]:
# Show the predicted class indices (NumPy abbreviates long arrays with "...").
preds

array([2, 0, 9, ..., 3, 9, 2])

In [14]:
# Build the submission table: a 1-based row id next to the predicted label.
# The id column is spelled "ImageId" to match the header of Kaggle's Digit
# Recognizer sample submission ("ImageId,Label"); the previous "ImageID"
# spelling does not match that header.
# NOTE(review): assumes this notebook targets that competition — confirm.
# `columns=` pins the column order instead of relying on dict ordering.
df_preds = pd.DataFrame(
    {"ImageId": list(range(1, len(preds) + 1)), "Label": preds},
    columns=["ImageId", "Label"],
)
df_preds.to_csv("eda.csv", index=False, header=True)