In [31]:
# general imports
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from collections import Counter

# Keras specific imports
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import SGD, Adadelta, Adagrad, Adam
from keras.layers.convolutional import Convolution2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import SReLU, ELU

### Data

In [5]:
data = pd.read_csv('../fixed_five.csv')
labels = pd.read_csv('../one_hot_labels.csv')

In [16]:
data.head()

Unnamed: 0,A-2,C-2,D-2,E-2,F-2,G-2,H-2,I-2,K-2,L-2,...,N2,P2,Q2,R2,S2,T2,V2,W2,Y2,-2
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
labels.head()

Unnamed: 0,H,E,T,S,B,U
0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0


In [6]:
print(data.shape, labels.shape)

(134815, 105) (134815, 6)


In [18]:
# trying with stratified training/testing sets
X_train, X_test, y_train, y_test = train_test_split(data.values, labels.values, stratify=labels.values)

In [19]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(606666, 105) (202224, 105) (606666, 6) (202224, 6)


In [33]:
train_ex = [np.argmax(yi) for yi in y_train]

In [34]:
Counter(train_ex)

Counter({0: 192183, 1: 84330, 2: 54473, 3: 41654, 4: 6132, 5: 227894})

### Modeling

In [25]:
# 3-Layer Network
model = Sequential()

In [26]:
# first layer, 200 nodes, BatchNormalized, ELU and Dropout
model.add(Dense(output_dim=200, input_dim=105))
model.add(BatchNormalization())
model.add(ELU(alpha=0.9))
model.add(Dropout(0.5))

# second layer, 200 nodes, BatchNormalized, ELU and Dropout
model.add(Dense(output_dim=200, input_dim=200))
model.add(BatchNormalization())
model.add(ELU(alpha=0.9))
model.add(Dropout(0.5))

# third layer, 6 nodes, BatchNormalized, SoftMax
model.add(Dense(input_dim=200, output_dim=6))
model.add(BatchNormalization())
model.add(Activation("softmax"))

In [27]:
# different optimizers
sgd = SGD(lr=1e-1, momentum=0.9, nesterov=True)
adam = Adam()
adag = Adagrad()
adad = Adadelta()

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [28]:
model.fit(X_train, y_train, nb_epoch=5, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11686e198>

In [29]:
loss, acc = model.evaluate(X_test, y_test, batch_size=32)



In [30]:
acc

0.62343243136324078