### Imports

In [1]:
# general imports
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from collections import Counter

# Keras specific imports
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import SGD, Adadelta, Adagrad, Adam
from keras.layers.convolutional import Convolution2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import SReLU, ELU

Using Theano backend.


### Data

In [2]:
# Load the feature table from CSV, then preview its first rows.
data = pd.read_csv('(19)_combined_verbose.csv')
data.head()

Unnamed: 0,--9,--8,--7,--6,--5,--4,--3,--2,--1,-0,...,z0,z1,z2,z3,z4,z5,z6,z7,z8,z9
0,1,1,1,1,1,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,1,1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Load the label table (one 0/1 column per class, per the preview below) and show its first rows.
labels = pd.read_csv('one_hot_labels.csv')
labels.head()

Unnamed: 0,B,C,E,G,H,I,S,T
0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0
4,0,0,0,0,1,0,0,0


In [4]:
# remove 'C' labels for hold-out validation set
# Row positions of every sample whose 'C' flag is set, found with one vectorised
# comparison instead of a Python-level loop. Kept as a plain list because a later
# cell passes `ind` to DataFrame.drop.
ind = np.flatnonzero(labels['C'].values == 1).tolist()

# Indexing an array with a list of positions already returns a new array, so the
# frames do not need to be copied first.
holdout_data = data.values[ind]
holdout_labels = labels.values[ind]  # these aren't really labels

In [5]:
print data.shape, labels.shape
data.drop(ind, inplace=True)
labels.drop(ind, inplace=True)
print data.shape, labels.shape

(268675, 912) (268675, 8)
(215217, 912) (215217, 8)


In [14]:
# With its rows held out, the 'C' column is no longer needed: remove it in place.
labels.drop(labels=['C'], axis=1, inplace=True)

### Validation Splits

In [15]:
# Split into train and test partitions (library-default proportions).
# The fixed random_state makes the shuffle identical on every run, so the same
# rows land in the test partition after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    data.values, labels.values, random_state=0)

In [16]:
# dimensions for models
n_input = data.shape[1]
n_output = labels.shape[1]

print n_input, n_output

912 7


### Simple Feed-Forward

In [33]:
# Baseline network: one 500-unit sigmoid hidden layer followed by a softmax output.
model_1L = Sequential([
    # 1st layer: n_input -> 500, sigmoid
    Dense(output_dim=500, input_dim=n_input),
    Activation('sigmoid'),
    # 2nd layer: 500 -> n_output, softmax
    Dense(input_dim=500, output_dim=n_output),
    Activation("softmax"),
])

In [34]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# accuracy reported as an extra metric.
model_1L.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

In [35]:
# Train for 5 epochs, evaluating on the held-out split after each one.
# The (X_test, y_test) pair must be passed as `validation_data`; `validation_split`
# only accepts a float fraction of the training data to set aside.
model_1L.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f9339bf8c10>

In [36]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model_1L.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_15 (Dense)                 (None, 500)           456500      dense_input_6[0][0]              
____________________________________________________________________________________________________
activation_6 (Activation)        (None, 500)           0           dense_15[0][0]                   
____________________________________________________________________________________________________
dense_16 (Dense)                 (None, 7)             3507        activation_6[0][0]               
____________________________________________________________________________________________________
activation_7 (Activation)        (None, 7)             0           dense_16[0][0]                   
Total params: 460007
______________________________________________________________________

#### Two Layer

In [8]:
# Two-layer network: a regularised 500-unit hidden block, then the output block.
model_2L = Sequential([
    # first layer: 500 nodes -> BatchNormalization -> ELU -> Dropout(0.5)
    Dense(output_dim=500, input_dim=n_input),
    BatchNormalization(),
    ELU(alpha=0.9),
    Dropout(0.5),
    # second layer: n_output nodes -> BatchNormalization -> softmax
    # NOTE(review): batch-normalising right before the softmax is unusual; confirm it is intended.
    Dense(input_dim=500, output_dim=n_output),
    BatchNormalization(),
    Activation("softmax"),
])

In [9]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# accuracy reported as an extra metric.
model_2L.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

In [10]:
# Train for 5 epochs, evaluating on the held-out split after each one.
# The (X_test, y_test) pair must be passed as `validation_data`; `validation_split`
# only accepts a float fraction of the training data to set aside.
model_2L.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f92ed32ac90>

In [13]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model_2L.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_1 (Dense)                  (None, 500)           456500      dense_input_1[0][0]              
____________________________________________________________________________________________________
batchnormalization_1 (BatchNormal(None, 500)           1000        dense_1[0][0]                    
____________________________________________________________________________________________________
elu_1 (ELU)                      (None, 500)           0           batchnormalization_1[0][0]       
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 500)           0           elu_1[0][0]                      
___________________________________________________________________________________________

#### Three Layer

In [29]:
# Three-layer network: two regularised 1000-unit hidden blocks, then the output block.
model_3L = Sequential([
    # first layer: 1000 nodes -> BatchNormalization -> ELU -> Dropout(0.5)
    Dense(input_dim=n_input, output_dim=1000),
    BatchNormalization(),
    ELU(alpha=0.9),
    Dropout(0.5),
    # second layer: 1000 nodes -> BatchNormalization -> ELU -> Dropout(0.5)
    Dense(input_dim=1000, output_dim=1000),
    BatchNormalization(),
    ELU(alpha=0.9),
    Dropout(0.5),
    # third layer: n_output nodes -> BatchNormalization -> softmax
    # NOTE(review): batch-normalising right before the softmax is unusual; confirm it is intended.
    Dense(input_dim=1000, output_dim=n_output),
    BatchNormalization(),
    Activation("softmax"),
])

In [30]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# accuracy reported as an extra metric.
model_3L.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

In [31]:
# Train for 10 epochs, evaluating on the held-out split after each one.
# The (X_test, y_test) pair must be passed as `validation_data`; `validation_split`
# only accepts a float fraction of the training data to set aside.
model_3L.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f933a62d250>

In [32]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model_3L.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_12 (Dense)                 (None, 1000)          913000      dense_input_5[0][0]              
____________________________________________________________________________________________________
batchnormalization_12 (BatchNorma(None, 1000)          2000        dense_12[0][0]                   
____________________________________________________________________________________________________
elu_8 (ELU)                      (None, 1000)          0           batchnormalization_12[0][0]      
____________________________________________________________________________________________________
dropout_8 (Dropout)              (None, 1000)          0           elu_8[0][0]                      
___________________________________________________________________________________________

### Thoughts
- Accuracy and loss oscillate a lot during training.
    - Consider adapting the learning rate as training progresses (e.g. decaying it on a schedule):
        - http://keras.io/callbacks/#learningratescheduler