In [2]:
from datetime import datetime
from keras.layers import Dense, Input, Embedding, Dropout, Conv1D, MaxPooling1D, LSTM
from keras.layers.core import Flatten
from keras.models import Model
from keras import regularizers
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

from functions import *


# conf and preprocess -----------------------------------------
# -------------------------------------------------------------

# settings ---------------------
# ------------------------------

EMBEDDING = True
TYPE = 'embedding' if EMBEDDING else 'standard'
MODELPATH ='models/char-conv-' + TYPE + '-{epoch:02d}-{val_acc:.3f}-{val_loss:.3f}.hdf5'
FILTERS = 500
LR = 0.0001 if EMBEDDING else 0.00001


# generate dataset -------------
# ------------------------------

data, table = load_processed_data(False, not EMBEDDING)
print("input shape: ", np.shape(data.x_train))

Using TensorFlow backend.


reading raw data and preprocessing..
Train size: 9596, test size 1066
input shape:  (9596, 250)


In [3]:
# model architecture ------------------------------------------
# -------------------------------------------------------------


# input and embedding ----------
# ------------------------------

if EMBEDDING:

    inputlayer = Input(shape=(250,))
    network = Embedding(70, 16, input_length=250)(inputlayer)

else:
    inputlayer = Input(shape=(250 ,70))
    network = inputlayer


network = LSTM(128, return_sequences=True)(network)
network = LSTM(128)(network)
# fully connected --------------
# ------------------------------

#network = Flatten()(network)
#network = Dense(128, activation='relu')(network)
network = Dropout(0)(network)

# output
ypred = Dense(2, activation='softmax')(network)


# training ----------------------------------------------------
# -------------------------------------------------------------


# callbacks --------------------
# ------------------------------

# tensorboard
TB_DIR = 'logs/' + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '_' + TYPE

os.makedirs(TB_DIR)
tensorboard = TensorBoard(log_dir=TB_DIR)

# early stopping and checkpoint
estopping = EarlyStopping(monitor='val_acc', patience=1000)
checkpoint = ModelCheckpoint(filepath=MODELPATH, save_best_only=True)

# model-------------------------
# ------------------------------

optimizer = RMSprop(lr=0.01)


model = Model(inputs=inputlayer, outputs=ypred)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['acc'])

print(TB_DIR)
print(model.summary())

logs/2017-10-25 21:14:45_embedding
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 250)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 250, 16)           1120      
_________________________________________________________________
lstm_1 (LSTM)                (None, 250, 128)          74240     
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 258       
Total params: 207,202.0
Trainable params: 207,202.0
Non-trainable params: 0.0
_____________________________

In [None]:


# fit and run ------------------
# ------------------------------
try:
    hist = model.fit(data.x_train,
                     data.y_train,
                     validation_data=(data.x_test, data.y_test),
                     epochs=500,
                     batch_size=50,
                     shuffle=False,
                     verbose=2,
                     callbacks=[checkpoint, estopping, tensorboard])

except KeyboardInterrupt:    
    pass

Train on 9596 samples, validate on 1066 samples
Epoch 1/500
143s - loss: 0.7433 - acc: 0.5006 - val_loss: 0.7392 - val_acc: 0.4812
Epoch 2/500
136s - loss: 0.7250 - acc: 0.4967 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 3/500
134s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 4/500
135s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 5/500
137s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 6/500
134s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 7/500
135s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 8/500
138s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 9/500
135s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 10/500
137s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 11/500
136s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 12/500
135s - loss: 0.7236 - ac

133s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 99/500
132s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 100/500
132s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 101/500
132s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 102/500
132s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 103/500
132s - loss: 0.7236 - acc: 0.4973 - val_loss: 0.7393 - val_acc: 0.4812
Epoch 104/500
